From c408bcbee5475559ddba55d7b3ba37584350d88b Mon Sep 17 00:00:00 2001 From: Felix Berlakovich Date: Tue, 3 Sep 2024 12:24:54 +0200 Subject: [PATCH] cmq --- .gitattributes | 1 + Include/Python.h | 21 + Include/cpython/code.h | 72 ++ Include/cpython/pyframe.h | 4 + Include/internal/pycore_code.h | 4 + Include/internal/pycore_frame.h | 49 + .../pycore_global_objects_fini_generated.h | 1 + Include/internal/pycore_global_strings.h | 1 + Include/internal/pycore_long.h | 1 + Include/internal/pycore_opcode.h | 80 +- .../internal/pycore_runtime_init_generated.h | 1 + .../internal/pycore_unicodeobject_generated.h | 3 + Include/opcode.h | 115 +- Lib/importlib/_bootstrap_external.py | 2 +- Lib/opcode.py | 12 + Makefile.pre.in | 9 +- Modules/main.c | 8 + Objects/codeobject.c | 41 + Objects/typeobject.c | 22 + Programs/test_frozenmain.h | 33 +- Python/bltinmodule.c | 101 ++ Python/bytecodes.c | 89 +- Python/ceval.c | 76 ++ Python/ceval_macros.h | 36 +- Python/cmlq_external_opcode.h | 15 + Python/frame.c | 2 +- Python/generated_cases.c.h | 992 +++++++++--------- Python/opcode_metadata.h | 64 +- Python/opcode_targets.h | 66 +- Python/specialize.c | 514 +++++++++ README.md | 8 + README.rst | 241 ----- Tools/build/generate_opcode_h.py | 2 +- Tools/cases_generator/generate_cases.py | 8 + aclocal.m4 | 36 +- cmlq_examples/adi.py | 82 ++ cmlq_examples/adi_numpy.py | 57 + cmlq_examples/adist.py | 81 ++ cmlq_examples/bench_test.py | 144 +++ cmlq_examples/channel_flow.py | 138 +++ cmlq_examples/floyd_warshall.py | 19 + cmlq_examples/gramschmidt.py | 28 + cmlq_examples/grayscott.py | 38 + cmlq_examples/grouping.py | 20 + cmlq_examples/jacobi_2d.py | 56 + cmlq_examples/l1norm.py | 3 + cmlq_examples/l2norm.py | 7 + cmlq_examples/laplace.py | 111 ++ cmlq_examples/load_papi_to_db.py | 26 + cmlq_examples/nbody.py | 163 +++ cmlq_examples/pairwise.py | 15 + cmlq_examples/ph_arc_distance.py | 15 + cmlq_examples/print_papi.py | 58 + cmlq_examples/resnet.py | 64 ++ 
cmlq_examples/simulate_stack_effect.py | 49 + cmlq_examples/specialconvolve.py | 7 + cmlq_examples/test_opt.py | 535 ++++++++++ cmlq_examples/util.py | 77 ++ cmlq_examples/vadv.py | 90 ++ config.guess | 16 +- config.sub | 20 +- configure | 535 ++++++---- configure.ac | 70 ++ insights.org | 148 +++ pyconfig.h.in | 15 + 65 files changed, 4253 insertions(+), 1164 deletions(-) create mode 100644 Python/cmlq_external_opcode.h create mode 100644 README.md delete mode 100644 README.rst create mode 100644 cmlq_examples/adi.py create mode 100644 cmlq_examples/adi_numpy.py create mode 100644 cmlq_examples/adist.py create mode 100644 cmlq_examples/bench_test.py create mode 100644 cmlq_examples/channel_flow.py create mode 100644 cmlq_examples/floyd_warshall.py create mode 100644 cmlq_examples/gramschmidt.py create mode 100644 cmlq_examples/grayscott.py create mode 100644 cmlq_examples/grouping.py create mode 100644 cmlq_examples/jacobi_2d.py create mode 100644 cmlq_examples/l1norm.py create mode 100644 cmlq_examples/l2norm.py create mode 100644 cmlq_examples/laplace.py create mode 100644 cmlq_examples/load_papi_to_db.py create mode 100644 cmlq_examples/nbody.py create mode 100644 cmlq_examples/pairwise.py create mode 100644 cmlq_examples/ph_arc_distance.py create mode 100644 cmlq_examples/print_papi.py create mode 100644 cmlq_examples/resnet.py create mode 100644 cmlq_examples/simulate_stack_effect.py create mode 100644 cmlq_examples/specialconvolve.py create mode 100644 cmlq_examples/test_opt.py create mode 100644 cmlq_examples/util.py create mode 100644 cmlq_examples/vadv.py create mode 100644 insights.org diff --git a/.gitattributes b/.gitattributes index 4ed95069442f3d..77097361407fef 100644 --- a/.gitattributes +++ b/.gitattributes @@ -87,6 +87,7 @@ Parser/token.c generated Programs/test_frozenmain.h generated Python/Python-ast.c generated Python/generated_cases.c.h generated +Python/opcode_metadata.h generated Python/opcode_targets.h generated 
/*
 * Optional PAPI (Performance API) instrumentation for CMLQ.
 *
 * When CMLQ_PAPI is defined, CMLQ_PAPI_BEGIN/END open and close a PAPI
 * high-level region called NAME, and CMLQ_PAPI_REGION wraps CODE in such a
 * region.  When CMLQ_PAPI is not defined, BEGIN/END expand to nothing and
 * REGION expands to just `CODE;`.
 *
 * Each of BEGIN/END keeps its status variable inside its own brace block:
 *   - several regions can be opened in the same C scope without redeclaring
 *     a variable,
 *   - END no longer depends on a BEGIN being visible in the same scope,
 *   - the (void) cast silences "set but unused" warnings when assert() is
 *     compiled out with NDEBUG.
 * A brace block (rather than do { } while (0)) is used on purpose: it is
 * valid both with and without a trailing semicolon at the call site, and
 * CMLQ_PAPI_REGION itself invokes BEGIN without one.
 *
 * CODE is expanded in the caller's scope (not inside the braces), so any
 * declarations it makes stay visible after the region, as before.
 */
#ifdef CMLQ_PAPI
#define CMLQ_PAPI_BEGIN(NAME) \
    { \
        int cmlq_papi_retval_ = PAPI_hl_region_begin(NAME); \
        assert(cmlq_papi_retval_ == PAPI_OK); \
        (void)cmlq_papi_retval_; \
    }

#define CMLQ_PAPI_END(NAME) \
    { \
        int cmlq_papi_retval_ = PAPI_hl_region_end(NAME); \
        assert(cmlq_papi_retval_ == PAPI_OK); \
        (void)cmlq_papi_retval_; \
    }

#define CMLQ_PAPI_REGION(NAME, CODE) \
    CMLQ_PAPI_BEGIN(NAME) \
    CODE; \
    CMLQ_PAPI_END(NAME)
#else
#define CMLQ_PAPI_REGION(NAME, CODE) \
    CODE;
#define CMLQ_PAPI_BEGIN(NAME)
#define CMLQ_PAPI_END(NAME)
#endif
*/ #define _Py_OPCODE(word) ((word).op.code) #define _Py_OPARG(word) ((word).op.arg) @@ -104,6 +105,57 @@ typedef struct { uint8_t *per_instruction_tools; } _PyCoMonitoringData; +typedef struct _deoptInfo{ + // if a whole chain of instructions is deoptimized + struct _deoptInfo *child; + _Py_CODEUNIT orig_instr; + _Py_CODEUNIT *position; + short data; + struct _deoptInfo *next; + struct _deoptInfo *prev; +} PyExternalDeoptInfo; + +#ifndef __cplusplus +typedef int (*PyExternal_CodeHandler)(void *restrict external_cache_pointer, PyObject* restrict ** stack_pointer); +typedef int (*ExternalSpecializationHook)(_Py_CODEUNIT* old_instr, PyObject ***stack_pointer); +#else +typedef int (*PyExternal_CodeHandler)(_Py_CODEUNIT **next_instr, PyObject **stack_pointer); +typedef int (*ExternalSpecializationHook)(_Py_CODEUNIT *old_instr, PyObject ***stack_pointer); +#endif + +typedef void (*FunctionEndHook)(_Py_CODEUNIT *instr, void* external_cache_pointer); +typedef int (*SpecializeInstructionPtr)(_Py_CODEUNIT*, int, PyExternal_CodeHandler, void *); +typedef int (*SpecializeChainPtr)(_Py_CODEUNIT *, PyObject **, int , PyExternal_CodeHandler, unsigned char, void *); +typedef int (*IsOperandConstantPtr)(_Py_CODEUNIT *, PyObject **, int ); + +typedef struct _PyExternalSpecializer { + ExternalSpecializationHook TrySpecialization; + FunctionEndHook FunctionEnd; + + // TODO: workaround until we resolve the mysterious linking performance issue + // For some reason a few benchmarks suffer a major performance regression when the numpy module + // dynamically resolves the function with the linker. 
This hack avoids the resolution by the linker and seems to help + SpecializeInstructionPtr SpecializeInstruction; + SpecializeChainPtr SpecializeChain; + IsOperandConstantPtr IsOperandConstant; +} PyExternalSpecializer; + + +#if defined(Py_OPT_CMLQ_ENV) || defined(Py_OPT_CMLQ_ALWAYS) + #define CMLQ_Def \ + PyObject *co_size_table; \ + PyExternalDeoptInfo *co_deopt_info_head; +#else + #define CMLQ_Def +#endif + +#ifdef INSTR_STATS + #define CMLQ_Stats_Def \ + PyObject *co_stats_table; +#else + #define CMLQ_Stats_Def +#endif + // To avoid repeating ourselves in deepfreeze.py, all PyCodeObject members are // defined in this macro: #define _PyCode_DEF(SIZE) { \ @@ -159,6 +211,8 @@ typedef struct { PyObject *co_name; /* unicode (name, for reference) */ \ PyObject *co_qualname; /* unicode (qualname, for reference) */ \ PyObject *co_linetable; /* bytes object that holds location info */ \ + CMLQ_Def; \ + CMLQ_Stats_Def; \ PyObject *co_weakreflist; /* to support weakrefs to code objects */ \ _PyCoCached *_co_cached; /* cached co_* attributes */ \ uint64_t _co_instrumentation_version; /* current instrumentation version */ \ @@ -226,6 +280,24 @@ static inline int PyCode_GetFirstFree(PyCodeObject *op) { #define _PyCode_CODE(CO) _Py_RVALUE((_Py_CODEUNIT *)(CO)->co_code_adaptive) #define _PyCode_NBYTES(CO) (Py_SIZE(CO) * (Py_ssize_t)sizeof(_Py_CODEUNIT)) + +__attribute__((__used__)) +static int instruction_offset(PyCodeObject* co, _Py_CODEUNIT* instr) { + return instr - _PyCode_CODE(co); +} + + +#ifdef INSTR_STATS + typedef struct _CMLQStatsElem { + uint64_t exec_count; + uint64_t specialization_attempts; + uint64_t exec_ms; +} CMLQStatsElem; + +CMLQStatsElem *get_stats_elem(PyCodeObject* code, _Py_CODEUNIT* instr_ptr); +#endif + + /* Unstable public interface */ PyAPI_FUNC(PyCodeObject *) PyUnstable_Code_New( int, int, int, int, int, PyObject *, PyObject *, diff --git a/Include/cpython/pyframe.h b/Include/cpython/pyframe.h index 0e2afff925e31f..286fc08fd3aad6 100644 --- 
a/Include/cpython/pyframe.h +++ b/Include/cpython/pyframe.h @@ -1,3 +1,4 @@ + #ifndef Py_CPYTHON_PYFRAME_H # error "this header file must not be included directly" #endif @@ -33,3 +34,6 @@ PyAPI_FUNC(int) PyUnstable_InterpreterFrame_GetLasti(struct _PyInterpreterFrame /* Returns the currently executing line number, or -1 if there is no line number. * Does not raise an exception. */ PyAPI_FUNC(int) PyUnstable_InterpreterFrame_GetLine(struct _PyInterpreterFrame *frame); + +PyAPI_FUNC(void) PyExternal_SetSpecializer(PyExternalSpecializer *specializer); +//PyAPI_FUNC(int) PyExternal_SpecializeInstruction(_Py_CODEUNIT *instr, int slot, PyExternal_CodeHandler new_handler); \ No newline at end of file diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h index 75a23f3f5af560..cf500ce5ecb15b 100644 --- a/Include/internal/pycore_code.h +++ b/Include/internal/pycore_code.h @@ -28,6 +28,7 @@ typedef struct { typedef struct { uint16_t counter; + uint16_t external_cache_pointer[4]; } _PyBinaryOpCache; #define INLINE_CACHE_ENTRIES_BINARY_OP CACHE_ENTRIES(_PyBinaryOpCache) @@ -47,6 +48,7 @@ typedef struct { typedef struct { uint16_t counter; + uint16_t external_cache_pointer[4]; } _PyBinarySubscrCache; #define INLINE_CACHE_ENTRIES_BINARY_SUBSCR CACHE_ENTRIES(_PyBinarySubscrCache) @@ -79,6 +81,7 @@ typedef struct { typedef struct { uint16_t counter; uint16_t func_version[2]; + uint16_t external_cache_pointer[4]; } _PyCallCache; #define INLINE_CACHE_ENTRIES_CALL CACHE_ENTRIES(_PyCallCache) @@ -163,6 +166,7 @@ struct _PyCodeConstructor { /* the code */ PyObject *code; + PyObject *size_table; int firstlineno; PyObject *linetable; diff --git a/Include/internal/pycore_frame.h b/Include/internal/pycore_frame.h index 158db2cf9df82e..bd4690f554dece 100644 --- a/Include/internal/pycore_frame.h +++ b/Include/internal/pycore_frame.h @@ -75,6 +75,22 @@ typedef struct _PyInterpreterFrame { #define _PyInterpreterFrame_LASTI(IF) \ ((int)((IF)->prev_instr - 
_PyCode_CODE((IF)->f_code))) + +typedef struct _PyCache { + uint16_t counter; +} _PyCache; + +#define POINTER_FROM_ARRAY(array) *((void **)array) +int +_PyExternal_TrySpecialize(_Py_CODEUNIT *instr, PyObject ***stack_pointer, _PyCache *cache); +void +_PyExternal_FunctionEnd(_PyInterpreterFrame *frame); + + + +_Py_CODEUNIT* +_PyExternal_Deoptimize(const _Py_CODEUNIT *instr, _PyInterpreterFrame* frame); + static inline PyObject **_PyFrame_Stackbase(_PyInterpreterFrame *f) { return f->localsplus + f->f_code->co_nlocalsplus; } @@ -96,6 +112,39 @@ static inline void _PyFrame_StackPush(_PyInterpreterFrame *f, PyObject *value) { f->stacktop++; } +__attribute__((__used__)) +static const char* function_name(_PyInterpreterFrame* f) { + if (!f) { + return "NULL"; + } + if (f->f_code && f->f_code->co_name) { + return _PyUnicode_AsString(f->f_code->co_name); + } + else { + return "No Code"; + } +} + +__attribute__((__used__)) +static const char* code_name(PyCodeObject* c) { + if (!c) { + return "NULL"; + } + if (c->co_name) { + return _PyUnicode_AsString(c->co_name); + } + else { + return "No Code"; + } +} + +__attribute__((__used__)) +static int in_function(_PyInterpreterFrame* f, char* name) { + int result = strcmp(function_name(f), name) == 0; + return result; +} + + #define FRAME_SPECIALS_SIZE ((int)((sizeof(_PyInterpreterFrame)-1)/sizeof(PyObject *))) static inline int diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h index 439f47a263dfa1..e9b7fe1f1a95d9 100644 --- a/Include/internal/pycore_global_objects_fini_generated.h +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -701,6 +701,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__release_buffer__)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__repr__)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__reversed__)); + _PyStaticObject_CheckRefcnt((PyObject 
*)&_Py_ID(__rewrite__)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__rfloordiv__)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__rlshift__)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__rmatmul__)); diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index 0c84999cbf8127..26ff5af98040dc 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -190,6 +190,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(__release_buffer__) STRUCT_FOR_ID(__repr__) STRUCT_FOR_ID(__reversed__) + STRUCT_FOR_ID(__rewrite__) STRUCT_FOR_ID(__rfloordiv__) STRUCT_FOR_ID(__rlshift__) STRUCT_FOR_ID(__rmatmul__) diff --git a/Include/internal/pycore_long.h b/Include/internal/pycore_long.h index 64c00cb1475480..91bc4b986ff036 100644 --- a/Include/internal/pycore_long.h +++ b/Include/internal/pycore_long.h @@ -81,6 +81,7 @@ static inline PyObject* _PyLong_FromUnsignedChar(unsigned char i) PyObject *_PyLong_Add(PyLongObject *left, PyLongObject *right); PyObject *_PyLong_Multiply(PyLongObject *left, PyLongObject *right); PyObject *_PyLong_Subtract(PyLongObject *left, PyLongObject *right); +PyObject *_PyLong_True_Divide(PyLongObject *left, PyLongObject *right); /* Used by Python/mystrtoul.c, _PyBytes_FromHex(), _PyBytes_DecodeEscape(), etc. 
*/ diff --git a/Include/internal/pycore_opcode.h b/Include/internal/pycore_opcode.h index 15d96503830f93..746261d24542a2 100644 --- a/Include/internal/pycore_opcode.h +++ b/Include/internal/pycore_opcode.h @@ -32,7 +32,7 @@ const uint32_t _PyOpcode_Jump[9] = { }; const uint8_t _PyOpcode_Caches[256] = { - [BINARY_SUBSCR] = 1, + [BINARY_SUBSCR] = 5, [STORE_SUBSCR] = 1, [UNPACK_SEQUENCE] = 1, [FOR_ITER] = 1, @@ -40,10 +40,10 @@ const uint8_t _PyOpcode_Caches[256] = { [LOAD_ATTR] = 9, [COMPARE_OP] = 1, [LOAD_GLOBAL] = 4, - [BINARY_OP] = 1, + [BINARY_OP] = 5, [SEND] = 1, [LOAD_SUPER_ATTR] = 1, - [CALL] = 3, + [CALL] = 7, }; const uint8_t _PyOpcode_Deopt[256] = { @@ -53,6 +53,7 @@ const uint8_t _PyOpcode_Deopt[256] = { [BINARY_OP_ADD_FLOAT] = BINARY_OP, [BINARY_OP_ADD_INT] = BINARY_OP, [BINARY_OP_ADD_UNICODE] = BINARY_OP, + [BINARY_OP_EXTERNAL] = BINARY_OP, [BINARY_OP_INPLACE_ADD_UNICODE] = BINARY_OP, [BINARY_OP_MULTIPLY_FLOAT] = BINARY_OP, [BINARY_OP_MULTIPLY_INT] = BINARY_OP, @@ -61,6 +62,7 @@ const uint8_t _PyOpcode_Deopt[256] = { [BINARY_SLICE] = BINARY_SLICE, [BINARY_SUBSCR] = BINARY_SUBSCR, [BINARY_SUBSCR_DICT] = BINARY_SUBSCR, + [BINARY_SUBSCR_EXTERNAL] = BINARY_SUBSCR, [BINARY_SUBSCR_GETITEM] = BINARY_SUBSCR, [BINARY_SUBSCR_LIST_INT] = BINARY_SUBSCR, [BINARY_SUBSCR_TUPLE_INT] = BINARY_SUBSCR, @@ -76,6 +78,7 @@ const uint8_t _PyOpcode_Deopt[256] = { [CALL_BOUND_METHOD_EXACT_ARGS] = CALL, [CALL_BUILTIN_CLASS] = CALL, [CALL_BUILTIN_FAST_WITH_KEYWORDS] = CALL, + [CALL_EXTERNAL] = CALL, [CALL_FUNCTION_EX] = CALL_FUNCTION_EX, [CALL_INTRINSIC_1] = CALL_INTRINSIC_1, [CALL_INTRINSIC_2] = CALL_INTRINSIC_2, @@ -243,7 +246,7 @@ const uint8_t _PyOpcode_Deopt[256] = { }; #endif // NEED_OPCODE_TABLES -#ifdef Py_DEBUG +#if defined(Py_DEBUG) || defined(INSTR_STATS) static const char *const _PyOpcode_OpName[267] = { [CACHE] = "CACHE", [POP_TOP] = "POP_TOP", @@ -264,25 +267,27 @@ static const char *const _PyOpcode_OpName[267] = { [BINARY_OP_SUBTRACT_FLOAT] = 
"BINARY_OP_SUBTRACT_FLOAT", [RESERVED] = "RESERVED", [BINARY_OP_SUBTRACT_INT] = "BINARY_OP_SUBTRACT_INT", + [BINARY_OP_EXTERNAL] = "BINARY_OP_EXTERNAL", [BINARY_SUBSCR_DICT] = "BINARY_SUBSCR_DICT", [BINARY_SUBSCR_GETITEM] = "BINARY_SUBSCR_GETITEM", [BINARY_SUBSCR_LIST_INT] = "BINARY_SUBSCR_LIST_INT", [BINARY_SUBSCR_TUPLE_INT] = "BINARY_SUBSCR_TUPLE_INT", - [CALL_PY_EXACT_ARGS] = "CALL_PY_EXACT_ARGS", - [CALL_PY_WITH_DEFAULTS] = "CALL_PY_WITH_DEFAULTS", + [BINARY_SUBSCR_EXTERNAL] = "BINARY_SUBSCR_EXTERNAL", [BINARY_SUBSCR] = "BINARY_SUBSCR", [BINARY_SLICE] = "BINARY_SLICE", [STORE_SLICE] = "STORE_SLICE", - [CALL_BOUND_METHOD_EXACT_ARGS] = "CALL_BOUND_METHOD_EXACT_ARGS", - [CALL_BUILTIN_CLASS] = "CALL_BUILTIN_CLASS", + [CALL_PY_EXACT_ARGS] = "CALL_PY_EXACT_ARGS", + [CALL_PY_WITH_DEFAULTS] = "CALL_PY_WITH_DEFAULTS", [GET_LEN] = "GET_LEN", [MATCH_MAPPING] = "MATCH_MAPPING", [MATCH_SEQUENCE] = "MATCH_SEQUENCE", [MATCH_KEYS] = "MATCH_KEYS", - [CALL_BUILTIN_FAST_WITH_KEYWORDS] = "CALL_BUILTIN_FAST_WITH_KEYWORDS", + [CALL_BOUND_METHOD_EXACT_ARGS] = "CALL_BOUND_METHOD_EXACT_ARGS", [PUSH_EXC_INFO] = "PUSH_EXC_INFO", [CHECK_EXC_MATCH] = "CHECK_EXC_MATCH", [CHECK_EG_MATCH] = "CHECK_EG_MATCH", + [CALL_BUILTIN_CLASS] = "CALL_BUILTIN_CLASS", + [CALL_BUILTIN_FAST_WITH_KEYWORDS] = "CALL_BUILTIN_FAST_WITH_KEYWORDS", [CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS] = "CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS", [CALL_NO_KW_BUILTIN_FAST] = "CALL_NO_KW_BUILTIN_FAST", [CALL_NO_KW_BUILTIN_O] = "CALL_NO_KW_BUILTIN_O", @@ -292,8 +297,6 @@ static const char *const _PyOpcode_OpName[267] = { [CALL_NO_KW_METHOD_DESCRIPTOR_FAST] = "CALL_NO_KW_METHOD_DESCRIPTOR_FAST", [CALL_NO_KW_METHOD_DESCRIPTOR_NOARGS] = "CALL_NO_KW_METHOD_DESCRIPTOR_NOARGS", [CALL_NO_KW_METHOD_DESCRIPTOR_O] = "CALL_NO_KW_METHOD_DESCRIPTOR_O", - [CALL_NO_KW_STR_1] = "CALL_NO_KW_STR_1", - [CALL_NO_KW_TUPLE_1] = "CALL_NO_KW_TUPLE_1", [WITH_EXCEPT_START] = "WITH_EXCEPT_START", [GET_AITER] = "GET_AITER", [GET_ANEXT] = "GET_ANEXT", @@ 
-301,39 +304,39 @@ static const char *const _PyOpcode_OpName[267] = { [BEFORE_WITH] = "BEFORE_WITH", [END_ASYNC_FOR] = "END_ASYNC_FOR", [CLEANUP_THROW] = "CLEANUP_THROW", + [CALL_NO_KW_STR_1] = "CALL_NO_KW_STR_1", + [CALL_NO_KW_TUPLE_1] = "CALL_NO_KW_TUPLE_1", [CALL_NO_KW_TYPE_1] = "CALL_NO_KW_TYPE_1", + [CALL_EXTERNAL] = "CALL_EXTERNAL", + [STORE_SUBSCR] = "STORE_SUBSCR", + [DELETE_SUBSCR] = "DELETE_SUBSCR", [COMPARE_OP_FLOAT] = "COMPARE_OP_FLOAT", [COMPARE_OP_INT] = "COMPARE_OP_INT", [COMPARE_OP_STR] = "COMPARE_OP_STR", - [STORE_SUBSCR] = "STORE_SUBSCR", - [DELETE_SUBSCR] = "DELETE_SUBSCR", [FOR_ITER_LIST] = "FOR_ITER_LIST", [FOR_ITER_TUPLE] = "FOR_ITER_TUPLE", [FOR_ITER_RANGE] = "FOR_ITER_RANGE", + [GET_ITER] = "GET_ITER", + [GET_YIELD_FROM_ITER] = "GET_YIELD_FROM_ITER", [FOR_ITER_GEN] = "FOR_ITER_GEN", + [LOAD_BUILD_CLASS] = "LOAD_BUILD_CLASS", [LOAD_SUPER_ATTR_ATTR] = "LOAD_SUPER_ATTR_ATTR", [LOAD_SUPER_ATTR_METHOD] = "LOAD_SUPER_ATTR_METHOD", - [GET_ITER] = "GET_ITER", - [GET_YIELD_FROM_ITER] = "GET_YIELD_FROM_ITER", + [LOAD_ASSERTION_ERROR] = "LOAD_ASSERTION_ERROR", + [RETURN_GENERATOR] = "RETURN_GENERATOR", [LOAD_ATTR_CLASS] = "LOAD_ATTR_CLASS", - [LOAD_BUILD_CLASS] = "LOAD_BUILD_CLASS", [LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN] = "LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN", [LOAD_ATTR_INSTANCE_VALUE] = "LOAD_ATTR_INSTANCE_VALUE", - [LOAD_ASSERTION_ERROR] = "LOAD_ASSERTION_ERROR", - [RETURN_GENERATOR] = "RETURN_GENERATOR", [LOAD_ATTR_MODULE] = "LOAD_ATTR_MODULE", [LOAD_ATTR_PROPERTY] = "LOAD_ATTR_PROPERTY", [LOAD_ATTR_SLOT] = "LOAD_ATTR_SLOT", [LOAD_ATTR_WITH_HINT] = "LOAD_ATTR_WITH_HINT", - [LOAD_ATTR_METHOD_LAZY_DICT] = "LOAD_ATTR_METHOD_LAZY_DICT", - [LOAD_ATTR_METHOD_NO_DICT] = "LOAD_ATTR_METHOD_NO_DICT", - [LOAD_ATTR_METHOD_WITH_VALUES] = "LOAD_ATTR_METHOD_WITH_VALUES", [RETURN_VALUE] = "RETURN_VALUE", - [LOAD_CONST__LOAD_FAST] = "LOAD_CONST__LOAD_FAST", + [LOAD_ATTR_METHOD_LAZY_DICT] = "LOAD_ATTR_METHOD_LAZY_DICT", [SETUP_ANNOTATIONS] = "SETUP_ANNOTATIONS", - 
[LOAD_FAST__LOAD_CONST] = "LOAD_FAST__LOAD_CONST", + [LOAD_ATTR_METHOD_NO_DICT] = "LOAD_ATTR_METHOD_NO_DICT", [LOAD_LOCALS] = "LOAD_LOCALS", - [LOAD_FAST__LOAD_FAST] = "LOAD_FAST__LOAD_FAST", + [LOAD_ATTR_METHOD_WITH_VALUES] = "LOAD_ATTR_METHOD_WITH_VALUES", [POP_EXCEPT] = "POP_EXCEPT", [STORE_NAME] = "STORE_NAME", [DELETE_NAME] = "DELETE_NAME", @@ -356,9 +359,9 @@ static const char *const _PyOpcode_OpName[267] = { [IMPORT_NAME] = "IMPORT_NAME", [IMPORT_FROM] = "IMPORT_FROM", [JUMP_FORWARD] = "JUMP_FORWARD", - [LOAD_GLOBAL_BUILTIN] = "LOAD_GLOBAL_BUILTIN", - [LOAD_GLOBAL_MODULE] = "LOAD_GLOBAL_MODULE", - [STORE_ATTR_INSTANCE_VALUE] = "STORE_ATTR_INSTANCE_VALUE", + [LOAD_CONST__LOAD_FAST] = "LOAD_CONST__LOAD_FAST", + [LOAD_FAST__LOAD_CONST] = "LOAD_FAST__LOAD_CONST", + [LOAD_FAST__LOAD_FAST] = "LOAD_FAST__LOAD_FAST", [POP_JUMP_IF_FALSE] = "POP_JUMP_IF_FALSE", [POP_JUMP_IF_TRUE] = "POP_JUMP_IF_TRUE", [LOAD_GLOBAL] = "LOAD_GLOBAL", @@ -393,36 +396,36 @@ static const char *const _PyOpcode_OpName[267] = { [LIST_APPEND] = "LIST_APPEND", [SET_ADD] = "SET_ADD", [MAP_ADD] = "MAP_ADD", - [STORE_ATTR_SLOT] = "STORE_ATTR_SLOT", + [LOAD_GLOBAL_BUILTIN] = "LOAD_GLOBAL_BUILTIN", [COPY_FREE_VARS] = "COPY_FREE_VARS", [YIELD_VALUE] = "YIELD_VALUE", [RESUME] = "RESUME", [MATCH_CLASS] = "MATCH_CLASS", - [STORE_ATTR_WITH_HINT] = "STORE_ATTR_WITH_HINT", - [STORE_FAST__LOAD_FAST] = "STORE_FAST__LOAD_FAST", + [LOAD_GLOBAL_MODULE] = "LOAD_GLOBAL_MODULE", + [STORE_ATTR_INSTANCE_VALUE] = "STORE_ATTR_INSTANCE_VALUE", [FORMAT_VALUE] = "FORMAT_VALUE", [BUILD_CONST_KEY_MAP] = "BUILD_CONST_KEY_MAP", [BUILD_STRING] = "BUILD_STRING", + [STORE_ATTR_SLOT] = "STORE_ATTR_SLOT", + [STORE_ATTR_WITH_HINT] = "STORE_ATTR_WITH_HINT", + [STORE_FAST__LOAD_FAST] = "STORE_FAST__LOAD_FAST", [STORE_FAST__STORE_FAST] = "STORE_FAST__STORE_FAST", - [STORE_SUBSCR_DICT] = "STORE_SUBSCR_DICT", - [STORE_SUBSCR_LIST_INT] = "STORE_SUBSCR_LIST_INT", - [UNPACK_SEQUENCE_LIST] = "UNPACK_SEQUENCE_LIST", [LIST_EXTEND] = 
"LIST_EXTEND", [SET_UPDATE] = "SET_UPDATE", [DICT_MERGE] = "DICT_MERGE", [DICT_UPDATE] = "DICT_UPDATE", + [STORE_SUBSCR_DICT] = "STORE_SUBSCR_DICT", + [STORE_SUBSCR_LIST_INT] = "STORE_SUBSCR_LIST_INT", + [UNPACK_SEQUENCE_LIST] = "UNPACK_SEQUENCE_LIST", [UNPACK_SEQUENCE_TUPLE] = "UNPACK_SEQUENCE_TUPLE", [UNPACK_SEQUENCE_TWO_TUPLE] = "UNPACK_SEQUENCE_TWO_TUPLE", - [SEND_GEN] = "SEND_GEN", - [169] = "<169>", - [170] = "<170>", [CALL] = "CALL", [KW_NAMES] = "KW_NAMES", [CALL_INTRINSIC_1] = "CALL_INTRINSIC_1", [CALL_INTRINSIC_2] = "CALL_INTRINSIC_2", [LOAD_FROM_DICT_OR_GLOBALS] = "LOAD_FROM_DICT_OR_GLOBALS", [LOAD_FROM_DICT_OR_DEREF] = "LOAD_FROM_DICT_OR_DEREF", - [177] = "<177>", + [SEND_GEN] = "SEND_GEN", [178] = "<178>", [179] = "<179>", [180] = "<180>", @@ -516,9 +519,6 @@ static const char *const _PyOpcode_OpName[267] = { #endif #define EXTRA_CASES \ - case 169: \ - case 170: \ - case 177: \ case 178: \ case 179: \ case 180: \ diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index 07f237b2905864..89d1841506c3aa 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++ b/Include/internal/pycore_runtime_init_generated.h @@ -696,6 +696,7 @@ extern "C" { INIT_ID(__release_buffer__), \ INIT_ID(__repr__), \ INIT_ID(__reversed__), \ + INIT_ID(__rewrite__), \ INIT_ID(__rfloordiv__), \ INIT_ID(__rlshift__), \ INIT_ID(__rmatmul__), \ diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h index 9b470094b7afe2..6af834ce51cac1 100644 --- a/Include/internal/pycore_unicodeobject_generated.h +++ b/Include/internal/pycore_unicodeobject_generated.h @@ -411,6 +411,9 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { string = &_Py_ID(__reversed__); assert(_PyUnicode_CheckConsistency(string, 1)); _PyUnicode_InternInPlace(interp, &string); + string = &_Py_ID(__rewrite__); + assert(_PyUnicode_CheckConsistency(string, 1)); + 
_PyUnicode_InternInPlace(interp, &string); string = &_Py_ID(__rfloordiv__); assert(_PyUnicode_CheckConsistency(string, 1)); _PyUnicode_InternInPlace(interp, &string); diff --git a/Include/opcode.h b/Include/opcode.h index 9806511ba4286a..3e7c725b75b2f3 100644 --- a/Include/opcode.h +++ b/Include/opcode.h @@ -160,62 +160,65 @@ extern "C" { #define BINARY_OP_MULTIPLY_INT 14 #define BINARY_OP_SUBTRACT_FLOAT 16 #define BINARY_OP_SUBTRACT_INT 18 -#define BINARY_SUBSCR_DICT 19 -#define BINARY_SUBSCR_GETITEM 20 -#define BINARY_SUBSCR_LIST_INT 21 -#define BINARY_SUBSCR_TUPLE_INT 22 -#define CALL_PY_EXACT_ARGS 23 -#define CALL_PY_WITH_DEFAULTS 24 -#define CALL_BOUND_METHOD_EXACT_ARGS 28 -#define CALL_BUILTIN_CLASS 29 -#define CALL_BUILTIN_FAST_WITH_KEYWORDS 34 -#define CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS 38 -#define CALL_NO_KW_BUILTIN_FAST 39 -#define CALL_NO_KW_BUILTIN_O 40 -#define CALL_NO_KW_ISINSTANCE 41 -#define CALL_NO_KW_LEN 42 -#define CALL_NO_KW_LIST_APPEND 43 -#define CALL_NO_KW_METHOD_DESCRIPTOR_FAST 44 -#define CALL_NO_KW_METHOD_DESCRIPTOR_NOARGS 45 -#define CALL_NO_KW_METHOD_DESCRIPTOR_O 46 -#define CALL_NO_KW_STR_1 47 -#define CALL_NO_KW_TUPLE_1 48 -#define CALL_NO_KW_TYPE_1 56 -#define COMPARE_OP_FLOAT 57 -#define COMPARE_OP_INT 58 -#define COMPARE_OP_STR 59 -#define FOR_ITER_LIST 62 -#define FOR_ITER_TUPLE 63 -#define FOR_ITER_RANGE 64 -#define FOR_ITER_GEN 65 -#define LOAD_SUPER_ATTR_ATTR 66 -#define LOAD_SUPER_ATTR_METHOD 67 -#define LOAD_ATTR_CLASS 70 -#define LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN 72 -#define LOAD_ATTR_INSTANCE_VALUE 73 -#define LOAD_ATTR_MODULE 76 -#define LOAD_ATTR_PROPERTY 77 -#define LOAD_ATTR_SLOT 78 -#define LOAD_ATTR_WITH_HINT 79 -#define LOAD_ATTR_METHOD_LAZY_DICT 80 -#define LOAD_ATTR_METHOD_NO_DICT 81 -#define LOAD_ATTR_METHOD_WITH_VALUES 82 -#define LOAD_CONST__LOAD_FAST 84 -#define LOAD_FAST__LOAD_CONST 86 -#define LOAD_FAST__LOAD_FAST 88 -#define LOAD_GLOBAL_BUILTIN 111 -#define LOAD_GLOBAL_MODULE 112 -#define 
STORE_ATTR_INSTANCE_VALUE 113 -#define STORE_ATTR_SLOT 148 -#define STORE_ATTR_WITH_HINT 153 -#define STORE_FAST__LOAD_FAST 154 -#define STORE_FAST__STORE_FAST 158 -#define STORE_SUBSCR_DICT 159 -#define STORE_SUBSCR_LIST_INT 160 -#define UNPACK_SEQUENCE_LIST 161 -#define UNPACK_SEQUENCE_TUPLE 166 -#define UNPACK_SEQUENCE_TWO_TUPLE 167 -#define SEND_GEN 168 +#define BINARY_OP_EXTERNAL 19 +#define BINARY_SUBSCR_DICT 20 +#define BINARY_SUBSCR_GETITEM 21 +#define BINARY_SUBSCR_LIST_INT 22 +#define BINARY_SUBSCR_TUPLE_INT 23 +#define BINARY_SUBSCR_EXTERNAL 24 +#define CALL_PY_EXACT_ARGS 28 +#define CALL_PY_WITH_DEFAULTS 29 +#define CALL_BOUND_METHOD_EXACT_ARGS 34 +#define CALL_BUILTIN_CLASS 38 +#define CALL_BUILTIN_FAST_WITH_KEYWORDS 39 +#define CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS 40 +#define CALL_NO_KW_BUILTIN_FAST 41 +#define CALL_NO_KW_BUILTIN_O 42 +#define CALL_NO_KW_ISINSTANCE 43 +#define CALL_NO_KW_LEN 44 +#define CALL_NO_KW_LIST_APPEND 45 +#define CALL_NO_KW_METHOD_DESCRIPTOR_FAST 46 +#define CALL_NO_KW_METHOD_DESCRIPTOR_NOARGS 47 +#define CALL_NO_KW_METHOD_DESCRIPTOR_O 48 +#define CALL_NO_KW_STR_1 56 +#define CALL_NO_KW_TUPLE_1 57 +#define CALL_NO_KW_TYPE_1 58 +#define CALL_EXTERNAL 59 +#define COMPARE_OP_FLOAT 62 +#define COMPARE_OP_INT 63 +#define COMPARE_OP_STR 64 +#define FOR_ITER_LIST 65 +#define FOR_ITER_TUPLE 66 +#define FOR_ITER_RANGE 67 +#define FOR_ITER_GEN 70 +#define LOAD_SUPER_ATTR_ATTR 72 +#define LOAD_SUPER_ATTR_METHOD 73 +#define LOAD_ATTR_CLASS 76 +#define LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN 77 +#define LOAD_ATTR_INSTANCE_VALUE 78 +#define LOAD_ATTR_MODULE 79 +#define LOAD_ATTR_PROPERTY 80 +#define LOAD_ATTR_SLOT 81 +#define LOAD_ATTR_WITH_HINT 82 +#define LOAD_ATTR_METHOD_LAZY_DICT 84 +#define LOAD_ATTR_METHOD_NO_DICT 86 +#define LOAD_ATTR_METHOD_WITH_VALUES 88 +#define LOAD_CONST__LOAD_FAST 111 +#define LOAD_FAST__LOAD_CONST 112 +#define LOAD_FAST__LOAD_FAST 113 +#define LOAD_GLOBAL_BUILTIN 148 +#define LOAD_GLOBAL_MODULE 153 +#define 
STORE_ATTR_INSTANCE_VALUE 154 +#define STORE_ATTR_SLOT 158 +#define STORE_ATTR_WITH_HINT 159 +#define STORE_FAST__LOAD_FAST 160 +#define STORE_FAST__STORE_FAST 161 +#define STORE_SUBSCR_DICT 166 +#define STORE_SUBSCR_LIST_INT 167 +#define UNPACK_SEQUENCE_LIST 168 +#define UNPACK_SEQUENCE_TUPLE 169 +#define UNPACK_SEQUENCE_TWO_TUPLE 170 +#define SEND_GEN 177 #define HAS_ARG(op) ((((op) >= HAVE_ARGUMENT) && (!IS_PSEUDO_OPCODE(op)))\ || ((op) == JUMP) \ diff --git a/Lib/importlib/_bootstrap_external.py b/Lib/importlib/_bootstrap_external.py index 73ac4405cb54cf..0904bbd88a09d9 100644 --- a/Lib/importlib/_bootstrap_external.py +++ b/Lib/importlib/_bootstrap_external.py @@ -461,7 +461,7 @@ def _write_atomic(path, data, mode=0o666): # Whenever MAGIC_NUMBER is changed, the ranges in the magic_values array # in PC/launcher.c must also be updated. -MAGIC_NUMBER = (3531).to_bytes(2, 'little') + b'\r\n' +MAGIC_NUMBER = (3533).to_bytes(2, 'little') + b'\r\n' _RAW_MAGIC_NUMBER = int.from_bytes(MAGIC_NUMBER, 'little') # For import.c diff --git a/Lib/opcode.py b/Lib/opcode.py index 6bb2f1c140b15a..b40720e97a5697 100644 --- a/Lib/opcode.py +++ b/Lib/opcode.py @@ -348,12 +348,15 @@ def pseudo_op(name, op, real_ops): "BINARY_OP_MULTIPLY_INT", "BINARY_OP_SUBTRACT_FLOAT", "BINARY_OP_SUBTRACT_INT", + "BINARY_OP_EXTERNAL" ], "BINARY_SUBSCR": [ "BINARY_SUBSCR_DICT", "BINARY_SUBSCR_GETITEM", "BINARY_SUBSCR_LIST_INT", "BINARY_SUBSCR_TUPLE_INT", + "BINARY_SUBSCR_EXTERNAL" + ], "CALL": [ "CALL_PY_EXACT_ARGS", @@ -373,6 +376,7 @@ def pseudo_op(name, op, real_ops): "CALL_NO_KW_STR_1", "CALL_NO_KW_TUPLE_1", "CALL_NO_KW_TYPE_1", + "CALL_EXTERNAL", ], "COMPARE_OP": [ "COMPARE_OP_FLOAT", @@ -449,7 +453,9 @@ def pseudo_op(name, op, real_ops): }, "BINARY_OP": { "counter": 1, + "external_cache_pointer": 4, }, + "UNPACK_SEQUENCE": { "counter": 1, }, @@ -458,10 +464,14 @@ def pseudo_op(name, op, real_ops): }, "BINARY_SUBSCR": { "counter": 1, + "external_cache_pointer": 4, }, "FOR_ITER": { "counter": 1, 
# Directory holding the NumPy C headers the interpreter objects depend on.
# The default is the original author's checkout, kept only for backward
# compatibility; override it on the command line or in the environment:
#     make NUMPY_INCLUDE_DIR=/path/to/numpy/numpy/_core/include
# If the directory does not exist, $(wildcard ...) expands to nothing and
# no extra header dependencies are added.
NUMPY_INCLUDE_DIR ?= /home/felixl/repositories/numpy/numpy/_core/include
NUMPY_HEADERS := $(wildcard $(NUMPY_INCLUDE_DIR)/*.h)
#ifdef INSTR_STATS
/*
 * Allocate a zero-filled per-instruction statistics table for `co`.
 *
 * The table is a bytes object holding Py_SIZE(co) CMLQStatsElem records,
 * one per code unit.  Returns a new reference, or NULL if the allocation
 * fails.
 *
 * On failure the MemoryError is cleared on purpose: the visible callers
 * store the result directly into co->co_stats_table and one of them
 * (init_code) returns void, so a pending exception could not be reported.
 * A NULL table simply disables statistics for this code object;
 * get_stats_elem() already treats a NULL table as "no stats".
 */
static PyObject*
new_empty_stats_table(PyCodeObject* co) {
    const Py_ssize_t nbytes = Py_SIZE(co) * (Py_ssize_t)sizeof(CMLQStatsElem);
    PyObject *table = PyBytes_FromStringAndSize(NULL, nbytes);
    if (table == NULL) {
        PyErr_Clear();
        return NULL;
    }
    /* PyBytes_FromStringAndSize(NULL, n) leaves the buffer uninitialised. */
    memset(PyBytes_AS_STRING(table), 0, (size_t)nbytes);
    return table;
}
#endif

#ifdef INSTR_STATS
/*
 * Return the statistics record for the instruction at `instr_ptr` in `code`.
 *
 * Returns NULL when the code object has no stats table or when `instr_ptr`
 * does not fall inside the code object's instruction array, so the table is
 * never indexed out of bounds.
 */
CMLQStatsElem *get_stats_elem(PyCodeObject* code, _Py_CODEUNIT* instr_ptr){
    if (code->co_stats_table == NULL) {
        return NULL;
    }
    Py_ssize_t offset = instr_ptr - _PyCode_CODE(code);
    if (offset < 0 || offset >= Py_SIZE(code)) {
        return NULL;
    }
    CMLQStatsElem *stats_table = (CMLQStatsElem *)PyBytes_AS_STRING(code->co_stats_table);
    return &stats_table[offset];
}
#endif
diff --git a/Objects/typeobject.c b/Objects/typeobject.c index 5c71c28f751504..24fc4dd4b48cc7 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -21,6 +21,10 @@ #include #include // ptrdiff_t +#include +#include +#include + /*[clinic input] class type "PyTypeObject *" "&PyType_Type" class object "PyObject *" "&PyBaseObject_Type" @@ -1716,6 +1720,24 @@ _PyType_AllocNoTrack(PyTypeObject *type, Py_ssize_t nitems) return obj; } +static void +print_trace (void) +{ + void *array[10]; + char **strings; + int size, i; + + size = backtrace (array, 10); + strings = backtrace_symbols (array, size); + if (strings != NULL) + { + for (i = 0; i < size; i++) + fprintf (stderr, "%s\n", strings[i]); + } + + free (strings); +} + PyObject * PyType_GenericAlloc(PyTypeObject *type, Py_ssize_t nitems) { diff --git a/Programs/test_frozenmain.h b/Programs/test_frozenmain.h index cd9d1032629f49..6aee36415b0657 100644 --- a/Programs/test_frozenmain.h +++ b/Programs/test_frozenmain.h @@ -1,17 +1,20 @@ // Auto-generated by Programs/freeze_test_frozenmain.py unsigned char M_test_frozenmain[] = { 227,0,0,0,0,0,0,0,0,0,0,0,0,8,0,0, - 0,0,0,0,0,243,162,0,0,0,151,0,100,0,100,1, + 0,0,0,0,0,243,210,0,0,0,151,0,100,0,100,1, 108,0,90,0,100,0,100,1,108,1,90,1,2,0,101,2, - 100,2,171,1,0,0,0,0,0,0,1,0,2,0,101,2, - 100,3,101,0,106,6,0,0,0,0,0,0,0,0,0,0, - 0,0,0,0,0,0,0,0,171,2,0,0,0,0,0,0, + 100,2,171,1,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,1,0,2,0,101,2,100,3,101,0,106,6,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 171,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 1,0,2,0,101,1,106,8,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,171,0,0,0,0,0, - 0,0,100,4,25,0,0,0,90,5,100,5,68,0,93,19, + 0,0,0,0,0,0,0,0,0,0,100,4,25,0,0,0, + 0,0,0,0,0,0,0,0,90,5,100,5,68,0,93,27, 0,0,90,6,2,0,101,2,100,6,101,6,155,0,100,7, - 101,5,101,6,25,0,0,0,155,0,157,4,171,1,0,0, - 0,0,0,0,1,0,140,21,4,0,121,1,41,8,233,0, + 101,5,101,6,25,0,0,0,0,0,0,0,0,0,0,0, + 155,0,157,4,171,1,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,1,0,140,29,4,0,121,1,41,8,233,0, 
0,0,0,78,122,18,70,114,111,122,101,110,32,72,101,108, 108,111,32,87,111,114,108,100,122,8,115,121,115,46,97,114, 103,118,218,6,99,111,110,102,105,103,41,5,218,12,112,114, @@ -27,12 +30,12 @@ unsigned char M_test_frozenmain[] = { 218,3,107,101,121,169,0,243,0,0,0,0,250,18,116,101, 115,116,95,102,114,111,122,101,110,109,97,105,110,46,112,121, 250,8,60,109,111,100,117,108,101,62,114,18,0,0,0,1, - 0,0,0,115,102,0,0,0,240,3,1,1,1,243,8,0, - 1,11,219,0,24,225,0,5,208,6,26,212,0,27,217,0, - 5,128,106,144,35,151,40,145,40,212,0,27,216,9,38,208, - 9,26,215,9,38,209,9,38,211,9,40,168,24,209,9,50, - 128,6,240,2,6,12,2,242,0,7,1,42,128,67,241,14, - 0,5,10,136,71,144,67,144,53,152,2,152,54,160,35,153, - 59,152,45,208,10,40,213,4,41,241,15,7,1,42,114,16, - 0,0,0, + 0,0,0,115,111,0,0,0,240,3,1,1,1,243,8,0, + 1,11,219,0,24,225,0,5,208,6,26,215,0,27,208,0, + 27,217,0,5,128,106,144,35,151,40,145,40,215,0,27,208, + 0,27,216,9,38,208,9,26,215,9,38,209,9,38,215,9, + 40,168,24,213,9,50,128,6,240,2,6,12,2,242,0,7, + 1,42,128,67,241,14,0,5,10,136,71,144,67,144,53,152, + 2,152,54,160,35,157,59,152,45,208,10,40,215,4,41,209, + 4,41,241,15,7,1,42,114,16,0,0,0, }; diff --git a/Python/bltinmodule.c b/Python/bltinmodule.c index 7f366b43599ae5..01819200e6f921 100644 --- a/Python/bltinmodule.c +++ b/Python/bltinmodule.c @@ -2,6 +2,8 @@ #include "Python.h" #include +#include + #include "pycore_ast.h" // _PyAST_Validate() #include "pycore_call.h" // _PyObject_CallNoArgs() #include "pycore_compile.h" // _PyAST_Compile() @@ -277,6 +279,99 @@ builtin___import___impl(PyObject *module, PyObject *name, PyObject *globals, } +#ifdef INSTR_STATS +PyObject* optimized_functions; + +void +add_optimized_function(PyCodeObject* co) { + Py_ssize_t i, len; + len = PyList_Size(optimized_functions); + if (len == -1) { + PyErr_Print(); + return; + } + for (i = 0; i < len; i++) { + PyObject *item = PyList_GetItem(optimized_functions, i); + if (item == NULL) { + PyErr_Print(); + return; + } + + if (item == (PyObject 
*)co) {
+            return;
+        }
+    }
+
+    PyList_Append(optimized_functions, (PyObject *)co);
+}
+
+PyDoc_STRVAR(cmlq_doc,
+"Documentation for cmlq stats");
+
+/* get_cmlq_functions(): new reference to the recorded code objects (empty list if none). */
+static PyObject *
+builtin_get_cmlq_functions(PyObject *module, PyObject *Py_UNUSED(ignored))
+{
+    return optimized_functions ? Py_NewRef(optimized_functions) : PyList_New(0);
+}
+
+/* Set dict[key] = value and release the caller's reference to value.  A NULL
+ * value (failed conversion) is an error.  Returns 0, or -1 with an exception set. */
+static int
+cmlq_stats_set(PyObject *dict, const char *key, PyObject *value)
+{
+    int err = value ? PyDict_SetItemString(dict, key, value) : -1;
+    Py_XDECREF(value);
+    return err;
+}
+
+/* get_cmlq_stats(code): dict mapping each instruction's code-unit index to a dict of
+ * its statistics.  Opcode names are included because dis lacks the specialized ones. */
+static PyObject *
+builtin_get_cmlq_stats(PyObject *module, PyObject *arg)
+{
+    if (!PyCode_Check(arg)) {
+        PyErr_SetString(PyExc_TypeError, "get_cmlq_stats() argument must be a code object");
+        return NULL;
+    }
+    PyCodeObject *co = (PyCodeObject *)arg;
+    PyObject *result_dict = PyDict_New();
+    if (result_dict == NULL || co->co_stats_table == NULL) {
+        return result_dict;  /* allocation failed, or no statistics table */
+    }
+    CMLQStatsElem *table = (CMLQStatsElem *)PyBytes_AS_STRING(co->co_stats_table);
+    int num_instructions = Py_SIZE(co);
+    for (int i = 0; i < num_instructions; i++) {
+        CMLQStatsElem *elem = &table[i];
+        int base_opcode = _Py_GetBaseOpcode(co, i);
+        assert(base_opcode < MIN_INSTRUMENTED_OPCODE);
+        uint8_t opcode = _PyCode_CODE(co)[i].op.code;
+        PyObject *index = PyLong_FromLong(i);
+        PyObject *elem_dict = PyDict_New();
+        if (index == NULL || elem_dict == NULL
+            || cmlq_stats_set(elem_dict, "instr_ptr", PyLong_FromVoidPtr(_PyCode_CODE(co) + i)) < 0
+            || cmlq_stats_set(elem_dict, "offset", PyLong_FromLong(i * sizeof(_Py_CODEUNIT))) < 0
+            || cmlq_stats_set(elem_dict, "base_opcode", PyLong_FromLong(base_opcode)) < 0
+            || cmlq_stats_set(elem_dict, "opcode", PyLong_FromLong(opcode)) < 0
+            || cmlq_stats_set(elem_dict, "base_opcode_name", PyUnicode_FromString(_PyOpcode_OpName[base_opcode])) < 0
+            || cmlq_stats_set(elem_dict, "opcode_name", PyUnicode_FromString(_PyOpcode_OpName[opcode])) < 0
+            || cmlq_stats_set(elem_dict, "exec_count", PyLong_FromUnsignedLongLong(elem->exec_count)) < 0
+            || cmlq_stats_set(elem_dict, "exec_ms", PyLong_FromUnsignedLongLong(elem->exec_ms)) < 0
+            || cmlq_stats_set(elem_dict, "specialization_attempts", PyLong_FromUnsignedLongLong(elem->specialization_attempts)) < 0
+            || PyDict_SetItem(result_dict, index, elem_dict) < 0) {
+            Py_XDECREF(index);
+            Py_XDECREF(elem_dict);
+            Py_DECREF(result_dict);
+            return NULL;
+        }
+        Py_DECREF(index);
+        Py_DECREF(elem_dict);
+        i += _PyOpcode_Caches[base_opcode];  /* skip this instruction's inline cache entries */
+    }
+    return result_dict;
+}
+#endif
+
 /*[clinic input]
 abs as builtin_abs
 
@@ -3053,6 +3148,11 @@ static PyMethodDef builtin_methods[] = {
     BUILTIN_SORTED_METHODDEF
     BUILTIN_SUM_METHODDEF
     BUILTIN_VARS_METHODDEF
+#ifdef INSTR_STATS
+    {"get_cmlq_functions", _PyCFunction_CAST(builtin_get_cmlq_functions), METH_NOARGS, cmlq_doc},
+    {"get_cmlq_stats", _PyCFunction_CAST(builtin_get_cmlq_stats), METH_O, cmlq_doc},
+#endif
+
     {NULL, NULL},
 };
 
diff --git a/Python/bytecodes.c b/Python/bytecodes.c
index dfebaaa300fc8a..b182d1d3506760 100644
--- a/Python/bytecodes.c
+++ b/Python/bytecodes.c
@@ -175,6 +175,7 @@ dummy_func(
             }
         }
 
+
         inst(LOAD_CLOSURE, (-- value)) {
             /* We keep LOAD_CLOSURE so that the bytecode stays more readable.
*/ value = GETLOCAL(oparg); @@ -290,7 +291,7 @@ dummy_func( }; - inst(BINARY_OP_MULTIPLY_INT, (unused/1, left, right -- prod)) { + inst(BINARY_OP_MULTIPLY_INT, (unused/1, unused/4, left, right -- prod)) { DEOPT_IF(!PyLong_CheckExact(left), BINARY_OP); DEOPT_IF(!PyLong_CheckExact(right), BINARY_OP); STAT_INC(BINARY_OP, hit); @@ -300,7 +301,7 @@ dummy_func( ERROR_IF(prod == NULL, error); } - inst(BINARY_OP_MULTIPLY_FLOAT, (unused/1, left, right -- prod)) { + inst(BINARY_OP_MULTIPLY_FLOAT, (unused/1, unused/4, left, right -- prod)) { DEOPT_IF(!PyFloat_CheckExact(left), BINARY_OP); DEOPT_IF(!PyFloat_CheckExact(right), BINARY_OP); STAT_INC(BINARY_OP, hit); @@ -309,7 +310,7 @@ dummy_func( DECREF_INPUTS_AND_REUSE_FLOAT(left, right, dprod, prod); } - inst(BINARY_OP_SUBTRACT_INT, (unused/1, left, right -- sub)) { + inst(BINARY_OP_SUBTRACT_INT, (unused/1, unused/4, left, right -- sub)) { DEOPT_IF(!PyLong_CheckExact(left), BINARY_OP); DEOPT_IF(!PyLong_CheckExact(right), BINARY_OP); STAT_INC(BINARY_OP, hit); @@ -319,7 +320,7 @@ dummy_func( ERROR_IF(sub == NULL, error); } - inst(BINARY_OP_SUBTRACT_FLOAT, (unused/1, left, right -- sub)) { + inst(BINARY_OP_SUBTRACT_FLOAT, (unused/1, unused/4, left, right -- sub)) { DEOPT_IF(!PyFloat_CheckExact(left), BINARY_OP); DEOPT_IF(!PyFloat_CheckExact(right), BINARY_OP); STAT_INC(BINARY_OP, hit); @@ -327,7 +328,7 @@ dummy_func( DECREF_INPUTS_AND_REUSE_FLOAT(left, right, dsub, sub); } - inst(BINARY_OP_ADD_UNICODE, (unused/1, left, right -- res)) { + inst(BINARY_OP_ADD_UNICODE, (unused/1, unused/4, left, right -- res)) { DEOPT_IF(!PyUnicode_CheckExact(left), BINARY_OP); DEOPT_IF(Py_TYPE(right) != Py_TYPE(left), BINARY_OP); STAT_INC(BINARY_OP, hit); @@ -372,7 +373,7 @@ dummy_func( JUMPBY(INLINE_CACHE_ENTRIES_BINARY_OP + 1); } - inst(BINARY_OP_ADD_FLOAT, (unused/1, left, right -- sum)) { + inst(BINARY_OP_ADD_FLOAT, (unused/1, unused/4, left, right -- sum)) { DEOPT_IF(!PyFloat_CheckExact(left), BINARY_OP); DEOPT_IF(Py_TYPE(right) != 
Py_TYPE(left), BINARY_OP); STAT_INC(BINARY_OP, hit); @@ -381,7 +382,7 @@ dummy_func( DECREF_INPUTS_AND_REUSE_FLOAT(left, right, dsum, sum); } - inst(BINARY_OP_ADD_INT, (unused/1, left, right -- sum)) { + inst(BINARY_OP_ADD_INT, (unused/1, unused/4, left, right -- sum)) { DEOPT_IF(!PyLong_CheckExact(left), BINARY_OP); DEOPT_IF(Py_TYPE(right) != Py_TYPE(left), BINARY_OP); STAT_INC(BINARY_OP, hit); @@ -399,12 +400,16 @@ dummy_func( BINARY_SUBSCR_TUPLE_INT, }; - inst(BINARY_SUBSCR, (unused/1, container, sub -- res)) { + inst(BINARY_SUBSCR, (unused/1, unused/4, container, sub -- res)) { #if ENABLE_SPECIALIZATION _PyBinarySubscrCache *cache = (_PyBinarySubscrCache *)next_instr; if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) { next_instr--; _Py_Specialize_BinarySubscr(container, sub, next_instr); + int result = _PyExternal_TrySpecialize(next_instr, &stack_pointer, (_PyCache *)cache); + if (result) { + oparg = next_instr->op.arg; + } DISPATCH_SAME_OPARG(); } STAT_INC(BINARY_SUBSCR, deferred); @@ -445,7 +450,7 @@ dummy_func( ERROR_IF(err, error); } - inst(BINARY_SUBSCR_LIST_INT, (unused/1, list, sub -- res)) { + inst(BINARY_SUBSCR_LIST_INT, (unused/1, unused/4, list, sub -- res)) { DEOPT_IF(!PyLong_CheckExact(sub), BINARY_SUBSCR); DEOPT_IF(!PyList_CheckExact(list), BINARY_SUBSCR); @@ -461,7 +466,7 @@ dummy_func( Py_DECREF(list); } - inst(BINARY_SUBSCR_TUPLE_INT, (unused/1, tuple, sub -- res)) { + inst(BINARY_SUBSCR_TUPLE_INT, (unused/1, unused/4, tuple, sub -- res)) { DEOPT_IF(!PyLong_CheckExact(sub), BINARY_SUBSCR); DEOPT_IF(!PyTuple_CheckExact(tuple), BINARY_SUBSCR); @@ -477,7 +482,7 @@ dummy_func( Py_DECREF(tuple); } - inst(BINARY_SUBSCR_DICT, (unused/1, dict, sub -- res)) { + inst(BINARY_SUBSCR_DICT, (unused/1, unused/4, dict, sub -- res)) { DEOPT_IF(!PyDict_CheckExact(dict), BINARY_SUBSCR); STAT_INC(BINARY_SUBSCR, hit); res = PyDict_GetItemWithError(dict, sub); @@ -492,7 +497,7 @@ dummy_func( DECREF_INPUTS(); } - inst(BINARY_SUBSCR_GETITEM, (unused/1, container, sub -- 
unused)) { + inst(BINARY_SUBSCR_GETITEM, (unused/1, unused/4, container, sub -- unused)) { DEOPT_IF(tstate->interp->eval_frame, BINARY_SUBSCR); PyTypeObject *tp = Py_TYPE(container); DEOPT_IF(!PyType_HasFeature(tp, Py_TPFLAGS_HEAPTYPE), BINARY_SUBSCR); @@ -517,6 +522,7 @@ dummy_func( DISPATCH_INLINED(new_frame); } + inst(LIST_APPEND, (list, unused[oparg-1], v -- list, unused[oparg-1])) { ERROR_IF(_PyList_AppendTakeRef((PyListObject *)list, v) < 0, error); PREDICT(JUMP_BACKWARD); @@ -643,6 +649,7 @@ dummy_func( STACK_SHRINK(1); assert(EMPTY()); _PyFrame_SetStackPointer(frame, stack_pointer); + _PyExternal_FunctionEnd(frame); _Py_LeaveRecursiveCallPy(tstate); assert(frame != &entry_frame); // GH-99729: We need to unlink the frame *before* clearing it: @@ -679,6 +686,7 @@ dummy_func( assert(EMPTY()); _PyFrame_SetStackPointer(frame, stack_pointer); _Py_LeaveRecursiveCallPy(tstate); + _PyExternal_FunctionEnd(frame); assert(frame != &entry_frame); // GH-99729: We need to unlink the frame *before* clearing it: _PyInterpreterFrame *dying = frame; @@ -2654,7 +2662,7 @@ dummy_func( // (Some args may be keywords, see KW_NAMES, which sets 'kwnames'.) // On exit, the stack is [result]. // When calling Python, inline the call using DISPATCH_INLINED(). - inst(CALL, (unused/1, unused/2, method, callable, args[oparg] -- res)) { + inst(CALL, (unused/1, unused/2, unused/4, method, callable, args[oparg] -- res)) { int is_meth = method != NULL; int total_args = oparg; if (is_meth) { @@ -2667,6 +2675,10 @@ dummy_func( if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) { next_instr--; _Py_Specialize_Call(callable, next_instr, total_args, kwnames); + int result = _PyExternal_TrySpecialize(next_instr, &stack_pointer, (_PyCache *)cache); + if (result) { + oparg = next_instr->op.arg; + } DISPATCH_SAME_OPARG(); } STAT_INC(CALL, deferred); @@ -2742,7 +2754,7 @@ dummy_func( // Start out with [NULL, bound_method, arg1, arg2, ...] // Transform to [callable, self, arg1, arg2, ...] 
// Then fall through to CALL_PY_EXACT_ARGS - inst(CALL_BOUND_METHOD_EXACT_ARGS, (unused/1, unused/2, method, callable, unused[oparg] -- unused)) { + inst(CALL_BOUND_METHOD_EXACT_ARGS, (unused/1, unused/2, unused/4, method, callable, unused[oparg] -- unused)) { DEOPT_IF(method != NULL, CALL); DEOPT_IF(Py_TYPE(callable) != &PyMethod_Type, CALL); STAT_INC(CALL, hit); @@ -2754,7 +2766,7 @@ dummy_func( GO_TO_INSTRUCTION(CALL_PY_EXACT_ARGS); } - inst(CALL_PY_EXACT_ARGS, (unused/1, func_version/2, method, callable, args[oparg] -- unused)) { + inst(CALL_PY_EXACT_ARGS, (unused/1, func_version/2, unused/4, method, callable, args[oparg] -- unused)) { assert(kwnames == NULL); DEOPT_IF(tstate->interp->eval_frame, CALL); int is_meth = method != NULL; @@ -2782,7 +2794,7 @@ dummy_func( DISPATCH_INLINED(new_frame); } - inst(CALL_PY_WITH_DEFAULTS, (unused/1, func_version/2, method, callable, args[oparg] -- unused)) { + inst(CALL_PY_WITH_DEFAULTS, (unused/1, func_version/2, unused/4, method, callable, args[oparg] -- unused)) { assert(kwnames == NULL); DEOPT_IF(tstate->interp->eval_frame, CALL); int is_meth = method != NULL; @@ -2820,7 +2832,7 @@ dummy_func( DISPATCH_INLINED(new_frame); } - inst(CALL_NO_KW_TYPE_1, (unused/1, unused/2, null, callable, args[oparg] -- res)) { + inst(CALL_NO_KW_TYPE_1, (unused/1, unused/2, unused/4, null, callable, args[oparg] -- res)) { assert(kwnames == NULL); assert(oparg == 1); DEOPT_IF(null != NULL, CALL); @@ -2832,7 +2844,7 @@ dummy_func( Py_DECREF(&PyType_Type); // I.e., callable } - inst(CALL_NO_KW_STR_1, (unused/1, unused/2, null, callable, args[oparg] -- res)) { + inst(CALL_NO_KW_STR_1, (unused/1, unused/2, unused/4, null, callable, args[oparg] -- res)) { assert(kwnames == NULL); assert(oparg == 1); DEOPT_IF(null != NULL, CALL); @@ -2846,7 +2858,7 @@ dummy_func( CHECK_EVAL_BREAKER(); } - inst(CALL_NO_KW_TUPLE_1, (unused/1, unused/2, null, callable, args[oparg] -- res)) { + inst(CALL_NO_KW_TUPLE_1, (unused/1, unused/2, unused/4, null, callable, 
args[oparg] -- res)) { assert(kwnames == NULL); assert(oparg == 1); DEOPT_IF(null != NULL, CALL); @@ -2860,7 +2872,7 @@ dummy_func( CHECK_EVAL_BREAKER(); } - inst(CALL_BUILTIN_CLASS, (unused/1, unused/2, method, callable, args[oparg] -- res)) { + inst(CALL_BUILTIN_CLASS, (unused/1, unused/2, unused/4, method, callable, args[oparg] -- res)) { int is_meth = method != NULL; int total_args = oparg; if (is_meth) { @@ -2885,7 +2897,7 @@ dummy_func( CHECK_EVAL_BREAKER(); } - inst(CALL_NO_KW_BUILTIN_O, (unused/1, unused/2, method, callable, args[oparg] -- res)) { + inst(CALL_NO_KW_BUILTIN_O, (unused/1, unused/2, unused/4, method, callable, args[oparg] -- res)) { /* Builtin METH_O functions */ assert(kwnames == NULL); int is_meth = method != NULL; @@ -2916,7 +2928,7 @@ dummy_func( CHECK_EVAL_BREAKER(); } - inst(CALL_NO_KW_BUILTIN_FAST, (unused/1, unused/2, method, callable, args[oparg] -- res)) { + inst(CALL_NO_KW_BUILTIN_FAST, (unused/1, unused/2, unused/4, method, callable, args[oparg] -- res)) { /* Builtin METH_FASTCALL functions, without keywords */ assert(kwnames == NULL); int is_meth = method != NULL; @@ -2951,7 +2963,7 @@ dummy_func( CHECK_EVAL_BREAKER(); } - inst(CALL_BUILTIN_FAST_WITH_KEYWORDS, (unused/1, unused/2, method, callable, args[oparg] -- res)) { + inst(CALL_BUILTIN_FAST_WITH_KEYWORDS, (unused/1, unused/2, unused/4, method, callable, args[oparg] -- res)) { /* Builtin METH_FASTCALL | METH_KEYWORDS functions */ int is_meth = method != NULL; int total_args = oparg; @@ -2986,7 +2998,7 @@ dummy_func( CHECK_EVAL_BREAKER(); } - inst(CALL_NO_KW_LEN, (unused/1, unused/2, method, callable, args[oparg] -- res)) { + inst(CALL_NO_KW_LEN, (unused/1, unused/2, unused/4, method, callable, args[oparg] -- res)) { assert(kwnames == NULL); /* len(o) */ int is_meth = method != NULL; @@ -3013,7 +3025,7 @@ dummy_func( ERROR_IF(res == NULL, error); } - inst(CALL_NO_KW_ISINSTANCE, (unused/1, unused/2, method, callable, args[oparg] -- res)) { + inst(CALL_NO_KW_ISINSTANCE, 
(unused/1, unused/2, unused/4, method, callable, args[oparg] -- res)) { assert(kwnames == NULL); /* isinstance(o, o2) */ int is_meth = method != NULL; @@ -3043,7 +3055,7 @@ dummy_func( } // This is secretly a super-instruction - inst(CALL_NO_KW_LIST_APPEND, (unused/1, unused/2, method, self, args[oparg] -- unused)) { + inst(CALL_NO_KW_LIST_APPEND, (unused/1, unused/2, unused/4, method, self, args[oparg] -- unused)) { assert(kwnames == NULL); assert(oparg == 1); PyInterpreterState *interp = _PyInterpreterState_GET(); @@ -3063,7 +3075,7 @@ dummy_func( DISPATCH(); } - inst(CALL_NO_KW_METHOD_DESCRIPTOR_O, (unused/1, unused/2, method, unused, args[oparg] -- res)) { + inst(CALL_NO_KW_METHOD_DESCRIPTOR_O, (unused/1, unused/2, unused/4, method, unused, args[oparg] -- res)) { assert(kwnames == NULL); int is_meth = method != NULL; int total_args = oparg; @@ -3097,7 +3109,7 @@ dummy_func( CHECK_EVAL_BREAKER(); } - inst(CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS, (unused/1, unused/2, method, unused, args[oparg] -- res)) { + inst(CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS, (unused/1, unused/2, unused/4, method, unused, args[oparg] -- res)) { int is_meth = method != NULL; int total_args = oparg; if (is_meth) { @@ -3129,7 +3141,7 @@ dummy_func( CHECK_EVAL_BREAKER(); } - inst(CALL_NO_KW_METHOD_DESCRIPTOR_NOARGS, (unused/1, unused/2, method, unused, args[oparg] -- res)) { + inst(CALL_NO_KW_METHOD_DESCRIPTOR_NOARGS, (unused/1, unused/2, unused/4, method, unused, args[oparg] -- res)) { assert(kwnames == NULL); assert(oparg == 0 || oparg == 1); int is_meth = method != NULL; @@ -3161,7 +3173,7 @@ dummy_func( CHECK_EVAL_BREAKER(); } - inst(CALL_NO_KW_METHOD_DESCRIPTOR_FAST, (unused/1, unused/2, method, unused, args[oparg] -- res)) { + inst(CALL_NO_KW_METHOD_DESCRIPTOR_FAST, (unused/1, unused/2, unused/4, method, unused, args[oparg] -- res)) { assert(kwnames == NULL); int is_meth = method != NULL; int total_args = oparg; @@ -3370,12 +3382,16 @@ dummy_func( top = Py_NewRef(bottom); } - 
inst(BINARY_OP, (unused/1, lhs, rhs -- res)) { + inst(BINARY_OP, (unused/1, unused/4, lhs, rhs -- res)) { #if ENABLE_SPECIALIZATION _PyBinaryOpCache *cache = (_PyBinaryOpCache *)next_instr; if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) { next_instr--; _Py_Specialize_BinaryOp(lhs, rhs, next_instr, oparg, &GETLOCAL(0)); + int result = _PyExternal_TrySpecialize(next_instr, &stack_pointer, (_PyCache *)cache); + if (result) { + oparg = next_instr->op.arg; + } DISPATCH_SAME_OPARG(); } STAT_INC(BINARY_OP, deferred); @@ -3384,7 +3400,16 @@ dummy_func( assert(0 <= oparg); assert((unsigned)oparg < Py_ARRAY_LENGTH(binary_ops)); assert(binary_ops[oparg]); - res = binary_ops[oparg](lhs, rhs); + CMLQ_PAPI_REGION("binary_op", res = binary_ops[oparg](lhs, rhs)); + DECREF_INPUTS(); + ERROR_IF(res == NULL, error); + } + + op(LONG_TRUE_DIVIDE, (unused/1, lhs, rhs -- res)) { + assert(0 <= oparg); + assert((unsigned)oparg < Py_ARRAY_LENGTH(binary_ops)); + assert(binary_ops[oparg]); + res = _PyLong_True_Divide(lhs, rhs); DECREF_INPUTS(); ERROR_IF(res == NULL, error); } diff --git a/Python/ceval.c b/Python/ceval.c index fdb5b72e6c0f7b..d7c689e039c775 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -36,6 +36,9 @@ #include #include +//#include +//#include + #ifdef Py_DEBUG /* For debugging the interpreter: */ # define LLTRACE 1 /* Low-level trace feature */ @@ -314,6 +317,13 @@ static const binaryfunc binary_ops[] = { }; +static PyExternal_CodeHandler external_handlers[256]; +void +PyExternal_SetCodeHandler(int slot, PyExternal_CodeHandler handler) { + external_handlers[slot] = handler; +} + + // PEP 634: Structural Pattern Matching @@ -642,11 +652,45 @@ static inline void _Py_LeaveRecursiveCallPy(PyThreadState *tstate) { * so consume 3 units of C stack */ #define PY_EVAL_C_STACK_UNITS 2 +//static int init_numpy() { +// import_array(); +// import_ufunc(); +//} + + +static bool should_rewrite(_PyInterpreterFrame *frame) { + PyObject *value = _PyDict_GetItemWithError(GLOBALS(), 
&_Py_ID(__rewrite__)); + return value && PyObject_IsTrue(value); +} + +#define PC_OFFSET ((next_instr - (_Py_CODEUNIT *)frame->f_code->co_code_adaptive - 1) * 2) + +#ifdef CMLQ_PAPI +#include +static int papi_initialized = 0; +void init_papi() +{ + if (!papi_initialized) { + int retval = PAPI_library_init(PAPI_VER_CURRENT); + assert(retval == PAPI_VER_CURRENT); + papi_initialized = 1; + } +} +#endif + PyObject* _Py_HOT_FUNCTION _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int throwflag) { _Py_EnsureTstateNotNULL(tstate); CALL_STAT_INC(pyeval_calls); +#ifdef INSTR_STATS + struct timeval start, end; + CMLQStatsElem *stats_elem = NULL; +#endif + +#ifdef CMLQ_PAPI +init_papi(); +#endif #if USE_COMPUTED_GOTOS /* Import the static jump table */ @@ -735,6 +779,10 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int } resume_frame: +// if (PyArray_API == NULL && in_function(frame, "patch_and_run")) { +// init_numpy(); +// } + SET_LOCALS_FROM_FRAME(); #ifdef LLTRACE @@ -826,6 +874,8 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int if (_Py_HandlePending(tstate) != 0) { goto error; } + + DISPATCH(); { @@ -836,6 +886,32 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int #endif { + /* We cannot let the generator generate these cases because we want the extension to be responsible for stack + * handling */ + TARGET(BINARY_OP_EXTERNAL) { + _PyBinaryOpCache* cache = (_PyBinaryOpCache*)next_instr; + void* external_cache_pointer = POINTER_FROM_ARRAY(cache->external_cache_pointer); +#include "cmlq_external_opcode.h" + next_instr += INLINE_CACHE_ENTRIES_BINARY_OP; + DISPATCH(); + } + + TARGET(CALL_EXTERNAL) { + _PyCallCache *cache = (_PyCallCache *)next_instr; + void *external_cache_pointer = POINTER_FROM_ARRAY(cache->external_cache_pointer); +#include "cmlq_external_opcode.h" + next_instr += INLINE_CACHE_ENTRIES_CALL; + DISPATCH(); + } + + 
TARGET(BINARY_SUBSCR_EXTERNAL) { + _PyBinarySubscrCache *cache = (_PyBinarySubscrCache *)next_instr; + void *external_cache_pointer = POINTER_FROM_ARRAY(cache->external_cache_pointer); +#include "cmlq_external_opcode.h" + next_instr += INLINE_CACHE_ENTRIES_BINARY_SUBSCR; + DISPATCH(); + } + #include "generated_cases.c.h" /* INSTRUMENTED_LINE has to be here, rather than in bytecodes.c, diff --git a/Python/ceval_macros.h b/Python/ceval_macros.h index fccf9088cbd131..5e81a40fa9753a 100644 --- a/Python/ceval_macros.h +++ b/Python/ceval_macros.h @@ -48,17 +48,32 @@ #endif #ifdef HAVE_COMPUTED_GOTOS - #ifndef USE_COMPUTED_GOTOS - #define USE_COMPUTED_GOTOS 1 - #endif +#ifndef USE_COMPUTED_GOTOS +#define USE_COMPUTED_GOTOS 1 +#endif #else - #if defined(USE_COMPUTED_GOTOS) && USE_COMPUTED_GOTOS +#if defined(USE_COMPUTED_GOTOS) && USE_COMPUTED_GOTOS #error "Computed gotos are not supported on this compiler." - #endif +#endif #undef USE_COMPUTED_GOTOS #define USE_COMPUTED_GOTOS 0 #endif +#ifdef INSTR_STATS +#define RECORD_STATS() \ +if (stats_elem) { \ +gettimeofday(&end, NULL); \ +stats_elem->exec_ms += (end.tv_sec - start.tv_sec) * 1000000 + (end.tv_usec - start.tv_usec); \ +} \ +gettimeofday(&start, NULL); \ +stats_elem = get_stats_elem(frame->f_code, next_instr); \ +if (stats_elem) { stats_elem->exec_count++; } + +#else +#define RECORD_STATS(op) ((void)0) +#endif + + #ifdef Py_STATS #define INSTRUCTION_START(op) \ do { \ @@ -68,7 +83,11 @@ lastopcode = op; \ } while (0) #else -#define INSTRUCTION_START(op) (frame->prev_instr = next_instr++) +#define INSTRUCTION_START(op) \ + RECORD_STATS(); \ + do { \ + frame->prev_instr = next_instr++; \ + } while (0) #endif #if USE_COMPUTED_GOTOS @@ -87,7 +106,6 @@ #define PRE_DISPATCH_GOTO() ((void)0) #endif - /* Do interpreter dispatch accounting for tracing and instrumentation */ #define DISPATCH() \ { \ @@ -126,8 +144,8 @@ #ifndef Py_DEBUG #define GETITEM(v, i) PyTuple_GET_ITEM((v), (i)) #else -static inline PyObject * 
-GETITEM(PyObject *v, Py_ssize_t i) { +static inline PyObject* +GETITEM(PyObject* v, Py_ssize_t i) { assert(PyTuple_Check(v)); assert(i >= 0); assert(i < PyTuple_GET_SIZE(v)); diff --git a/Python/cmlq_external_opcode.h b/Python/cmlq_external_opcode.h new file mode 100644 index 00000000000000..ef0da877e34bd5 --- /dev/null +++ b/Python/cmlq_external_opcode.h @@ -0,0 +1,15 @@ +#if defined(Py_OPT_CMLQ_ENV) || defined(Py_OPT_CMLQ_ALWAYS) +int result = external_handlers[oparg](external_cache_pointer, &stack_pointer); +if (result == 2) { + unsigned long offset = next_instr - 1 - _PyCode_CODE(frame->f_code); + next_instr = _PyExternal_Deoptimize(next_instr - 1, frame); + oparg = next_instr->op.arg; + DISPATCH_SAME_OPARG(); +} +#else +_PyErr_Format(tstate, PyExc_SystemError, + "%U:%d: Opcode should not be reachable, CMLQ is disabled", + frame->f_code->co_filename, + PyUnstable_InterpreterFrame_GetLine(frame)); +goto error; +#endif \ No newline at end of file diff --git a/Python/frame.c b/Python/frame.c index b84fd9b6a9380a..296855aca899a3 100644 --- a/Python/frame.c +++ b/Python/frame.c @@ -165,4 +165,4 @@ PyUnstable_InterpreterFrame_GetLine(_PyInterpreterFrame *frame) { int addr = _PyInterpreterFrame_LASTI(frame) * sizeof(_Py_CODEUNIT); return PyCode_Addr2Line(frame->f_code, addr); -} +} \ No newline at end of file diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index e4cff7bdc3384b..0ddced727388ec 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -57,7 +57,7 @@ TARGET(LOAD_CLOSURE) { PyObject *value; - #line 179 "Python/bytecodes.c" + #line 180 "Python/bytecodes.c" /* We keep LOAD_CLOSURE so that the bytecode stays more readable. 
*/ value = GETLOCAL(oparg); if (value == NULL) goto unbound_local_error; @@ -70,7 +70,7 @@ TARGET(LOAD_FAST_CHECK) { PyObject *value; - #line 186 "Python/bytecodes.c" + #line 187 "Python/bytecodes.c" value = GETLOCAL(oparg); if (value == NULL) goto unbound_local_error; Py_INCREF(value); @@ -82,7 +82,7 @@ TARGET(LOAD_FAST) { PyObject *value; - #line 192 "Python/bytecodes.c" + #line 193 "Python/bytecodes.c" value = GETLOCAL(oparg); assert(value != NULL); Py_INCREF(value); @@ -94,7 +94,7 @@ TARGET(LOAD_FAST_AND_CLEAR) { PyObject *value; - #line 198 "Python/bytecodes.c" + #line 199 "Python/bytecodes.c" value = GETLOCAL(oparg); // do not use SETLOCAL here, it decrefs the old value GETLOCAL(oparg) = NULL; @@ -107,7 +107,7 @@ TARGET(LOAD_CONST) { PREDICTED(LOAD_CONST); PyObject *value; - #line 204 "Python/bytecodes.c" + #line 205 "Python/bytecodes.c" value = GETITEM(frame->f_code->co_consts, oparg); Py_INCREF(value); #line 114 "Python/generated_cases.c.h" @@ -118,7 +118,7 @@ TARGET(STORE_FAST) { PyObject *value = stack_pointer[-1]; - #line 209 "Python/bytecodes.c" + #line 210 "Python/bytecodes.c" SETLOCAL(oparg, value); #line 124 "Python/generated_cases.c.h" STACK_SHRINK(1); @@ -130,7 +130,7 @@ PyObject *_tmp_2; { PyObject *value; - #line 192 "Python/bytecodes.c" + #line 193 "Python/bytecodes.c" value = GETLOCAL(oparg); assert(value != NULL); Py_INCREF(value); @@ -140,7 +140,7 @@ oparg = (next_instr++)->op.arg; { PyObject *value; - #line 192 "Python/bytecodes.c" + #line 193 "Python/bytecodes.c" value = GETLOCAL(oparg); assert(value != NULL); Py_INCREF(value); @@ -158,7 +158,7 @@ PyObject *_tmp_2; { PyObject *value; - #line 192 "Python/bytecodes.c" + #line 193 "Python/bytecodes.c" value = GETLOCAL(oparg); assert(value != NULL); Py_INCREF(value); @@ -168,7 +168,7 @@ oparg = (next_instr++)->op.arg; { PyObject *value; - #line 204 "Python/bytecodes.c" + #line 205 "Python/bytecodes.c" value = GETITEM(frame->f_code->co_consts, oparg); Py_INCREF(value); #line 175 
"Python/generated_cases.c.h" @@ -184,14 +184,14 @@ PyObject *_tmp_1 = stack_pointer[-1]; { PyObject *value = _tmp_1; - #line 209 "Python/bytecodes.c" + #line 210 "Python/bytecodes.c" SETLOCAL(oparg, value); #line 190 "Python/generated_cases.c.h" } oparg = (next_instr++)->op.arg; { PyObject *value; - #line 192 "Python/bytecodes.c" + #line 193 "Python/bytecodes.c" value = GETLOCAL(oparg); assert(value != NULL); Py_INCREF(value); @@ -207,14 +207,14 @@ PyObject *_tmp_2 = stack_pointer[-2]; { PyObject *value = _tmp_1; - #line 209 "Python/bytecodes.c" + #line 210 "Python/bytecodes.c" SETLOCAL(oparg, value); #line 213 "Python/generated_cases.c.h" } oparg = (next_instr++)->op.arg; { PyObject *value = _tmp_2; - #line 209 "Python/bytecodes.c" + #line 210 "Python/bytecodes.c" SETLOCAL(oparg, value); #line 220 "Python/generated_cases.c.h" } @@ -227,7 +227,7 @@ PyObject *_tmp_2; { PyObject *value; - #line 204 "Python/bytecodes.c" + #line 205 "Python/bytecodes.c" value = GETITEM(frame->f_code->co_consts, oparg); Py_INCREF(value); #line 234 "Python/generated_cases.c.h" @@ -236,7 +236,7 @@ oparg = (next_instr++)->op.arg; { PyObject *value; - #line 192 "Python/bytecodes.c" + #line 193 "Python/bytecodes.c" value = GETLOCAL(oparg); assert(value != NULL); Py_INCREF(value); @@ -251,7 +251,7 @@ TARGET(POP_TOP) { PyObject *value = stack_pointer[-1]; - #line 219 "Python/bytecodes.c" + #line 220 "Python/bytecodes.c" #line 256 "Python/generated_cases.c.h" Py_DECREF(value); STACK_SHRINK(1); @@ -260,7 +260,7 @@ TARGET(PUSH_NULL) { PyObject *res; - #line 223 "Python/bytecodes.c" + #line 224 "Python/bytecodes.c" res = NULL; #line 266 "Python/generated_cases.c.h" STACK_GROW(1); @@ -273,13 +273,13 @@ PyObject *_tmp_2 = stack_pointer[-2]; { PyObject *value = _tmp_1; - #line 219 "Python/bytecodes.c" + #line 220 "Python/bytecodes.c" #line 278 "Python/generated_cases.c.h" Py_DECREF(value); } { PyObject *value = _tmp_2; - #line 219 "Python/bytecodes.c" + #line 220 "Python/bytecodes.c" #line 284 
"Python/generated_cases.c.h" Py_DECREF(value); } @@ -290,7 +290,7 @@ TARGET(INSTRUMENTED_END_FOR) { PyObject *value = stack_pointer[-1]; PyObject *receiver = stack_pointer[-2]; - #line 229 "Python/bytecodes.c" + #line 230 "Python/bytecodes.c" /* Need to create a fake StopIteration error here, * to conform to PEP 380 */ if (PyGen_Check(receiver)) { @@ -310,7 +310,7 @@ TARGET(END_SEND) { PyObject *value = stack_pointer[-1]; PyObject *receiver = stack_pointer[-2]; - #line 242 "Python/bytecodes.c" + #line 243 "Python/bytecodes.c" Py_DECREF(receiver); #line 316 "Python/generated_cases.c.h" STACK_SHRINK(1); @@ -321,7 +321,7 @@ TARGET(INSTRUMENTED_END_SEND) { PyObject *value = stack_pointer[-1]; PyObject *receiver = stack_pointer[-2]; - #line 246 "Python/bytecodes.c" + #line 247 "Python/bytecodes.c" if (PyGen_Check(receiver) || PyCoro_CheckExact(receiver)) { PyErr_SetObject(PyExc_StopIteration, value); if (monitor_stop_iteration(tstate, frame, next_instr-1)) { @@ -339,11 +339,11 @@ TARGET(UNARY_NEGATIVE) { PyObject *value = stack_pointer[-1]; PyObject *res; - #line 257 "Python/bytecodes.c" + #line 258 "Python/bytecodes.c" res = PyNumber_Negative(value); #line 345 "Python/generated_cases.c.h" Py_DECREF(value); - #line 259 "Python/bytecodes.c" + #line 260 "Python/bytecodes.c" if (res == NULL) goto pop_1_error; #line 349 "Python/generated_cases.c.h" stack_pointer[-1] = res; @@ -353,11 +353,11 @@ TARGET(UNARY_NOT) { PyObject *value = stack_pointer[-1]; PyObject *res; - #line 263 "Python/bytecodes.c" + #line 264 "Python/bytecodes.c" int err = PyObject_IsTrue(value); #line 359 "Python/generated_cases.c.h" Py_DECREF(value); - #line 265 "Python/bytecodes.c" + #line 266 "Python/bytecodes.c" if (err < 0) goto pop_1_error; if (err == 0) { res = Py_True; @@ -373,11 +373,11 @@ TARGET(UNARY_INVERT) { PyObject *value = stack_pointer[-1]; PyObject *res; - #line 275 "Python/bytecodes.c" + #line 276 "Python/bytecodes.c" res = PyNumber_Invert(value); #line 379 "Python/generated_cases.c.h" 
Py_DECREF(value); - #line 277 "Python/bytecodes.c" + #line 278 "Python/bytecodes.c" if (res == NULL) goto pop_1_error; #line 383 "Python/generated_cases.c.h" stack_pointer[-1] = res; @@ -388,7 +388,7 @@ PyObject *right = stack_pointer[-1]; PyObject *left = stack_pointer[-2]; PyObject *prod; - #line 294 "Python/bytecodes.c" + #line 295 "Python/bytecodes.c" DEOPT_IF(!PyLong_CheckExact(left), BINARY_OP); DEOPT_IF(!PyLong_CheckExact(right), BINARY_OP); STAT_INC(BINARY_OP, hit); @@ -399,7 +399,7 @@ #line 400 "Python/generated_cases.c.h" STACK_SHRINK(1); stack_pointer[-1] = prod; - next_instr += 1; + next_instr += 5; DISPATCH(); } @@ -407,7 +407,7 @@ PyObject *right = stack_pointer[-1]; PyObject *left = stack_pointer[-2]; PyObject *prod; - #line 304 "Python/bytecodes.c" + #line 305 "Python/bytecodes.c" DEOPT_IF(!PyFloat_CheckExact(left), BINARY_OP); DEOPT_IF(!PyFloat_CheckExact(right), BINARY_OP); STAT_INC(BINARY_OP, hit); @@ -417,7 +417,7 @@ #line 418 "Python/generated_cases.c.h" STACK_SHRINK(1); stack_pointer[-1] = prod; - next_instr += 1; + next_instr += 5; DISPATCH(); } @@ -425,7 +425,7 @@ PyObject *right = stack_pointer[-1]; PyObject *left = stack_pointer[-2]; PyObject *sub; - #line 313 "Python/bytecodes.c" + #line 314 "Python/bytecodes.c" DEOPT_IF(!PyLong_CheckExact(left), BINARY_OP); DEOPT_IF(!PyLong_CheckExact(right), BINARY_OP); STAT_INC(BINARY_OP, hit); @@ -436,7 +436,7 @@ #line 437 "Python/generated_cases.c.h" STACK_SHRINK(1); stack_pointer[-1] = sub; - next_instr += 1; + next_instr += 5; DISPATCH(); } @@ -444,7 +444,7 @@ PyObject *right = stack_pointer[-1]; PyObject *left = stack_pointer[-2]; PyObject *sub; - #line 323 "Python/bytecodes.c" + #line 324 "Python/bytecodes.c" DEOPT_IF(!PyFloat_CheckExact(left), BINARY_OP); DEOPT_IF(!PyFloat_CheckExact(right), BINARY_OP); STAT_INC(BINARY_OP, hit); @@ -453,7 +453,7 @@ #line 454 "Python/generated_cases.c.h" STACK_SHRINK(1); stack_pointer[-1] = sub; - next_instr += 1; + next_instr += 5; DISPATCH(); } @@ -461,7 +461,7 
@@ PyObject *right = stack_pointer[-1]; PyObject *left = stack_pointer[-2]; PyObject *res; - #line 331 "Python/bytecodes.c" + #line 332 "Python/bytecodes.c" DEOPT_IF(!PyUnicode_CheckExact(left), BINARY_OP); DEOPT_IF(Py_TYPE(right) != Py_TYPE(left), BINARY_OP); STAT_INC(BINARY_OP, hit); @@ -472,14 +472,14 @@ #line 473 "Python/generated_cases.c.h" STACK_SHRINK(1); stack_pointer[-1] = res; - next_instr += 1; + next_instr += 5; DISPATCH(); } TARGET(BINARY_OP_INPLACE_ADD_UNICODE) { PyObject *right = stack_pointer[-1]; PyObject *left = stack_pointer[-2]; - #line 347 "Python/bytecodes.c" + #line 348 "Python/bytecodes.c" DEOPT_IF(!PyUnicode_CheckExact(left), BINARY_OP); DEOPT_IF(Py_TYPE(right) != Py_TYPE(left), BINARY_OP); _Py_CODEUNIT true_next = next_instr[INLINE_CACHE_ENTRIES_BINARY_OP]; @@ -515,7 +515,7 @@ PyObject *right = stack_pointer[-1]; PyObject *left = stack_pointer[-2]; PyObject *sum; - #line 376 "Python/bytecodes.c" + #line 377 "Python/bytecodes.c" DEOPT_IF(!PyFloat_CheckExact(left), BINARY_OP); DEOPT_IF(Py_TYPE(right) != Py_TYPE(left), BINARY_OP); STAT_INC(BINARY_OP, hit); @@ -525,7 +525,7 @@ #line 526 "Python/generated_cases.c.h" STACK_SHRINK(1); stack_pointer[-1] = sum; - next_instr += 1; + next_instr += 5; DISPATCH(); } @@ -533,7 +533,7 @@ PyObject *right = stack_pointer[-1]; PyObject *left = stack_pointer[-2]; PyObject *sum; - #line 385 "Python/bytecodes.c" + #line 386 "Python/bytecodes.c" DEOPT_IF(!PyLong_CheckExact(left), BINARY_OP); DEOPT_IF(Py_TYPE(right) != Py_TYPE(left), BINARY_OP); STAT_INC(BINARY_OP, hit); @@ -544,37 +544,41 @@ #line 545 "Python/generated_cases.c.h" STACK_SHRINK(1); stack_pointer[-1] = sum; - next_instr += 1; + next_instr += 5; DISPATCH(); } TARGET(BINARY_SUBSCR) { PREDICTED(BINARY_SUBSCR); - static_assert(INLINE_CACHE_ENTRIES_BINARY_SUBSCR == 1, "incorrect cache size"); + static_assert(INLINE_CACHE_ENTRIES_BINARY_SUBSCR == 5, "incorrect cache size"); PyObject *sub = stack_pointer[-1]; PyObject *container = stack_pointer[-2]; 
PyObject *res; - #line 403 "Python/bytecodes.c" + #line 404 "Python/bytecodes.c" #if ENABLE_SPECIALIZATION _PyBinarySubscrCache *cache = (_PyBinarySubscrCache *)next_instr; if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) { next_instr--; _Py_Specialize_BinarySubscr(container, sub, next_instr); + int result = _PyExternal_TrySpecialize(next_instr, &stack_pointer, (_PyCache *)cache); + if (result) { + oparg = next_instr->op.arg; + } DISPATCH_SAME_OPARG(); } STAT_INC(BINARY_SUBSCR, deferred); DECREMENT_ADAPTIVE_COUNTER(cache->counter); #endif /* ENABLE_SPECIALIZATION */ res = PyObject_GetItem(container, sub); - #line 570 "Python/generated_cases.c.h" + #line 574 "Python/generated_cases.c.h" Py_DECREF(container); Py_DECREF(sub); - #line 415 "Python/bytecodes.c" + #line 420 "Python/bytecodes.c" if (res == NULL) goto pop_2_error; - #line 575 "Python/generated_cases.c.h" + #line 579 "Python/generated_cases.c.h" STACK_SHRINK(1); stack_pointer[-1] = res; - next_instr += 1; + next_instr += 5; DISPATCH(); } @@ -583,7 +587,7 @@ PyObject *start = stack_pointer[-2]; PyObject *container = stack_pointer[-3]; PyObject *res; - #line 419 "Python/bytecodes.c" + #line 424 "Python/bytecodes.c" PyObject *slice = _PyBuildSlice_ConsumeRefs(start, stop); // Can't use ERROR_IF() here, because we haven't // DECREF'ed container yet, and we still own slice. 
@@ -596,7 +600,7 @@ } Py_DECREF(container); if (res == NULL) goto pop_3_error; - #line 600 "Python/generated_cases.c.h" + #line 604 "Python/generated_cases.c.h" STACK_SHRINK(2); stack_pointer[-1] = res; DISPATCH(); @@ -607,7 +611,7 @@ PyObject *start = stack_pointer[-2]; PyObject *container = stack_pointer[-3]; PyObject *v = stack_pointer[-4]; - #line 434 "Python/bytecodes.c" + #line 439 "Python/bytecodes.c" PyObject *slice = _PyBuildSlice_ConsumeRefs(start, stop); int err; if (slice == NULL) { @@ -620,7 +624,7 @@ Py_DECREF(v); Py_DECREF(container); if (err) goto pop_4_error; - #line 624 "Python/generated_cases.c.h" + #line 628 "Python/generated_cases.c.h" STACK_SHRINK(4); DISPATCH(); } @@ -629,7 +633,7 @@ PyObject *sub = stack_pointer[-1]; PyObject *list = stack_pointer[-2]; PyObject *res; - #line 449 "Python/bytecodes.c" + #line 454 "Python/bytecodes.c" DEOPT_IF(!PyLong_CheckExact(sub), BINARY_SUBSCR); DEOPT_IF(!PyList_CheckExact(list), BINARY_SUBSCR); @@ -643,10 +647,10 @@ Py_INCREF(res); _Py_DECREF_SPECIALIZED(sub, (destructor)PyObject_Free); Py_DECREF(list); - #line 647 "Python/generated_cases.c.h" + #line 651 "Python/generated_cases.c.h" STACK_SHRINK(1); stack_pointer[-1] = res; - next_instr += 1; + next_instr += 5; DISPATCH(); } @@ -654,7 +658,7 @@ PyObject *sub = stack_pointer[-1]; PyObject *tuple = stack_pointer[-2]; PyObject *res; - #line 465 "Python/bytecodes.c" + #line 470 "Python/bytecodes.c" DEOPT_IF(!PyLong_CheckExact(sub), BINARY_SUBSCR); DEOPT_IF(!PyTuple_CheckExact(tuple), BINARY_SUBSCR); @@ -668,10 +672,10 @@ Py_INCREF(res); _Py_DECREF_SPECIALIZED(sub, (destructor)PyObject_Free); Py_DECREF(tuple); - #line 672 "Python/generated_cases.c.h" + #line 676 "Python/generated_cases.c.h" STACK_SHRINK(1); stack_pointer[-1] = res; - next_instr += 1; + next_instr += 5; DISPATCH(); } @@ -679,7 +683,7 @@ PyObject *sub = stack_pointer[-1]; PyObject *dict = stack_pointer[-2]; PyObject *res; - #line 481 "Python/bytecodes.c" + #line 486 "Python/bytecodes.c" 
DEOPT_IF(!PyDict_CheckExact(dict), BINARY_SUBSCR); STAT_INC(BINARY_SUBSCR, hit); res = PyDict_GetItemWithError(dict, sub); @@ -687,26 +691,26 @@ if (!_PyErr_Occurred(tstate)) { _PyErr_SetKeyError(sub); } - #line 691 "Python/generated_cases.c.h" + #line 695 "Python/generated_cases.c.h" Py_DECREF(dict); Py_DECREF(sub); - #line 489 "Python/bytecodes.c" + #line 494 "Python/bytecodes.c" if (true) goto pop_2_error; } Py_INCREF(res); // Do this before DECREF'ing dict, sub - #line 698 "Python/generated_cases.c.h" + #line 702 "Python/generated_cases.c.h" Py_DECREF(dict); Py_DECREF(sub); STACK_SHRINK(1); stack_pointer[-1] = res; - next_instr += 1; + next_instr += 5; DISPATCH(); } TARGET(BINARY_SUBSCR_GETITEM) { PyObject *sub = stack_pointer[-1]; PyObject *container = stack_pointer[-2]; - #line 496 "Python/bytecodes.c" + #line 501 "Python/bytecodes.c" DEOPT_IF(tstate->interp->eval_frame, BINARY_SUBSCR); PyTypeObject *tp = Py_TYPE(container); DEOPT_IF(!PyType_HasFeature(tp, Py_TPFLAGS_HEAPTYPE), BINARY_SUBSCR); @@ -729,15 +733,15 @@ JUMPBY(INLINE_CACHE_ENTRIES_BINARY_SUBSCR); frame->return_offset = 0; DISPATCH_INLINED(new_frame); - #line 733 "Python/generated_cases.c.h" + #line 737 "Python/generated_cases.c.h" } TARGET(LIST_APPEND) { PyObject *v = stack_pointer[-1]; PyObject *list = stack_pointer[-(2 + (oparg-1))]; - #line 521 "Python/bytecodes.c" + #line 527 "Python/bytecodes.c" if (_PyList_AppendTakeRef((PyListObject *)list, v) < 0) goto pop_1_error; - #line 741 "Python/generated_cases.c.h" + #line 745 "Python/generated_cases.c.h" STACK_SHRINK(1); PREDICT(JUMP_BACKWARD); DISPATCH(); @@ -746,13 +750,13 @@ TARGET(SET_ADD) { PyObject *v = stack_pointer[-1]; PyObject *set = stack_pointer[-(2 + (oparg-1))]; - #line 526 "Python/bytecodes.c" + #line 532 "Python/bytecodes.c" int err = PySet_Add(set, v); - #line 752 "Python/generated_cases.c.h" + #line 756 "Python/generated_cases.c.h" Py_DECREF(v); - #line 528 "Python/bytecodes.c" + #line 534 "Python/bytecodes.c" if (err) goto 
pop_1_error; - #line 756 "Python/generated_cases.c.h" + #line 760 "Python/generated_cases.c.h" STACK_SHRINK(1); PREDICT(JUMP_BACKWARD); DISPATCH(); @@ -765,7 +769,7 @@ PyObject *container = stack_pointer[-2]; PyObject *v = stack_pointer[-3]; uint16_t counter = read_u16(&next_instr[0].cache); - #line 539 "Python/bytecodes.c" + #line 545 "Python/bytecodes.c" #if ENABLE_SPECIALIZATION if (ADAPTIVE_COUNTER_IS_ZERO(counter)) { next_instr--; @@ -780,13 +784,13 @@ #endif /* ENABLE_SPECIALIZATION */ /* container[sub] = v */ int err = PyObject_SetItem(container, sub, v); - #line 784 "Python/generated_cases.c.h" + #line 788 "Python/generated_cases.c.h" Py_DECREF(v); Py_DECREF(container); Py_DECREF(sub); - #line 554 "Python/bytecodes.c" + #line 560 "Python/bytecodes.c" if (err) goto pop_3_error; - #line 790 "Python/generated_cases.c.h" + #line 794 "Python/generated_cases.c.h" STACK_SHRINK(3); next_instr += 1; DISPATCH(); @@ -796,7 +800,7 @@ PyObject *sub = stack_pointer[-1]; PyObject *list = stack_pointer[-2]; PyObject *value = stack_pointer[-3]; - #line 558 "Python/bytecodes.c" + #line 564 "Python/bytecodes.c" DEOPT_IF(!PyLong_CheckExact(sub), STORE_SUBSCR); DEOPT_IF(!PyList_CheckExact(list), STORE_SUBSCR); @@ -813,7 +817,7 @@ Py_DECREF(old_value); _Py_DECREF_SPECIALIZED(sub, (destructor)PyObject_Free); Py_DECREF(list); - #line 817 "Python/generated_cases.c.h" + #line 821 "Python/generated_cases.c.h" STACK_SHRINK(3); next_instr += 1; DISPATCH(); @@ -823,13 +827,13 @@ PyObject *sub = stack_pointer[-1]; PyObject *dict = stack_pointer[-2]; PyObject *value = stack_pointer[-3]; - #line 577 "Python/bytecodes.c" + #line 583 "Python/bytecodes.c" DEOPT_IF(!PyDict_CheckExact(dict), STORE_SUBSCR); STAT_INC(STORE_SUBSCR, hit); int err = _PyDict_SetItem_Take2((PyDictObject *)dict, sub, value); Py_DECREF(dict); if (err) goto pop_3_error; - #line 833 "Python/generated_cases.c.h" + #line 837 "Python/generated_cases.c.h" STACK_SHRINK(3); next_instr += 1; DISPATCH(); @@ -838,15 +842,15 @@ 
TARGET(DELETE_SUBSCR) { PyObject *sub = stack_pointer[-1]; PyObject *container = stack_pointer[-2]; - #line 585 "Python/bytecodes.c" + #line 591 "Python/bytecodes.c" /* del container[sub] */ int err = PyObject_DelItem(container, sub); - #line 845 "Python/generated_cases.c.h" + #line 849 "Python/generated_cases.c.h" Py_DECREF(container); Py_DECREF(sub); - #line 588 "Python/bytecodes.c" + #line 594 "Python/bytecodes.c" if (err) goto pop_2_error; - #line 850 "Python/generated_cases.c.h" + #line 854 "Python/generated_cases.c.h" STACK_SHRINK(2); DISPATCH(); } @@ -854,14 +858,14 @@ TARGET(CALL_INTRINSIC_1) { PyObject *value = stack_pointer[-1]; PyObject *res; - #line 592 "Python/bytecodes.c" + #line 598 "Python/bytecodes.c" assert(oparg <= MAX_INTRINSIC_1); res = _PyIntrinsics_UnaryFunctions[oparg](tstate, value); - #line 861 "Python/generated_cases.c.h" + #line 865 "Python/generated_cases.c.h" Py_DECREF(value); - #line 595 "Python/bytecodes.c" + #line 601 "Python/bytecodes.c" if (res == NULL) goto pop_1_error; - #line 865 "Python/generated_cases.c.h" + #line 869 "Python/generated_cases.c.h" stack_pointer[-1] = res; DISPATCH(); } @@ -870,15 +874,15 @@ PyObject *value1 = stack_pointer[-1]; PyObject *value2 = stack_pointer[-2]; PyObject *res; - #line 599 "Python/bytecodes.c" + #line 605 "Python/bytecodes.c" assert(oparg <= MAX_INTRINSIC_2); res = _PyIntrinsics_BinaryFunctions[oparg](tstate, value2, value1); - #line 877 "Python/generated_cases.c.h" + #line 881 "Python/generated_cases.c.h" Py_DECREF(value2); Py_DECREF(value1); - #line 602 "Python/bytecodes.c" + #line 608 "Python/bytecodes.c" if (res == NULL) goto pop_2_error; - #line 882 "Python/generated_cases.c.h" + #line 886 "Python/generated_cases.c.h" STACK_SHRINK(1); stack_pointer[-1] = res; DISPATCH(); @@ -886,7 +890,7 @@ TARGET(RAISE_VARARGS) { PyObject **args = (stack_pointer - oparg); - #line 606 "Python/bytecodes.c" + #line 612 "Python/bytecodes.c" PyObject *cause = NULL, *exc = NULL; switch (oparg) { case 2: @@ 
-908,12 +912,12 @@ break; } if (true) { STACK_SHRINK(oparg); goto error; } - #line 912 "Python/generated_cases.c.h" + #line 916 "Python/generated_cases.c.h" } TARGET(INTERPRETER_EXIT) { PyObject *retval = stack_pointer[-1]; - #line 630 "Python/bytecodes.c" + #line 636 "Python/bytecodes.c" assert(frame == &entry_frame); assert(_PyFrame_IsIncomplete(frame)); STACK_SHRINK(1); // Since we're not going to DISPATCH() @@ -924,15 +928,16 @@ assert(!_PyErr_Occurred(tstate)); tstate->c_recursion_remaining += PY_EVAL_C_STACK_UNITS; return retval; - #line 928 "Python/generated_cases.c.h" + #line 932 "Python/generated_cases.c.h" } TARGET(RETURN_VALUE) { PyObject *retval = stack_pointer[-1]; - #line 643 "Python/bytecodes.c" + #line 649 "Python/bytecodes.c" STACK_SHRINK(1); assert(EMPTY()); _PyFrame_SetStackPointer(frame, stack_pointer); + _PyExternal_FunctionEnd(frame); _Py_LeaveRecursiveCallPy(tstate); assert(frame != &entry_frame); // GH-99729: We need to unlink the frame *before* clearing it: @@ -942,12 +947,12 @@ frame->prev_instr += frame->return_offset; _PyFrame_StackPush(frame, retval); goto resume_frame; - #line 946 "Python/generated_cases.c.h" + #line 951 "Python/generated_cases.c.h" } TARGET(INSTRUMENTED_RETURN_VALUE) { PyObject *retval = stack_pointer[-1]; - #line 658 "Python/bytecodes.c" + #line 665 "Python/bytecodes.c" int err = _Py_call_instrumentation_arg( tstate, PY_MONITORING_EVENT_PY_RETURN, frame, next_instr-1, retval); @@ -964,16 +969,17 @@ frame->prev_instr += frame->return_offset; _PyFrame_StackPush(frame, retval); goto resume_frame; - #line 968 "Python/generated_cases.c.h" + #line 973 "Python/generated_cases.c.h" } TARGET(RETURN_CONST) { - #line 677 "Python/bytecodes.c" + #line 684 "Python/bytecodes.c" PyObject *retval = GETITEM(frame->f_code->co_consts, oparg); Py_INCREF(retval); assert(EMPTY()); _PyFrame_SetStackPointer(frame, stack_pointer); _Py_LeaveRecursiveCallPy(tstate); + _PyExternal_FunctionEnd(frame); assert(frame != &entry_frame); // GH-99729: 
We need to unlink the frame *before* clearing it: _PyInterpreterFrame *dying = frame; @@ -982,11 +988,11 @@ frame->prev_instr += frame->return_offset; _PyFrame_StackPush(frame, retval); goto resume_frame; - #line 986 "Python/generated_cases.c.h" + #line 992 "Python/generated_cases.c.h" } TARGET(INSTRUMENTED_RETURN_CONST) { - #line 693 "Python/bytecodes.c" + #line 701 "Python/bytecodes.c" PyObject *retval = GETITEM(frame->f_code->co_consts, oparg); int err = _Py_call_instrumentation_arg( tstate, PY_MONITORING_EVENT_PY_RETURN, @@ -1004,13 +1010,13 @@ frame->prev_instr += frame->return_offset; _PyFrame_StackPush(frame, retval); goto resume_frame; - #line 1008 "Python/generated_cases.c.h" + #line 1014 "Python/generated_cases.c.h" } TARGET(GET_AITER) { PyObject *obj = stack_pointer[-1]; PyObject *iter; - #line 713 "Python/bytecodes.c" + #line 721 "Python/bytecodes.c" unaryfunc getter = NULL; PyTypeObject *type = Py_TYPE(obj); @@ -1023,16 +1029,16 @@ "'async for' requires an object with " "__aiter__ method, got %.100s", type->tp_name); - #line 1027 "Python/generated_cases.c.h" + #line 1033 "Python/generated_cases.c.h" Py_DECREF(obj); - #line 726 "Python/bytecodes.c" + #line 734 "Python/bytecodes.c" if (true) goto pop_1_error; } iter = (*getter)(obj); - #line 1034 "Python/generated_cases.c.h" + #line 1040 "Python/generated_cases.c.h" Py_DECREF(obj); - #line 731 "Python/bytecodes.c" + #line 739 "Python/bytecodes.c" if (iter == NULL) goto pop_1_error; if (Py_TYPE(iter)->tp_as_async == NULL || @@ -1045,7 +1051,7 @@ Py_DECREF(iter); if (true) goto pop_1_error; } - #line 1049 "Python/generated_cases.c.h" + #line 1055 "Python/generated_cases.c.h" stack_pointer[-1] = iter; DISPATCH(); } @@ -1053,7 +1059,7 @@ TARGET(GET_ANEXT) { PyObject *aiter = stack_pointer[-1]; PyObject *awaitable; - #line 746 "Python/bytecodes.c" + #line 754 "Python/bytecodes.c" unaryfunc getter = NULL; PyObject *next_iter = NULL; PyTypeObject *type = Py_TYPE(aiter); @@ -1097,7 +1103,7 @@ } } - #line 1101 
"Python/generated_cases.c.h" + #line 1107 "Python/generated_cases.c.h" STACK_GROW(1); stack_pointer[-1] = awaitable; PREDICT(LOAD_CONST); @@ -1108,16 +1114,16 @@ PREDICTED(GET_AWAITABLE); PyObject *iterable = stack_pointer[-1]; PyObject *iter; - #line 793 "Python/bytecodes.c" + #line 801 "Python/bytecodes.c" iter = _PyCoro_GetAwaitableIter(iterable); if (iter == NULL) { format_awaitable_error(tstate, Py_TYPE(iterable), oparg); } - #line 1119 "Python/generated_cases.c.h" + #line 1125 "Python/generated_cases.c.h" Py_DECREF(iterable); - #line 800 "Python/bytecodes.c" + #line 808 "Python/bytecodes.c" if (iter != NULL && PyCoro_CheckExact(iter)) { PyObject *yf = _PyGen_yf((PyGenObject*)iter); @@ -1135,7 +1141,7 @@ if (iter == NULL) goto pop_1_error; - #line 1139 "Python/generated_cases.c.h" + #line 1145 "Python/generated_cases.c.h" stack_pointer[-1] = iter; PREDICT(LOAD_CONST); DISPATCH(); @@ -1147,7 +1153,7 @@ PyObject *v = stack_pointer[-1]; PyObject *receiver = stack_pointer[-2]; PyObject *retval; - #line 826 "Python/bytecodes.c" + #line 834 "Python/bytecodes.c" #if ENABLE_SPECIALIZATION _PySendCache *cache = (_PySendCache *)next_instr; if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) { @@ -1194,7 +1200,7 @@ } } Py_DECREF(v); - #line 1198 "Python/generated_cases.c.h" + #line 1204 "Python/generated_cases.c.h" stack_pointer[-1] = retval; next_instr += 1; DISPATCH(); @@ -1203,7 +1209,7 @@ TARGET(SEND_GEN) { PyObject *v = stack_pointer[-1]; PyObject *receiver = stack_pointer[-2]; - #line 875 "Python/bytecodes.c" + #line 883 "Python/bytecodes.c" DEOPT_IF(tstate->interp->eval_frame, SEND); PyGenObject *gen = (PyGenObject *)receiver; DEOPT_IF(Py_TYPE(gen) != &PyGen_Type && @@ -1219,12 +1225,12 @@ tstate->exc_info = &gen->gi_exc_state; JUMPBY(INLINE_CACHE_ENTRIES_SEND); DISPATCH_INLINED(gen_frame); - #line 1223 "Python/generated_cases.c.h" + #line 1229 "Python/generated_cases.c.h" } TARGET(INSTRUMENTED_YIELD_VALUE) { PyObject *retval = stack_pointer[-1]; - #line 893 
"Python/bytecodes.c" + #line 901 "Python/bytecodes.c" assert(frame != &entry_frame); PyGenObject *gen = _PyFrame_GetGenerator(frame); gen->gi_frame_state = FRAME_SUSPENDED; @@ -1241,12 +1247,12 @@ gen_frame->previous = NULL; _PyFrame_StackPush(frame, retval); goto resume_frame; - #line 1245 "Python/generated_cases.c.h" + #line 1251 "Python/generated_cases.c.h" } TARGET(YIELD_VALUE) { PyObject *retval = stack_pointer[-1]; - #line 912 "Python/bytecodes.c" + #line 920 "Python/bytecodes.c" // NOTE: It's important that YIELD_VALUE never raises an exception! // The compiler treats any exception raised here as a failed close() // or throw() call. @@ -1262,15 +1268,15 @@ gen_frame->previous = NULL; _PyFrame_StackPush(frame, retval); goto resume_frame; - #line 1266 "Python/generated_cases.c.h" + #line 1272 "Python/generated_cases.c.h" } TARGET(POP_EXCEPT) { PyObject *exc_value = stack_pointer[-1]; - #line 930 "Python/bytecodes.c" + #line 938 "Python/bytecodes.c" _PyErr_StackItem *exc_info = tstate->exc_info; Py_XSETREF(exc_info->exc_value, exc_value); - #line 1274 "Python/generated_cases.c.h" + #line 1280 "Python/generated_cases.c.h" STACK_SHRINK(1); DISPATCH(); } @@ -1278,7 +1284,7 @@ TARGET(RERAISE) { PyObject *exc = stack_pointer[-1]; PyObject **values = (stack_pointer - (1 + oparg)); - #line 935 "Python/bytecodes.c" + #line 943 "Python/bytecodes.c" assert(oparg >= 0 && oparg <= 2); if (oparg) { PyObject *lasti = values[0]; @@ -1297,19 +1303,19 @@ _PyErr_SetRaisedException(tstate, exc); monitor_reraise(tstate, frame, next_instr-1); goto exception_unwind; - #line 1301 "Python/generated_cases.c.h" + #line 1307 "Python/generated_cases.c.h" } TARGET(END_ASYNC_FOR) { PyObject *exc = stack_pointer[-1]; PyObject *awaitable = stack_pointer[-2]; - #line 956 "Python/bytecodes.c" + #line 964 "Python/bytecodes.c" assert(exc && PyExceptionInstance_Check(exc)); if (PyErr_GivenExceptionMatches(exc, PyExc_StopAsyncIteration)) { - #line 1310 "Python/generated_cases.c.h" + #line 1316 
"Python/generated_cases.c.h" Py_DECREF(awaitable); Py_DECREF(exc); - #line 959 "Python/bytecodes.c" + #line 967 "Python/bytecodes.c" } else { Py_INCREF(exc); @@ -1317,7 +1323,7 @@ monitor_reraise(tstate, frame, next_instr-1); goto exception_unwind; } - #line 1321 "Python/generated_cases.c.h" + #line 1327 "Python/generated_cases.c.h" STACK_SHRINK(2); DISPATCH(); } @@ -1328,16 +1334,16 @@ PyObject *sub_iter = stack_pointer[-3]; PyObject *none; PyObject *value; - #line 969 "Python/bytecodes.c" + #line 977 "Python/bytecodes.c" assert(throwflag); assert(exc_value && PyExceptionInstance_Check(exc_value)); if (PyErr_GivenExceptionMatches(exc_value, PyExc_StopIteration)) { value = Py_NewRef(((PyStopIterationObject *)exc_value)->value); - #line 1337 "Python/generated_cases.c.h" + #line 1343 "Python/generated_cases.c.h" Py_DECREF(sub_iter); Py_DECREF(last_sent_val); Py_DECREF(exc_value); - #line 974 "Python/bytecodes.c" + #line 982 "Python/bytecodes.c" none = Py_None; } else { @@ -1345,7 +1351,7 @@ monitor_reraise(tstate, frame, next_instr-1); goto exception_unwind; } - #line 1349 "Python/generated_cases.c.h" + #line 1355 "Python/generated_cases.c.h" STACK_SHRINK(1); stack_pointer[-1] = value; stack_pointer[-2] = none; @@ -1354,9 +1360,9 @@ TARGET(LOAD_ASSERTION_ERROR) { PyObject *value; - #line 984 "Python/bytecodes.c" + #line 992 "Python/bytecodes.c" value = Py_NewRef(PyExc_AssertionError); - #line 1360 "Python/generated_cases.c.h" + #line 1366 "Python/generated_cases.c.h" STACK_GROW(1); stack_pointer[-1] = value; DISPATCH(); @@ -1364,7 +1370,7 @@ TARGET(LOAD_BUILD_CLASS) { PyObject *bc; - #line 988 "Python/bytecodes.c" + #line 996 "Python/bytecodes.c" if (PyDict_CheckExact(BUILTINS())) { bc = _PyDict_GetItemWithError(BUILTINS(), &_Py_ID(__build_class__)); @@ -1386,7 +1392,7 @@ if (true) goto error; } } - #line 1390 "Python/generated_cases.c.h" + #line 1396 "Python/generated_cases.c.h" STACK_GROW(1); stack_pointer[-1] = bc; DISPATCH(); @@ -1394,33 +1400,33 @@ 
TARGET(STORE_NAME) { PyObject *v = stack_pointer[-1]; - #line 1013 "Python/bytecodes.c" + #line 1021 "Python/bytecodes.c" PyObject *name = GETITEM(frame->f_code->co_names, oparg); PyObject *ns = LOCALS(); int err; if (ns == NULL) { _PyErr_Format(tstate, PyExc_SystemError, "no locals found when storing %R", name); - #line 1405 "Python/generated_cases.c.h" + #line 1411 "Python/generated_cases.c.h" Py_DECREF(v); - #line 1020 "Python/bytecodes.c" + #line 1028 "Python/bytecodes.c" if (true) goto pop_1_error; } if (PyDict_CheckExact(ns)) err = PyDict_SetItem(ns, name, v); else err = PyObject_SetItem(ns, name, v); - #line 1414 "Python/generated_cases.c.h" + #line 1420 "Python/generated_cases.c.h" Py_DECREF(v); - #line 1027 "Python/bytecodes.c" + #line 1035 "Python/bytecodes.c" if (err) goto pop_1_error; - #line 1418 "Python/generated_cases.c.h" + #line 1424 "Python/generated_cases.c.h" STACK_SHRINK(1); DISPATCH(); } TARGET(DELETE_NAME) { - #line 1031 "Python/bytecodes.c" + #line 1039 "Python/bytecodes.c" PyObject *name = GETITEM(frame->f_code->co_names, oparg); PyObject *ns = LOCALS(); int err; @@ -1437,7 +1443,7 @@ name); goto error; } - #line 1441 "Python/generated_cases.c.h" + #line 1447 "Python/generated_cases.c.h" DISPATCH(); } @@ -1445,7 +1451,7 @@ PREDICTED(UNPACK_SEQUENCE); static_assert(INLINE_CACHE_ENTRIES_UNPACK_SEQUENCE == 1, "incorrect cache size"); PyObject *seq = stack_pointer[-1]; - #line 1057 "Python/bytecodes.c" + #line 1065 "Python/bytecodes.c" #if ENABLE_SPECIALIZATION _PyUnpackSequenceCache *cache = (_PyUnpackSequenceCache *)next_instr; if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) { @@ -1458,11 +1464,11 @@ #endif /* ENABLE_SPECIALIZATION */ PyObject **top = stack_pointer + oparg - 1; int res = unpack_iterable(tstate, seq, oparg, -1, top); - #line 1462 "Python/generated_cases.c.h" + #line 1468 "Python/generated_cases.c.h" Py_DECREF(seq); - #line 1070 "Python/bytecodes.c" + #line 1078 "Python/bytecodes.c" if (res == 0) goto pop_1_error; - #line 1466 
"Python/generated_cases.c.h" + #line 1472 "Python/generated_cases.c.h" STACK_SHRINK(1); STACK_GROW(oparg); next_instr += 1; @@ -1472,14 +1478,14 @@ TARGET(UNPACK_SEQUENCE_TWO_TUPLE) { PyObject *seq = stack_pointer[-1]; PyObject **values = stack_pointer - (1); - #line 1074 "Python/bytecodes.c" + #line 1082 "Python/bytecodes.c" DEOPT_IF(!PyTuple_CheckExact(seq), UNPACK_SEQUENCE); DEOPT_IF(PyTuple_GET_SIZE(seq) != 2, UNPACK_SEQUENCE); assert(oparg == 2); STAT_INC(UNPACK_SEQUENCE, hit); values[0] = Py_NewRef(PyTuple_GET_ITEM(seq, 1)); values[1] = Py_NewRef(PyTuple_GET_ITEM(seq, 0)); - #line 1483 "Python/generated_cases.c.h" + #line 1489 "Python/generated_cases.c.h" Py_DECREF(seq); STACK_SHRINK(1); STACK_GROW(oparg); @@ -1490,7 +1496,7 @@ TARGET(UNPACK_SEQUENCE_TUPLE) { PyObject *seq = stack_pointer[-1]; PyObject **values = stack_pointer - (1); - #line 1084 "Python/bytecodes.c" + #line 1092 "Python/bytecodes.c" DEOPT_IF(!PyTuple_CheckExact(seq), UNPACK_SEQUENCE); DEOPT_IF(PyTuple_GET_SIZE(seq) != oparg, UNPACK_SEQUENCE); STAT_INC(UNPACK_SEQUENCE, hit); @@ -1498,7 +1504,7 @@ for (int i = oparg; --i >= 0; ) { *values++ = Py_NewRef(items[i]); } - #line 1502 "Python/generated_cases.c.h" + #line 1508 "Python/generated_cases.c.h" Py_DECREF(seq); STACK_SHRINK(1); STACK_GROW(oparg); @@ -1509,7 +1515,7 @@ TARGET(UNPACK_SEQUENCE_LIST) { PyObject *seq = stack_pointer[-1]; PyObject **values = stack_pointer - (1); - #line 1095 "Python/bytecodes.c" + #line 1103 "Python/bytecodes.c" DEOPT_IF(!PyList_CheckExact(seq), UNPACK_SEQUENCE); DEOPT_IF(PyList_GET_SIZE(seq) != oparg, UNPACK_SEQUENCE); STAT_INC(UNPACK_SEQUENCE, hit); @@ -1517,7 +1523,7 @@ for (int i = oparg; --i >= 0; ) { *values++ = Py_NewRef(items[i]); } - #line 1521 "Python/generated_cases.c.h" + #line 1527 "Python/generated_cases.c.h" Py_DECREF(seq); STACK_SHRINK(1); STACK_GROW(oparg); @@ -1527,15 +1533,15 @@ TARGET(UNPACK_EX) { PyObject *seq = stack_pointer[-1]; - #line 1106 "Python/bytecodes.c" + #line 1114 
"Python/bytecodes.c" int totalargs = 1 + (oparg & 0xFF) + (oparg >> 8); PyObject **top = stack_pointer + totalargs - 1; int res = unpack_iterable(tstate, seq, oparg & 0xFF, oparg >> 8, top); - #line 1535 "Python/generated_cases.c.h" + #line 1541 "Python/generated_cases.c.h" Py_DECREF(seq); - #line 1110 "Python/bytecodes.c" + #line 1118 "Python/bytecodes.c" if (res == 0) goto pop_1_error; - #line 1539 "Python/generated_cases.c.h" + #line 1545 "Python/generated_cases.c.h" STACK_GROW((oparg & 0xFF) + (oparg >> 8)); DISPATCH(); } @@ -1546,7 +1552,7 @@ PyObject *owner = stack_pointer[-1]; PyObject *v = stack_pointer[-2]; uint16_t counter = read_u16(&next_instr[0].cache); - #line 1121 "Python/bytecodes.c" + #line 1129 "Python/bytecodes.c" #if ENABLE_SPECIALIZATION if (ADAPTIVE_COUNTER_IS_ZERO(counter)) { PyObject *name = GETITEM(frame->f_code->co_names, oparg); @@ -1562,12 +1568,12 @@ #endif /* ENABLE_SPECIALIZATION */ PyObject *name = GETITEM(frame->f_code->co_names, oparg); int err = PyObject_SetAttr(owner, name, v); - #line 1566 "Python/generated_cases.c.h" + #line 1572 "Python/generated_cases.c.h" Py_DECREF(v); Py_DECREF(owner); - #line 1137 "Python/bytecodes.c" + #line 1145 "Python/bytecodes.c" if (err) goto pop_2_error; - #line 1571 "Python/generated_cases.c.h" + #line 1577 "Python/generated_cases.c.h" STACK_SHRINK(2); next_instr += 4; DISPATCH(); @@ -1575,34 +1581,34 @@ TARGET(DELETE_ATTR) { PyObject *owner = stack_pointer[-1]; - #line 1141 "Python/bytecodes.c" + #line 1149 "Python/bytecodes.c" PyObject *name = GETITEM(frame->f_code->co_names, oparg); int err = PyObject_SetAttr(owner, name, (PyObject *)NULL); - #line 1582 "Python/generated_cases.c.h" + #line 1588 "Python/generated_cases.c.h" Py_DECREF(owner); - #line 1144 "Python/bytecodes.c" + #line 1152 "Python/bytecodes.c" if (err) goto pop_1_error; - #line 1586 "Python/generated_cases.c.h" + #line 1592 "Python/generated_cases.c.h" STACK_SHRINK(1); DISPATCH(); } TARGET(STORE_GLOBAL) { PyObject *v = 
stack_pointer[-1]; - #line 1148 "Python/bytecodes.c" + #line 1156 "Python/bytecodes.c" PyObject *name = GETITEM(frame->f_code->co_names, oparg); int err = PyDict_SetItem(GLOBALS(), name, v); - #line 1596 "Python/generated_cases.c.h" + #line 1602 "Python/generated_cases.c.h" Py_DECREF(v); - #line 1151 "Python/bytecodes.c" + #line 1159 "Python/bytecodes.c" if (err) goto pop_1_error; - #line 1600 "Python/generated_cases.c.h" + #line 1606 "Python/generated_cases.c.h" STACK_SHRINK(1); DISPATCH(); } TARGET(DELETE_GLOBAL) { - #line 1155 "Python/bytecodes.c" + #line 1163 "Python/bytecodes.c" PyObject *name = GETITEM(frame->f_code->co_names, oparg); int err; err = PyDict_DelItem(GLOBALS(), name); @@ -1614,13 +1620,13 @@ } goto error; } - #line 1618 "Python/generated_cases.c.h" + #line 1624 "Python/generated_cases.c.h" DISPATCH(); } TARGET(LOAD_LOCALS) { PyObject *locals; - #line 1169 "Python/bytecodes.c" + #line 1177 "Python/bytecodes.c" locals = LOCALS(); if (locals == NULL) { _PyErr_SetString(tstate, PyExc_SystemError, @@ -1628,7 +1634,7 @@ if (true) goto error; } Py_INCREF(locals); - #line 1632 "Python/generated_cases.c.h" + #line 1638 "Python/generated_cases.c.h" STACK_GROW(1); stack_pointer[-1] = locals; DISPATCH(); @@ -1637,7 +1643,7 @@ TARGET(LOAD_FROM_DICT_OR_GLOBALS) { PyObject *mod_or_class_dict = stack_pointer[-1]; PyObject *v; - #line 1179 "Python/bytecodes.c" + #line 1187 "Python/bytecodes.c" PyObject *name = GETITEM(frame->f_code->co_names, oparg); if (PyDict_CheckExact(mod_or_class_dict)) { v = PyDict_GetItemWithError(mod_or_class_dict, name); @@ -1691,7 +1697,7 @@ } } } - #line 1695 "Python/generated_cases.c.h" + #line 1701 "Python/generated_cases.c.h" Py_DECREF(mod_or_class_dict); stack_pointer[-1] = v; DISPATCH(); @@ -1699,7 +1705,7 @@ TARGET(LOAD_NAME) { PyObject *v; - #line 1236 "Python/bytecodes.c" + #line 1244 "Python/bytecodes.c" PyObject *mod_or_class_dict = LOCALS(); if (mod_or_class_dict == NULL) { _PyErr_SetString(tstate, PyExc_SystemError, @@ 
-1759,7 +1765,7 @@ } } } - #line 1763 "Python/generated_cases.c.h" + #line 1769 "Python/generated_cases.c.h" STACK_GROW(1); stack_pointer[-1] = v; DISPATCH(); @@ -1770,7 +1776,7 @@ static_assert(INLINE_CACHE_ENTRIES_LOAD_GLOBAL == 4, "incorrect cache size"); PyObject *null = NULL; PyObject *v; - #line 1304 "Python/bytecodes.c" + #line 1312 "Python/bytecodes.c" #if ENABLE_SPECIALIZATION _PyLoadGlobalCache *cache = (_PyLoadGlobalCache *)next_instr; if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) { @@ -1822,7 +1828,7 @@ } } null = NULL; - #line 1826 "Python/generated_cases.c.h" + #line 1832 "Python/generated_cases.c.h" STACK_GROW(1); STACK_GROW(((oparg & 1) ? 1 : 0)); stack_pointer[-1] = v; @@ -1836,7 +1842,7 @@ PyObject *res; uint16_t index = read_u16(&next_instr[1].cache); uint16_t version = read_u16(&next_instr[2].cache); - #line 1358 "Python/bytecodes.c" + #line 1366 "Python/bytecodes.c" DEOPT_IF(!PyDict_CheckExact(GLOBALS()), LOAD_GLOBAL); PyDictObject *dict = (PyDictObject *)GLOBALS(); DEOPT_IF(dict->ma_keys->dk_version != version, LOAD_GLOBAL); @@ -1847,7 +1853,7 @@ Py_INCREF(res); STAT_INC(LOAD_GLOBAL, hit); null = NULL; - #line 1851 "Python/generated_cases.c.h" + #line 1857 "Python/generated_cases.c.h" STACK_GROW(1); STACK_GROW(((oparg & 1) ? 1 : 0)); stack_pointer[-1] = res; @@ -1862,7 +1868,7 @@ uint16_t index = read_u16(&next_instr[1].cache); uint16_t mod_version = read_u16(&next_instr[2].cache); uint16_t bltn_version = read_u16(&next_instr[3].cache); - #line 1371 "Python/bytecodes.c" + #line 1379 "Python/bytecodes.c" DEOPT_IF(!PyDict_CheckExact(GLOBALS()), LOAD_GLOBAL); DEOPT_IF(!PyDict_CheckExact(BUILTINS()), LOAD_GLOBAL); PyDictObject *mdict = (PyDictObject *)GLOBALS(); @@ -1877,7 +1883,7 @@ Py_INCREF(res); STAT_INC(LOAD_GLOBAL, hit); null = NULL; - #line 1881 "Python/generated_cases.c.h" + #line 1887 "Python/generated_cases.c.h" STACK_GROW(1); STACK_GROW(((oparg & 1) ? 
1 : 0)); stack_pointer[-1] = res; @@ -1887,16 +1893,16 @@ } TARGET(DELETE_FAST) { - #line 1388 "Python/bytecodes.c" + #line 1396 "Python/bytecodes.c" PyObject *v = GETLOCAL(oparg); if (v == NULL) goto unbound_local_error; SETLOCAL(oparg, NULL); - #line 1895 "Python/generated_cases.c.h" + #line 1901 "Python/generated_cases.c.h" DISPATCH(); } TARGET(MAKE_CELL) { - #line 1394 "Python/bytecodes.c" + #line 1402 "Python/bytecodes.c" // "initial" is probably NULL but not if it's an arg (or set // via PyFrame_LocalsToFast() before MAKE_CELL has run). PyObject *initial = GETLOCAL(oparg); @@ -1905,12 +1911,12 @@ goto resume_with_error; } SETLOCAL(oparg, cell); - #line 1909 "Python/generated_cases.c.h" + #line 1915 "Python/generated_cases.c.h" DISPATCH(); } TARGET(DELETE_DEREF) { - #line 1405 "Python/bytecodes.c" + #line 1413 "Python/bytecodes.c" PyObject *cell = GETLOCAL(oparg); PyObject *oldobj = PyCell_GET(cell); // Can't use ERROR_IF here. @@ -1921,14 +1927,14 @@ } PyCell_SET(cell, NULL); Py_DECREF(oldobj); - #line 1925 "Python/generated_cases.c.h" + #line 1931 "Python/generated_cases.c.h" DISPATCH(); } TARGET(LOAD_FROM_DICT_OR_DEREF) { PyObject *class_dict = stack_pointer[-1]; PyObject *value; - #line 1418 "Python/bytecodes.c" + #line 1426 "Python/bytecodes.c" PyObject *name; assert(class_dict); assert(oparg >= 0 && oparg < frame->f_code->co_nlocalsplus); @@ -1963,14 +1969,14 @@ } Py_INCREF(value); } - #line 1967 "Python/generated_cases.c.h" + #line 1973 "Python/generated_cases.c.h" stack_pointer[-1] = value; DISPATCH(); } TARGET(LOAD_DEREF) { PyObject *value; - #line 1455 "Python/bytecodes.c" + #line 1463 "Python/bytecodes.c" PyObject *cell = GETLOCAL(oparg); value = PyCell_GET(cell); if (value == NULL) { @@ -1978,7 +1984,7 @@ if (true) goto error; } Py_INCREF(value); - #line 1982 "Python/generated_cases.c.h" + #line 1988 "Python/generated_cases.c.h" STACK_GROW(1); stack_pointer[-1] = value; DISPATCH(); @@ -1986,18 +1992,18 @@ TARGET(STORE_DEREF) { PyObject *v = 
stack_pointer[-1]; - #line 1465 "Python/bytecodes.c" + #line 1473 "Python/bytecodes.c" PyObject *cell = GETLOCAL(oparg); PyObject *oldobj = PyCell_GET(cell); PyCell_SET(cell, v); Py_XDECREF(oldobj); - #line 1995 "Python/generated_cases.c.h" + #line 2001 "Python/generated_cases.c.h" STACK_SHRINK(1); DISPATCH(); } TARGET(COPY_FREE_VARS) { - #line 1472 "Python/bytecodes.c" + #line 1480 "Python/bytecodes.c" /* Copy closure variables to free variables */ PyCodeObject *co = frame->f_code; assert(PyFunction_Check(frame->f_funcobj)); @@ -2008,22 +2014,22 @@ PyObject *o = PyTuple_GET_ITEM(closure, i); frame->localsplus[offset + i] = Py_NewRef(o); } - #line 2012 "Python/generated_cases.c.h" + #line 2018 "Python/generated_cases.c.h" DISPATCH(); } TARGET(BUILD_STRING) { PyObject **pieces = (stack_pointer - oparg); PyObject *str; - #line 1485 "Python/bytecodes.c" + #line 1493 "Python/bytecodes.c" str = _PyUnicode_JoinArray(&_Py_STR(empty), pieces, oparg); - #line 2021 "Python/generated_cases.c.h" + #line 2027 "Python/generated_cases.c.h" for (int _i = oparg; --_i >= 0;) { Py_DECREF(pieces[_i]); } - #line 1487 "Python/bytecodes.c" + #line 1495 "Python/bytecodes.c" if (str == NULL) { STACK_SHRINK(oparg); goto error; } - #line 2027 "Python/generated_cases.c.h" + #line 2033 "Python/generated_cases.c.h" STACK_SHRINK(oparg); STACK_GROW(1); stack_pointer[-1] = str; @@ -2033,10 +2039,10 @@ TARGET(BUILD_TUPLE) { PyObject **values = (stack_pointer - oparg); PyObject *tup; - #line 1491 "Python/bytecodes.c" + #line 1499 "Python/bytecodes.c" tup = _PyTuple_FromArraySteal(values, oparg); if (tup == NULL) { STACK_SHRINK(oparg); goto error; } - #line 2040 "Python/generated_cases.c.h" + #line 2046 "Python/generated_cases.c.h" STACK_SHRINK(oparg); STACK_GROW(1); stack_pointer[-1] = tup; @@ -2046,10 +2052,10 @@ TARGET(BUILD_LIST) { PyObject **values = (stack_pointer - oparg); PyObject *list; - #line 1496 "Python/bytecodes.c" + #line 1504 "Python/bytecodes.c" list = _PyList_FromArraySteal(values, 
oparg); if (list == NULL) { STACK_SHRINK(oparg); goto error; } - #line 2053 "Python/generated_cases.c.h" + #line 2059 "Python/generated_cases.c.h" STACK_SHRINK(oparg); STACK_GROW(1); stack_pointer[-1] = list; @@ -2059,7 +2065,7 @@ TARGET(LIST_EXTEND) { PyObject *iterable = stack_pointer[-1]; PyObject *list = stack_pointer[-(2 + (oparg-1))]; - #line 1501 "Python/bytecodes.c" + #line 1509 "Python/bytecodes.c" PyObject *none_val = _PyList_Extend((PyListObject *)list, iterable); if (none_val == NULL) { if (_PyErr_ExceptionMatches(tstate, PyExc_TypeError) && @@ -2070,13 +2076,13 @@ "Value after * must be an iterable, not %.200s", Py_TYPE(iterable)->tp_name); } - #line 2074 "Python/generated_cases.c.h" + #line 2080 "Python/generated_cases.c.h" Py_DECREF(iterable); - #line 1512 "Python/bytecodes.c" + #line 1520 "Python/bytecodes.c" if (true) goto pop_1_error; } assert(Py_IsNone(none_val)); - #line 2080 "Python/generated_cases.c.h" + #line 2086 "Python/generated_cases.c.h" Py_DECREF(iterable); STACK_SHRINK(1); DISPATCH(); @@ -2085,13 +2091,13 @@ TARGET(SET_UPDATE) { PyObject *iterable = stack_pointer[-1]; PyObject *set = stack_pointer[-(2 + (oparg-1))]; - #line 1519 "Python/bytecodes.c" + #line 1527 "Python/bytecodes.c" int err = _PySet_Update(set, iterable); - #line 2091 "Python/generated_cases.c.h" + #line 2097 "Python/generated_cases.c.h" Py_DECREF(iterable); - #line 1521 "Python/bytecodes.c" + #line 1529 "Python/bytecodes.c" if (err < 0) goto pop_1_error; - #line 2095 "Python/generated_cases.c.h" + #line 2101 "Python/generated_cases.c.h" STACK_SHRINK(1); DISPATCH(); } @@ -2099,7 +2105,7 @@ TARGET(BUILD_SET) { PyObject **values = (stack_pointer - oparg); PyObject *set; - #line 1525 "Python/bytecodes.c" + #line 1533 "Python/bytecodes.c" set = PySet_New(NULL); if (set == NULL) goto error; @@ -2114,7 +2120,7 @@ Py_DECREF(set); if (true) { STACK_SHRINK(oparg); goto error; } } - #line 2118 "Python/generated_cases.c.h" + #line 2124 "Python/generated_cases.c.h" 
STACK_SHRINK(oparg); STACK_GROW(1); stack_pointer[-1] = set; @@ -2124,7 +2130,7 @@ TARGET(BUILD_MAP) { PyObject **values = (stack_pointer - oparg*2); PyObject *map; - #line 1542 "Python/bytecodes.c" + #line 1550 "Python/bytecodes.c" map = _PyDict_FromItems( values, 2, values+1, 2, @@ -2132,13 +2138,13 @@ if (map == NULL) goto error; - #line 2136 "Python/generated_cases.c.h" + #line 2142 "Python/generated_cases.c.h" for (int _i = oparg*2; --_i >= 0;) { Py_DECREF(values[_i]); } - #line 1550 "Python/bytecodes.c" + #line 1558 "Python/bytecodes.c" if (map == NULL) { STACK_SHRINK(oparg*2); goto error; } - #line 2142 "Python/generated_cases.c.h" + #line 2148 "Python/generated_cases.c.h" STACK_SHRINK(oparg*2); STACK_GROW(1); stack_pointer[-1] = map; @@ -2146,7 +2152,7 @@ } TARGET(SETUP_ANNOTATIONS) { - #line 1554 "Python/bytecodes.c" + #line 1562 "Python/bytecodes.c" int err; PyObject *ann_dict; if (LOCALS() == NULL) { @@ -2186,7 +2192,7 @@ Py_DECREF(ann_dict); } } - #line 2190 "Python/generated_cases.c.h" + #line 2196 "Python/generated_cases.c.h" DISPATCH(); } @@ -2194,7 +2200,7 @@ PyObject *keys = stack_pointer[-1]; PyObject **values = (stack_pointer - (1 + oparg)); PyObject *map; - #line 1596 "Python/bytecodes.c" + #line 1604 "Python/bytecodes.c" if (!PyTuple_CheckExact(keys) || PyTuple_GET_SIZE(keys) != (Py_ssize_t)oparg) { _PyErr_SetString(tstate, PyExc_SystemError, @@ -2204,14 +2210,14 @@ map = _PyDict_FromItems( &PyTuple_GET_ITEM(keys, 0), 1, values, 1, oparg); - #line 2208 "Python/generated_cases.c.h" + #line 2214 "Python/generated_cases.c.h" for (int _i = oparg; --_i >= 0;) { Py_DECREF(values[_i]); } Py_DECREF(keys); - #line 1606 "Python/bytecodes.c" + #line 1614 "Python/bytecodes.c" if (map == NULL) { STACK_SHRINK(oparg); goto pop_1_error; } - #line 2215 "Python/generated_cases.c.h" + #line 2221 "Python/generated_cases.c.h" STACK_SHRINK(oparg); stack_pointer[-1] = map; DISPATCH(); @@ -2219,7 +2225,7 @@ TARGET(DICT_UPDATE) { PyObject *update = stack_pointer[-1]; - 
#line 1610 "Python/bytecodes.c" + #line 1618 "Python/bytecodes.c" PyObject *dict = PEEK(oparg + 1); // update is still on the stack if (PyDict_Update(dict, update) < 0) { if (_PyErr_ExceptionMatches(tstate, PyExc_AttributeError)) { @@ -2227,12 +2233,12 @@ "'%.200s' object is not a mapping", Py_TYPE(update)->tp_name); } - #line 2231 "Python/generated_cases.c.h" + #line 2237 "Python/generated_cases.c.h" Py_DECREF(update); - #line 1618 "Python/bytecodes.c" + #line 1626 "Python/bytecodes.c" if (true) goto pop_1_error; } - #line 2236 "Python/generated_cases.c.h" + #line 2242 "Python/generated_cases.c.h" Py_DECREF(update); STACK_SHRINK(1); DISPATCH(); @@ -2240,17 +2246,17 @@ TARGET(DICT_MERGE) { PyObject *update = stack_pointer[-1]; - #line 1624 "Python/bytecodes.c" + #line 1632 "Python/bytecodes.c" PyObject *dict = PEEK(oparg + 1); // update is still on the stack if (_PyDict_MergeEx(dict, update, 2) < 0) { format_kwargs_error(tstate, PEEK(3 + oparg), update); - #line 2249 "Python/generated_cases.c.h" + #line 2255 "Python/generated_cases.c.h" Py_DECREF(update); - #line 1629 "Python/bytecodes.c" + #line 1637 "Python/bytecodes.c" if (true) goto pop_1_error; } - #line 2254 "Python/generated_cases.c.h" + #line 2260 "Python/generated_cases.c.h" Py_DECREF(update); STACK_SHRINK(1); PREDICT(CALL_FUNCTION_EX); @@ -2260,26 +2266,26 @@ TARGET(MAP_ADD) { PyObject *value = stack_pointer[-1]; PyObject *key = stack_pointer[-2]; - #line 1636 "Python/bytecodes.c" + #line 1644 "Python/bytecodes.c" PyObject *dict = PEEK(oparg + 2); // key, value are still on the stack assert(PyDict_CheckExact(dict)); /* dict[key] = value */ // Do not DECREF INPUTS because the function steals the references if (_PyDict_SetItem_Take2((PyDictObject *)dict, key, value) != 0) goto pop_2_error; - #line 2270 "Python/generated_cases.c.h" + #line 2276 "Python/generated_cases.c.h" STACK_SHRINK(2); PREDICT(JUMP_BACKWARD); DISPATCH(); } TARGET(INSTRUMENTED_LOAD_SUPER_ATTR) { - #line 1645 "Python/bytecodes.c" + #line 
1653 "Python/bytecodes.c" _PySuperAttrCache *cache = (_PySuperAttrCache *)next_instr; // cancel out the decrement that will happen in LOAD_SUPER_ATTR; we // don't want to specialize instrumented instructions INCREMENT_ADAPTIVE_COUNTER(cache->counter); GO_TO_INSTRUCTION(LOAD_SUPER_ATTR); - #line 2283 "Python/generated_cases.c.h" + #line 2289 "Python/generated_cases.c.h" } TARGET(LOAD_SUPER_ATTR) { @@ -2290,7 +2296,7 @@ PyObject *global_super = stack_pointer[-3]; PyObject *res2 = NULL; PyObject *res; - #line 1659 "Python/bytecodes.c" + #line 1667 "Python/bytecodes.c" PyObject *name = GETITEM(frame->f_code->co_names, oparg >> 2); int load_method = oparg & 1; #if ENABLE_SPECIALIZATION @@ -2332,16 +2338,16 @@ } } } - #line 2336 "Python/generated_cases.c.h" + #line 2342 "Python/generated_cases.c.h" Py_DECREF(global_super); Py_DECREF(class); Py_DECREF(self); - #line 1701 "Python/bytecodes.c" + #line 1709 "Python/bytecodes.c" if (super == NULL) goto pop_3_error; res = PyObject_GetAttr(super, name); Py_DECREF(super); if (res == NULL) goto pop_3_error; - #line 2345 "Python/generated_cases.c.h" + #line 2351 "Python/generated_cases.c.h" STACK_SHRINK(2); STACK_GROW(((oparg & 1) ? 
1 : 0)); stack_pointer[-1] = res; @@ -2356,20 +2362,20 @@ PyObject *global_super = stack_pointer[-3]; PyObject *res2 = NULL; PyObject *res; - #line 1708 "Python/bytecodes.c" + #line 1716 "Python/bytecodes.c" assert(!(oparg & 1)); DEOPT_IF(global_super != (PyObject *)&PySuper_Type, LOAD_SUPER_ATTR); DEOPT_IF(!PyType_Check(class), LOAD_SUPER_ATTR); STAT_INC(LOAD_SUPER_ATTR, hit); PyObject *name = GETITEM(frame->f_code->co_names, oparg >> 2); res = _PySuper_Lookup((PyTypeObject *)class, self, name, NULL); - #line 2367 "Python/generated_cases.c.h" + #line 2373 "Python/generated_cases.c.h" Py_DECREF(global_super); Py_DECREF(class); Py_DECREF(self); - #line 1715 "Python/bytecodes.c" + #line 1723 "Python/bytecodes.c" if (res == NULL) goto pop_3_error; - #line 2373 "Python/generated_cases.c.h" + #line 2379 "Python/generated_cases.c.h" STACK_SHRINK(2); STACK_GROW(((oparg & 1) ? 1 : 0)); stack_pointer[-1] = res; @@ -2384,7 +2390,7 @@ PyObject *global_super = stack_pointer[-3]; PyObject *res2; PyObject *res; - #line 1719 "Python/bytecodes.c" + #line 1727 "Python/bytecodes.c" assert(oparg & 1); DEOPT_IF(global_super != (PyObject *)&PySuper_Type, LOAD_SUPER_ATTR); DEOPT_IF(!PyType_Check(class), LOAD_SUPER_ATTR); @@ -2407,7 +2413,7 @@ res = res2; res2 = NULL; } - #line 2411 "Python/generated_cases.c.h" + #line 2417 "Python/generated_cases.c.h" STACK_SHRINK(1); stack_pointer[-1] = res; stack_pointer[-2] = res2; @@ -2421,7 +2427,7 @@ PyObject *owner = stack_pointer[-1]; PyObject *res2 = NULL; PyObject *res; - #line 1758 "Python/bytecodes.c" + #line 1766 "Python/bytecodes.c" #if ENABLE_SPECIALIZATION _PyAttrCache *cache = (_PyAttrCache *)next_instr; if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) { @@ -2455,9 +2461,9 @@ NULL | meth | arg1 | ... 
| argN */ - #line 2459 "Python/generated_cases.c.h" + #line 2465 "Python/generated_cases.c.h" Py_DECREF(owner); - #line 1792 "Python/bytecodes.c" + #line 1800 "Python/bytecodes.c" if (meth == NULL) goto pop_1_error; res2 = NULL; res = meth; @@ -2466,12 +2472,12 @@ else { /* Classic, pushes one value. */ res = PyObject_GetAttr(owner, name); - #line 2470 "Python/generated_cases.c.h" + #line 2476 "Python/generated_cases.c.h" Py_DECREF(owner); - #line 1801 "Python/bytecodes.c" + #line 1809 "Python/bytecodes.c" if (res == NULL) goto pop_1_error; } - #line 2475 "Python/generated_cases.c.h" + #line 2481 "Python/generated_cases.c.h" STACK_GROW(((oparg & 1) ? 1 : 0)); stack_pointer[-1] = res; if (oparg & 1) { stack_pointer[-(1 + ((oparg & 1) ? 1 : 0))] = res2; } @@ -2485,7 +2491,7 @@ PyObject *res; uint32_t type_version = read_u32(&next_instr[1].cache); uint16_t index = read_u16(&next_instr[3].cache); - #line 1806 "Python/bytecodes.c" + #line 1825 "Python/bytecodes.c" PyTypeObject *tp = Py_TYPE(owner); assert(type_version != 0); DEOPT_IF(tp->tp_version_tag != type_version, LOAD_ATTR); @@ -2498,7 +2504,7 @@ STAT_INC(LOAD_ATTR, hit); Py_INCREF(res); res2 = NULL; - #line 2502 "Python/generated_cases.c.h" + #line 2508 "Python/generated_cases.c.h" Py_DECREF(owner); STACK_GROW(((oparg & 1) ? 1 : 0)); stack_pointer[-1] = res; @@ -2513,7 +2519,7 @@ PyObject *res; uint32_t type_version = read_u32(&next_instr[1].cache); uint16_t index = read_u16(&next_instr[3].cache); - #line 1822 "Python/bytecodes.c" + #line 1841 "Python/bytecodes.c" DEOPT_IF(!PyModule_CheckExact(owner), LOAD_ATTR); PyDictObject *dict = (PyDictObject *)((PyModuleObject *)owner)->md_dict; assert(dict != NULL); @@ -2526,7 +2532,7 @@ STAT_INC(LOAD_ATTR, hit); Py_INCREF(res); res2 = NULL; - #line 2530 "Python/generated_cases.c.h" + #line 2536 "Python/generated_cases.c.h" Py_DECREF(owner); STACK_GROW(((oparg & 1) ? 
1 : 0)); stack_pointer[-1] = res; @@ -2541,7 +2547,7 @@ PyObject *res; uint32_t type_version = read_u32(&next_instr[1].cache); uint16_t index = read_u16(&next_instr[3].cache); - #line 1838 "Python/bytecodes.c" + #line 1874 "Python/bytecodes.c" PyTypeObject *tp = Py_TYPE(owner); assert(type_version != 0); DEOPT_IF(tp->tp_version_tag != type_version, LOAD_ATTR); @@ -2568,7 +2574,7 @@ STAT_INC(LOAD_ATTR, hit); Py_INCREF(res); res2 = NULL; - #line 2572 "Python/generated_cases.c.h" + #line 2578 "Python/generated_cases.c.h" Py_DECREF(owner); STACK_GROW(((oparg & 1) ? 1 : 0)); stack_pointer[-1] = res; @@ -2583,7 +2589,7 @@ PyObject *res; uint32_t type_version = read_u32(&next_instr[1].cache); uint16_t index = read_u16(&next_instr[3].cache); - #line 1868 "Python/bytecodes.c" + #line 1904 "Python/bytecodes.c" PyTypeObject *tp = Py_TYPE(owner); assert(type_version != 0); DEOPT_IF(tp->tp_version_tag != type_version, LOAD_ATTR); @@ -2593,7 +2599,7 @@ STAT_INC(LOAD_ATTR, hit); Py_INCREF(res); res2 = NULL; - #line 2597 "Python/generated_cases.c.h" + #line 2603 "Python/generated_cases.c.h" Py_DECREF(owner); STACK_GROW(((oparg & 1) ? 1 : 0)); stack_pointer[-1] = res; @@ -2608,7 +2614,7 @@ PyObject *res; uint32_t type_version = read_u32(&next_instr[1].cache); PyObject *descr = read_obj(&next_instr[5].cache); - #line 1881 "Python/bytecodes.c" + #line 1917 "Python/bytecodes.c" DEOPT_IF(!PyType_Check(cls), LOAD_ATTR); DEOPT_IF(((PyTypeObject *)cls)->tp_version_tag != type_version, @@ -2620,7 +2626,7 @@ res = descr; assert(res != NULL); Py_INCREF(res); - #line 2624 "Python/generated_cases.c.h" + #line 2630 "Python/generated_cases.c.h" Py_DECREF(cls); STACK_GROW(((oparg & 1) ? 
1 : 0)); stack_pointer[-1] = res; @@ -2634,7 +2640,7 @@ uint32_t type_version = read_u32(&next_instr[1].cache); uint32_t func_version = read_u32(&next_instr[3].cache); PyObject *fget = read_obj(&next_instr[5].cache); - #line 1896 "Python/bytecodes.c" + #line 1932 "Python/bytecodes.c" DEOPT_IF(tstate->interp->eval_frame, LOAD_ATTR); PyTypeObject *cls = Py_TYPE(owner); @@ -2658,7 +2664,7 @@ JUMPBY(INLINE_CACHE_ENTRIES_LOAD_ATTR); frame->return_offset = 0; DISPATCH_INLINED(new_frame); - #line 2662 "Python/generated_cases.c.h" + #line 2668 "Python/generated_cases.c.h" } TARGET(LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN) { @@ -2666,7 +2672,7 @@ uint32_t type_version = read_u32(&next_instr[1].cache); uint32_t func_version = read_u32(&next_instr[3].cache); PyObject *getattribute = read_obj(&next_instr[5].cache); - #line 1922 "Python/bytecodes.c" + #line 1958 "Python/bytecodes.c" DEOPT_IF(tstate->interp->eval_frame, LOAD_ATTR); PyTypeObject *cls = Py_TYPE(owner); DEOPT_IF(cls->tp_version_tag != type_version, LOAD_ATTR); @@ -2692,7 +2698,7 @@ JUMPBY(INLINE_CACHE_ENTRIES_LOAD_ATTR); frame->return_offset = 0; DISPATCH_INLINED(new_frame); - #line 2696 "Python/generated_cases.c.h" + #line 2702 "Python/generated_cases.c.h" } TARGET(STORE_ATTR_INSTANCE_VALUE) { @@ -2700,7 +2706,7 @@ PyObject *value = stack_pointer[-2]; uint32_t type_version = read_u32(&next_instr[1].cache); uint16_t index = read_u16(&next_instr[3].cache); - #line 1950 "Python/bytecodes.c" + #line 1986 "Python/bytecodes.c" PyTypeObject *tp = Py_TYPE(owner); assert(type_version != 0); DEOPT_IF(tp->tp_version_tag != type_version, STORE_ATTR); @@ -2718,7 +2724,7 @@ Py_DECREF(old_value); } Py_DECREF(owner); - #line 2722 "Python/generated_cases.c.h" + #line 2728 "Python/generated_cases.c.h" STACK_SHRINK(2); next_instr += 4; DISPATCH(); @@ -2729,7 +2735,7 @@ PyObject *value = stack_pointer[-2]; uint32_t type_version = read_u32(&next_instr[1].cache); uint16_t hint = read_u16(&next_instr[3].cache); - #line 1970 
"Python/bytecodes.c" + #line 2006 "Python/bytecodes.c" PyTypeObject *tp = Py_TYPE(owner); assert(type_version != 0); DEOPT_IF(tp->tp_version_tag != type_version, STORE_ATTR); @@ -2768,7 +2774,7 @@ /* PEP 509 */ dict->ma_version_tag = new_version; Py_DECREF(owner); - #line 2772 "Python/generated_cases.c.h" + #line 2778 "Python/generated_cases.c.h" STACK_SHRINK(2); next_instr += 4; DISPATCH(); @@ -2779,7 +2785,7 @@ PyObject *value = stack_pointer[-2]; uint32_t type_version = read_u32(&next_instr[1].cache); uint16_t index = read_u16(&next_instr[3].cache); - #line 2011 "Python/bytecodes.c" + #line 2047 "Python/bytecodes.c" PyTypeObject *tp = Py_TYPE(owner); assert(type_version != 0); DEOPT_IF(tp->tp_version_tag != type_version, STORE_ATTR); @@ -2789,7 +2795,7 @@ *(PyObject **)addr = value; Py_XDECREF(old_value); Py_DECREF(owner); - #line 2793 "Python/generated_cases.c.h" + #line 2799 "Python/generated_cases.c.h" STACK_SHRINK(2); next_instr += 4; DISPATCH(); @@ -2801,7 +2807,7 @@ PyObject *right = stack_pointer[-1]; PyObject *left = stack_pointer[-2]; PyObject *res; - #line 2030 "Python/bytecodes.c" + #line 2066 "Python/bytecodes.c" #if ENABLE_SPECIALIZATION _PyCompareOpCache *cache = (_PyCompareOpCache *)next_instr; if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) { @@ -2814,12 +2820,12 @@ #endif /* ENABLE_SPECIALIZATION */ assert((oparg >> 4) <= Py_GE); res = PyObject_RichCompare(left, right, oparg>>4); - #line 2818 "Python/generated_cases.c.h" + #line 2824 "Python/generated_cases.c.h" Py_DECREF(left); Py_DECREF(right); - #line 2043 "Python/bytecodes.c" + #line 2079 "Python/bytecodes.c" if (res == NULL) goto pop_2_error; - #line 2823 "Python/generated_cases.c.h" + #line 2829 "Python/generated_cases.c.h" STACK_SHRINK(1); stack_pointer[-1] = res; next_instr += 1; @@ -2830,7 +2836,7 @@ PyObject *right = stack_pointer[-1]; PyObject *left = stack_pointer[-2]; PyObject *res; - #line 2047 "Python/bytecodes.c" + #line 2083 "Python/bytecodes.c" DEOPT_IF(!PyFloat_CheckExact(left), 
COMPARE_OP); DEOPT_IF(!PyFloat_CheckExact(right), COMPARE_OP); STAT_INC(COMPARE_OP, hit); @@ -2841,7 +2847,7 @@ _Py_DECREF_SPECIALIZED(left, _PyFloat_ExactDealloc); _Py_DECREF_SPECIALIZED(right, _PyFloat_ExactDealloc); res = (sign_ish & oparg) ? Py_True : Py_False; - #line 2845 "Python/generated_cases.c.h" + #line 2851 "Python/generated_cases.c.h" STACK_SHRINK(1); stack_pointer[-1] = res; next_instr += 1; @@ -2852,7 +2858,7 @@ PyObject *right = stack_pointer[-1]; PyObject *left = stack_pointer[-2]; PyObject *res; - #line 2061 "Python/bytecodes.c" + #line 2097 "Python/bytecodes.c" DEOPT_IF(!PyLong_CheckExact(left), COMPARE_OP); DEOPT_IF(!PyLong_CheckExact(right), COMPARE_OP); DEOPT_IF(!_PyLong_IsCompact((PyLongObject *)left), COMPARE_OP); @@ -2867,7 +2873,7 @@ _Py_DECREF_SPECIALIZED(left, (destructor)PyObject_Free); _Py_DECREF_SPECIALIZED(right, (destructor)PyObject_Free); res = (sign_ish & oparg) ? Py_True : Py_False; - #line 2871 "Python/generated_cases.c.h" + #line 2877 "Python/generated_cases.c.h" STACK_SHRINK(1); stack_pointer[-1] = res; next_instr += 1; @@ -2878,7 +2884,7 @@ PyObject *right = stack_pointer[-1]; PyObject *left = stack_pointer[-2]; PyObject *res; - #line 2079 "Python/bytecodes.c" + #line 2115 "Python/bytecodes.c" DEOPT_IF(!PyUnicode_CheckExact(left), COMPARE_OP); DEOPT_IF(!PyUnicode_CheckExact(right), COMPARE_OP); STAT_INC(COMPARE_OP, hit); @@ -2890,7 +2896,7 @@ assert((oparg & 0xf) == COMPARISON_NOT_EQUALS || (oparg & 0xf) == COMPARISON_EQUALS); assert(COMPARISON_NOT_EQUALS + 1 == COMPARISON_EQUALS); res = ((COMPARISON_NOT_EQUALS + eq) & oparg) ? 
Py_True : Py_False; - #line 2894 "Python/generated_cases.c.h" + #line 2900 "Python/generated_cases.c.h" STACK_SHRINK(1); stack_pointer[-1] = res; next_instr += 1; @@ -2901,14 +2907,14 @@ PyObject *right = stack_pointer[-1]; PyObject *left = stack_pointer[-2]; PyObject *b; - #line 2093 "Python/bytecodes.c" + #line 2129 "Python/bytecodes.c" int res = Py_Is(left, right) ^ oparg; - #line 2907 "Python/generated_cases.c.h" + #line 2913 "Python/generated_cases.c.h" Py_DECREF(left); Py_DECREF(right); - #line 2095 "Python/bytecodes.c" + #line 2131 "Python/bytecodes.c" b = res ? Py_True : Py_False; - #line 2912 "Python/generated_cases.c.h" + #line 2918 "Python/generated_cases.c.h" STACK_SHRINK(1); stack_pointer[-1] = b; DISPATCH(); @@ -2918,15 +2924,15 @@ PyObject *right = stack_pointer[-1]; PyObject *left = stack_pointer[-2]; PyObject *b; - #line 2099 "Python/bytecodes.c" + #line 2135 "Python/bytecodes.c" int res = PySequence_Contains(right, left); - #line 2924 "Python/generated_cases.c.h" + #line 2930 "Python/generated_cases.c.h" Py_DECREF(left); Py_DECREF(right); - #line 2101 "Python/bytecodes.c" + #line 2137 "Python/bytecodes.c" if (res < 0) goto pop_2_error; b = (res ^ oparg) ? 
Py_True : Py_False; - #line 2930 "Python/generated_cases.c.h" + #line 2936 "Python/generated_cases.c.h" STACK_SHRINK(1); stack_pointer[-1] = b; DISPATCH(); @@ -2937,12 +2943,12 @@ PyObject *exc_value = stack_pointer[-2]; PyObject *rest; PyObject *match; - #line 2106 "Python/bytecodes.c" + #line 2142 "Python/bytecodes.c" if (check_except_star_type_valid(tstate, match_type) < 0) { - #line 2943 "Python/generated_cases.c.h" + #line 2949 "Python/generated_cases.c.h" Py_DECREF(exc_value); Py_DECREF(match_type); - #line 2108 "Python/bytecodes.c" + #line 2144 "Python/bytecodes.c" if (true) goto pop_2_error; } @@ -2950,10 +2956,10 @@ rest = NULL; int res = exception_group_match(exc_value, match_type, &match, &rest); - #line 2954 "Python/generated_cases.c.h" + #line 2960 "Python/generated_cases.c.h" Py_DECREF(exc_value); Py_DECREF(match_type); - #line 2116 "Python/bytecodes.c" + #line 2152 "Python/bytecodes.c" if (res < 0) goto pop_2_error; assert((match == NULL) == (rest == NULL)); @@ -2962,7 +2968,7 @@ if (!Py_IsNone(match)) { PyErr_SetHandledException(match); } - #line 2966 "Python/generated_cases.c.h" + #line 2972 "Python/generated_cases.c.h" stack_pointer[-1] = match; stack_pointer[-2] = rest; DISPATCH(); @@ -2972,21 +2978,21 @@ PyObject *right = stack_pointer[-1]; PyObject *left = stack_pointer[-2]; PyObject *b; - #line 2127 "Python/bytecodes.c" + #line 2163 "Python/bytecodes.c" assert(PyExceptionInstance_Check(left)); if (check_except_type_valid(tstate, right) < 0) { - #line 2979 "Python/generated_cases.c.h" + #line 2985 "Python/generated_cases.c.h" Py_DECREF(right); - #line 2130 "Python/bytecodes.c" + #line 2166 "Python/bytecodes.c" if (true) goto pop_1_error; } int res = PyErr_GivenExceptionMatches(left, right); - #line 2986 "Python/generated_cases.c.h" + #line 2992 "Python/generated_cases.c.h" Py_DECREF(right); - #line 2135 "Python/bytecodes.c" + #line 2171 "Python/bytecodes.c" b = res ? 
Py_True : Py_False; - #line 2990 "Python/generated_cases.c.h" + #line 2996 "Python/generated_cases.c.h" stack_pointer[-1] = b; DISPATCH(); } @@ -2995,15 +3001,15 @@ PyObject *fromlist = stack_pointer[-1]; PyObject *level = stack_pointer[-2]; PyObject *res; - #line 2139 "Python/bytecodes.c" + #line 2175 "Python/bytecodes.c" PyObject *name = GETITEM(frame->f_code->co_names, oparg); res = import_name(tstate, frame, name, fromlist, level); - #line 3002 "Python/generated_cases.c.h" + #line 3008 "Python/generated_cases.c.h" Py_DECREF(level); Py_DECREF(fromlist); - #line 2142 "Python/bytecodes.c" + #line 2178 "Python/bytecodes.c" if (res == NULL) goto pop_2_error; - #line 3007 "Python/generated_cases.c.h" + #line 3013 "Python/generated_cases.c.h" STACK_SHRINK(1); stack_pointer[-1] = res; DISPATCH(); @@ -3012,29 +3018,29 @@ TARGET(IMPORT_FROM) { PyObject *from = stack_pointer[-1]; PyObject *res; - #line 2146 "Python/bytecodes.c" + #line 2182 "Python/bytecodes.c" PyObject *name = GETITEM(frame->f_code->co_names, oparg); res = import_from(tstate, from, name); if (res == NULL) goto error; - #line 3020 "Python/generated_cases.c.h" + #line 3026 "Python/generated_cases.c.h" STACK_GROW(1); stack_pointer[-1] = res; DISPATCH(); } TARGET(JUMP_FORWARD) { - #line 2152 "Python/bytecodes.c" + #line 2188 "Python/bytecodes.c" JUMPBY(oparg); - #line 3029 "Python/generated_cases.c.h" + #line 3035 "Python/generated_cases.c.h" DISPATCH(); } TARGET(JUMP_BACKWARD) { PREDICTED(JUMP_BACKWARD); - #line 2156 "Python/bytecodes.c" + #line 2192 "Python/bytecodes.c" assert(oparg < INSTR_OFFSET()); JUMPBY(-oparg); - #line 3038 "Python/generated_cases.c.h" + #line 3044 "Python/generated_cases.c.h" CHECK_EVAL_BREAKER(); DISPATCH(); } @@ -3042,15 +3048,15 @@ TARGET(POP_JUMP_IF_FALSE) { PREDICTED(POP_JUMP_IF_FALSE); PyObject *cond = stack_pointer[-1]; - #line 2162 "Python/bytecodes.c" + #line 2198 "Python/bytecodes.c" if (Py_IsFalse(cond)) { JUMPBY(oparg); } else if (!Py_IsTrue(cond)) { int err = 
PyObject_IsTrue(cond); - #line 3052 "Python/generated_cases.c.h" + #line 3058 "Python/generated_cases.c.h" Py_DECREF(cond); - #line 2168 "Python/bytecodes.c" + #line 2204 "Python/bytecodes.c" if (err == 0) { JUMPBY(oparg); } @@ -3058,22 +3064,22 @@ if (err < 0) goto pop_1_error; } } - #line 3062 "Python/generated_cases.c.h" + #line 3068 "Python/generated_cases.c.h" STACK_SHRINK(1); DISPATCH(); } TARGET(POP_JUMP_IF_TRUE) { PyObject *cond = stack_pointer[-1]; - #line 2178 "Python/bytecodes.c" + #line 2214 "Python/bytecodes.c" if (Py_IsTrue(cond)) { JUMPBY(oparg); } else if (!Py_IsFalse(cond)) { int err = PyObject_IsTrue(cond); - #line 3075 "Python/generated_cases.c.h" + #line 3081 "Python/generated_cases.c.h" Py_DECREF(cond); - #line 2184 "Python/bytecodes.c" + #line 2220 "Python/bytecodes.c" if (err > 0) { JUMPBY(oparg); } @@ -3081,63 +3087,63 @@ if (err < 0) goto pop_1_error; } } - #line 3085 "Python/generated_cases.c.h" + #line 3091 "Python/generated_cases.c.h" STACK_SHRINK(1); DISPATCH(); } TARGET(POP_JUMP_IF_NOT_NONE) { PyObject *value = stack_pointer[-1]; - #line 2194 "Python/bytecodes.c" + #line 2230 "Python/bytecodes.c" if (!Py_IsNone(value)) { - #line 3094 "Python/generated_cases.c.h" + #line 3100 "Python/generated_cases.c.h" Py_DECREF(value); - #line 2196 "Python/bytecodes.c" + #line 2232 "Python/bytecodes.c" JUMPBY(oparg); } - #line 3099 "Python/generated_cases.c.h" + #line 3105 "Python/generated_cases.c.h" STACK_SHRINK(1); DISPATCH(); } TARGET(POP_JUMP_IF_NONE) { PyObject *value = stack_pointer[-1]; - #line 2201 "Python/bytecodes.c" + #line 2237 "Python/bytecodes.c" if (Py_IsNone(value)) { JUMPBY(oparg); } else { - #line 3111 "Python/generated_cases.c.h" + #line 3117 "Python/generated_cases.c.h" Py_DECREF(value); - #line 2206 "Python/bytecodes.c" + #line 2242 "Python/bytecodes.c" } - #line 3115 "Python/generated_cases.c.h" + #line 3121 "Python/generated_cases.c.h" STACK_SHRINK(1); DISPATCH(); } TARGET(JUMP_BACKWARD_NO_INTERRUPT) { - #line 2210 
"Python/bytecodes.c" + #line 2246 "Python/bytecodes.c" /* This bytecode is used in the `yield from` or `await` loop. * If there is an interrupt, we want it handled in the innermost * generator or coroutine, so we deliberately do not check it here. * (see bpo-30039). */ JUMPBY(-oparg); - #line 3128 "Python/generated_cases.c.h" + #line 3134 "Python/generated_cases.c.h" DISPATCH(); } TARGET(GET_LEN) { PyObject *obj = stack_pointer[-1]; PyObject *len_o; - #line 2219 "Python/bytecodes.c" + #line 2255 "Python/bytecodes.c" // PUSH(len(TOS)) Py_ssize_t len_i = PyObject_Length(obj); if (len_i < 0) goto error; len_o = PyLong_FromSsize_t(len_i); if (len_o == NULL) goto error; - #line 3141 "Python/generated_cases.c.h" + #line 3147 "Python/generated_cases.c.h" STACK_GROW(1); stack_pointer[-1] = len_o; DISPATCH(); @@ -3148,16 +3154,16 @@ PyObject *type = stack_pointer[-2]; PyObject *subject = stack_pointer[-3]; PyObject *attrs; - #line 2227 "Python/bytecodes.c" + #line 2263 "Python/bytecodes.c" // Pop TOS and TOS1. Set TOS to a tuple of attributes on success, or // None on failure. assert(PyTuple_CheckExact(names)); attrs = match_class(tstate, subject, type, oparg, names); - #line 3157 "Python/generated_cases.c.h" + #line 3163 "Python/generated_cases.c.h" Py_DECREF(subject); Py_DECREF(type); Py_DECREF(names); - #line 2232 "Python/bytecodes.c" + #line 2268 "Python/bytecodes.c" if (attrs) { assert(PyTuple_CheckExact(attrs)); // Success! } @@ -3165,7 +3171,7 @@ if (_PyErr_Occurred(tstate)) goto pop_3_error; attrs = Py_None; // Failure! } - #line 3169 "Python/generated_cases.c.h" + #line 3175 "Python/generated_cases.c.h" STACK_SHRINK(2); stack_pointer[-1] = attrs; DISPATCH(); @@ -3174,10 +3180,10 @@ TARGET(MATCH_MAPPING) { PyObject *subject = stack_pointer[-1]; PyObject *res; - #line 2242 "Python/bytecodes.c" + #line 2278 "Python/bytecodes.c" int match = Py_TYPE(subject)->tp_flags & Py_TPFLAGS_MAPPING; res = match ? 
Py_True : Py_False; - #line 3181 "Python/generated_cases.c.h" + #line 3187 "Python/generated_cases.c.h" STACK_GROW(1); stack_pointer[-1] = res; PREDICT(POP_JUMP_IF_FALSE); @@ -3187,10 +3193,10 @@ TARGET(MATCH_SEQUENCE) { PyObject *subject = stack_pointer[-1]; PyObject *res; - #line 2248 "Python/bytecodes.c" + #line 2284 "Python/bytecodes.c" int match = Py_TYPE(subject)->tp_flags & Py_TPFLAGS_SEQUENCE; res = match ? Py_True : Py_False; - #line 3194 "Python/generated_cases.c.h" + #line 3200 "Python/generated_cases.c.h" STACK_GROW(1); stack_pointer[-1] = res; PREDICT(POP_JUMP_IF_FALSE); @@ -3201,11 +3207,11 @@ PyObject *keys = stack_pointer[-1]; PyObject *subject = stack_pointer[-2]; PyObject *values_or_none; - #line 2254 "Python/bytecodes.c" + #line 2290 "Python/bytecodes.c" // On successful match, PUSH(values). Otherwise, PUSH(None). values_or_none = match_keys(tstate, subject, keys); if (values_or_none == NULL) goto error; - #line 3209 "Python/generated_cases.c.h" + #line 3215 "Python/generated_cases.c.h" STACK_GROW(1); stack_pointer[-1] = values_or_none; DISPATCH(); @@ -3214,14 +3220,14 @@ TARGET(GET_ITER) { PyObject *iterable = stack_pointer[-1]; PyObject *iter; - #line 2260 "Python/bytecodes.c" + #line 2296 "Python/bytecodes.c" /* before: [obj]; after [getiter(obj)] */ iter = PyObject_GetIter(iterable); - #line 3221 "Python/generated_cases.c.h" + #line 3227 "Python/generated_cases.c.h" Py_DECREF(iterable); - #line 2263 "Python/bytecodes.c" + #line 2299 "Python/bytecodes.c" if (iter == NULL) goto pop_1_error; - #line 3225 "Python/generated_cases.c.h" + #line 3231 "Python/generated_cases.c.h" stack_pointer[-1] = iter; DISPATCH(); } @@ -3229,7 +3235,7 @@ TARGET(GET_YIELD_FROM_ITER) { PyObject *iterable = stack_pointer[-1]; PyObject *iter; - #line 2267 "Python/bytecodes.c" + #line 2303 "Python/bytecodes.c" /* before: [obj]; after [getiter(obj)] */ if (PyCoro_CheckExact(iterable)) { /* `iterable` is a coroutine */ @@ -3252,11 +3258,11 @@ if (iter == NULL) { goto 
error; } - #line 3256 "Python/generated_cases.c.h" + #line 3262 "Python/generated_cases.c.h" Py_DECREF(iterable); - #line 2290 "Python/bytecodes.c" + #line 2326 "Python/bytecodes.c" } - #line 3260 "Python/generated_cases.c.h" + #line 3266 "Python/generated_cases.c.h" stack_pointer[-1] = iter; PREDICT(LOAD_CONST); DISPATCH(); @@ -3267,7 +3273,7 @@ static_assert(INLINE_CACHE_ENTRIES_FOR_ITER == 1, "incorrect cache size"); PyObject *iter = stack_pointer[-1]; PyObject *next; - #line 2309 "Python/bytecodes.c" + #line 2345 "Python/bytecodes.c" #if ENABLE_SPECIALIZATION _PyForIterCache *cache = (_PyForIterCache *)next_instr; if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) { @@ -3298,7 +3304,7 @@ DISPATCH(); } // Common case: no jump, leave it to the code generator - #line 3302 "Python/generated_cases.c.h" + #line 3308 "Python/generated_cases.c.h" STACK_GROW(1); stack_pointer[-1] = next; next_instr += 1; @@ -3306,7 +3312,7 @@ } TARGET(INSTRUMENTED_FOR_ITER) { - #line 2342 "Python/bytecodes.c" + #line 2378 "Python/bytecodes.c" _Py_CODEUNIT *here = next_instr-1; _Py_CODEUNIT *target; PyObject *iter = TOP(); @@ -3332,14 +3338,14 @@ target = next_instr + INLINE_CACHE_ENTRIES_FOR_ITER + oparg + 1; } INSTRUMENTED_JUMP(here, target, PY_MONITORING_EVENT_BRANCH); - #line 3336 "Python/generated_cases.c.h" + #line 3342 "Python/generated_cases.c.h" DISPATCH(); } TARGET(FOR_ITER_LIST) { PyObject *iter = stack_pointer[-1]; PyObject *next; - #line 2370 "Python/bytecodes.c" + #line 2406 "Python/bytecodes.c" DEOPT_IF(Py_TYPE(iter) != &PyListIter_Type, FOR_ITER); _PyListIterObject *it = (_PyListIterObject *)iter; STAT_INC(FOR_ITER, hit); @@ -3359,7 +3365,7 @@ DISPATCH(); end_for_iter_list: // Common case: no jump, leave it to the code generator - #line 3363 "Python/generated_cases.c.h" + #line 3369 "Python/generated_cases.c.h" STACK_GROW(1); stack_pointer[-1] = next; next_instr += 1; @@ -3369,7 +3375,7 @@ TARGET(FOR_ITER_TUPLE) { PyObject *iter = stack_pointer[-1]; PyObject *next; - #line 
2392 "Python/bytecodes.c" + #line 2428 "Python/bytecodes.c" _PyTupleIterObject *it = (_PyTupleIterObject *)iter; DEOPT_IF(Py_TYPE(it) != &PyTupleIter_Type, FOR_ITER); STAT_INC(FOR_ITER, hit); @@ -3389,7 +3395,7 @@ DISPATCH(); end_for_iter_tuple: // Common case: no jump, leave it to the code generator - #line 3393 "Python/generated_cases.c.h" + #line 3399 "Python/generated_cases.c.h" STACK_GROW(1); stack_pointer[-1] = next; next_instr += 1; @@ -3399,7 +3405,7 @@ TARGET(FOR_ITER_RANGE) { PyObject *iter = stack_pointer[-1]; PyObject *next; - #line 2414 "Python/bytecodes.c" + #line 2450 "Python/bytecodes.c" _PyRangeIterObject *r = (_PyRangeIterObject *)iter; DEOPT_IF(Py_TYPE(r) != &PyRangeIter_Type, FOR_ITER); STAT_INC(FOR_ITER, hit); @@ -3417,7 +3423,7 @@ if (next == NULL) { goto error; } - #line 3421 "Python/generated_cases.c.h" + #line 3427 "Python/generated_cases.c.h" STACK_GROW(1); stack_pointer[-1] = next; next_instr += 1; @@ -3426,7 +3432,7 @@ TARGET(FOR_ITER_GEN) { PyObject *iter = stack_pointer[-1]; - #line 2434 "Python/bytecodes.c" + #line 2470 "Python/bytecodes.c" DEOPT_IF(tstate->interp->eval_frame, FOR_ITER); PyGenObject *gen = (PyGenObject *)iter; DEOPT_IF(Py_TYPE(gen) != &PyGen_Type, FOR_ITER); @@ -3442,14 +3448,14 @@ assert(next_instr[oparg].op.code == END_FOR || next_instr[oparg].op.code == INSTRUMENTED_END_FOR); DISPATCH_INLINED(gen_frame); - #line 3446 "Python/generated_cases.c.h" + #line 3452 "Python/generated_cases.c.h" } TARGET(BEFORE_ASYNC_WITH) { PyObject *mgr = stack_pointer[-1]; PyObject *exit; PyObject *res; - #line 2452 "Python/bytecodes.c" + #line 2488 "Python/bytecodes.c" PyObject *enter = _PyObject_LookupSpecial(mgr, &_Py_ID(__aenter__)); if (enter == NULL) { if (!_PyErr_Occurred(tstate)) { @@ -3472,16 +3478,16 @@ Py_DECREF(enter); goto error; } - #line 3476 "Python/generated_cases.c.h" + #line 3482 "Python/generated_cases.c.h" Py_DECREF(mgr); - #line 2475 "Python/bytecodes.c" + #line 2511 "Python/bytecodes.c" res = 
_PyObject_CallNoArgs(enter); Py_DECREF(enter); if (res == NULL) { Py_DECREF(exit); if (true) goto pop_1_error; } - #line 3485 "Python/generated_cases.c.h" + #line 3491 "Python/generated_cases.c.h" STACK_GROW(1); stack_pointer[-1] = res; stack_pointer[-2] = exit; @@ -3493,7 +3499,7 @@ PyObject *mgr = stack_pointer[-1]; PyObject *exit; PyObject *res; - #line 2485 "Python/bytecodes.c" + #line 2521 "Python/bytecodes.c" /* pop the context manager, push its __exit__ and the * value returned from calling its __enter__ */ @@ -3519,16 +3525,16 @@ Py_DECREF(enter); goto error; } - #line 3523 "Python/generated_cases.c.h" + #line 3529 "Python/generated_cases.c.h" Py_DECREF(mgr); - #line 2511 "Python/bytecodes.c" + #line 2547 "Python/bytecodes.c" res = _PyObject_CallNoArgs(enter); Py_DECREF(enter); if (res == NULL) { Py_DECREF(exit); if (true) goto pop_1_error; } - #line 3532 "Python/generated_cases.c.h" + #line 3538 "Python/generated_cases.c.h" STACK_GROW(1); stack_pointer[-1] = res; stack_pointer[-2] = exit; @@ -3540,7 +3546,7 @@ PyObject *lasti = stack_pointer[-3]; PyObject *exit_func = stack_pointer[-4]; PyObject *res; - #line 2520 "Python/bytecodes.c" + #line 2556 "Python/bytecodes.c" /* At the top of the stack are 4 values: - val: TOP = exc_info() - unused: SECOND = previous exception @@ -3566,7 +3572,7 @@ res = PyObject_Vectorcall(exit_func, stack + 1, 3 | PY_VECTORCALL_ARGUMENTS_OFFSET, NULL); if (res == NULL) goto error; - #line 3570 "Python/generated_cases.c.h" + #line 3576 "Python/generated_cases.c.h" STACK_GROW(1); stack_pointer[-1] = res; DISPATCH(); @@ -3575,7 +3581,7 @@ TARGET(PUSH_EXC_INFO) { PyObject *new_exc = stack_pointer[-1]; PyObject *prev_exc; - #line 2548 "Python/bytecodes.c" + #line 2584 "Python/bytecodes.c" _PyErr_StackItem *exc_info = tstate->exc_info; if (exc_info->exc_value != NULL) { prev_exc = exc_info->exc_value; @@ -3585,7 +3591,7 @@ } assert(PyExceptionInstance_Check(new_exc)); exc_info->exc_value = Py_NewRef(new_exc); - #line 3589 
"Python/generated_cases.c.h" + #line 3595 "Python/generated_cases.c.h" STACK_GROW(1); stack_pointer[-1] = new_exc; stack_pointer[-2] = prev_exc; @@ -3599,7 +3605,7 @@ uint32_t type_version = read_u32(&next_instr[1].cache); uint32_t keys_version = read_u32(&next_instr[3].cache); PyObject *descr = read_obj(&next_instr[5].cache); - #line 2560 "Python/bytecodes.c" + #line 2596 "Python/bytecodes.c" /* Cached method object */ PyTypeObject *self_cls = Py_TYPE(self); assert(type_version != 0); @@ -3616,7 +3622,7 @@ assert(_PyType_HasFeature(Py_TYPE(res2), Py_TPFLAGS_METHOD_DESCRIPTOR)); res = self; assert(oparg & 1); - #line 3620 "Python/generated_cases.c.h" + #line 3626 "Python/generated_cases.c.h" STACK_GROW(((oparg & 1) ? 1 : 0)); stack_pointer[-1] = res; if (oparg & 1) { stack_pointer[-(1 + ((oparg & 1) ? 1 : 0))] = res2; } @@ -3630,7 +3636,7 @@ PyObject *res; uint32_t type_version = read_u32(&next_instr[1].cache); PyObject *descr = read_obj(&next_instr[5].cache); - #line 2579 "Python/bytecodes.c" + #line 2615 "Python/bytecodes.c" PyTypeObject *self_cls = Py_TYPE(self); DEOPT_IF(self_cls->tp_version_tag != type_version, LOAD_ATTR); assert(self_cls->tp_dictoffset == 0); @@ -3640,7 +3646,7 @@ res2 = Py_NewRef(descr); res = self; assert(oparg & 1); - #line 3644 "Python/generated_cases.c.h" + #line 3650 "Python/generated_cases.c.h" STACK_GROW(((oparg & 1) ? 1 : 0)); stack_pointer[-1] = res; if (oparg & 1) { stack_pointer[-(1 + ((oparg & 1) ? 
1 : 0))] = res2; } @@ -3654,7 +3660,7 @@ PyObject *res; uint32_t type_version = read_u32(&next_instr[1].cache); PyObject *descr = read_obj(&next_instr[5].cache); - #line 2591 "Python/bytecodes.c" + #line 2627 "Python/bytecodes.c" PyTypeObject *self_cls = Py_TYPE(self); DEOPT_IF(self_cls->tp_version_tag != type_version, LOAD_ATTR); Py_ssize_t dictoffset = self_cls->tp_dictoffset; @@ -3668,7 +3674,7 @@ res2 = Py_NewRef(descr); res = self; assert(oparg & 1); - #line 3672 "Python/generated_cases.c.h" + #line 3678 "Python/generated_cases.c.h" STACK_GROW(((oparg & 1) ? 1 : 0)); stack_pointer[-1] = res; if (oparg & 1) { stack_pointer[-(1 + ((oparg & 1) ? 1 : 0))] = res2; } @@ -3677,16 +3683,16 @@ } TARGET(KW_NAMES) { - #line 2607 "Python/bytecodes.c" + #line 2643 "Python/bytecodes.c" assert(kwnames == NULL); assert(oparg < PyTuple_GET_SIZE(frame->f_code->co_consts)); kwnames = GETITEM(frame->f_code->co_consts, oparg); - #line 3685 "Python/generated_cases.c.h" + #line 3691 "Python/generated_cases.c.h" DISPATCH(); } TARGET(INSTRUMENTED_CALL) { - #line 2613 "Python/bytecodes.c" + #line 2649 "Python/bytecodes.c" int is_meth = PEEK(oparg+2) != NULL; int total_args = oparg + is_meth; PyObject *function = PEEK(total_args + 1); @@ -3699,17 +3705,17 @@ _PyCallCache *cache = (_PyCallCache *)next_instr; INCREMENT_ADAPTIVE_COUNTER(cache->counter); GO_TO_INSTRUCTION(CALL); - #line 3703 "Python/generated_cases.c.h" + #line 3709 "Python/generated_cases.c.h" } TARGET(CALL) { PREDICTED(CALL); - static_assert(INLINE_CACHE_ENTRIES_CALL == 3, "incorrect cache size"); + static_assert(INLINE_CACHE_ENTRIES_CALL == 7, "incorrect cache size"); PyObject **args = (stack_pointer - oparg); PyObject *callable = stack_pointer[-(1 + oparg)]; PyObject *method = stack_pointer[-(2 + oparg)]; PyObject *res; - #line 2658 "Python/bytecodes.c" + #line 2694 "Python/bytecodes.c" int is_meth = method != NULL; int total_args = oparg; if (is_meth) { @@ -3722,6 +3728,10 @@ if 
(ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) { next_instr--; _Py_Specialize_Call(callable, next_instr, total_args, kwnames); + int result = _PyExternal_TrySpecialize(next_instr, &stack_pointer, (_PyCache *)cache); + if (result) { + oparg = next_instr->op.arg; + } DISPATCH_SAME_OPARG(); } STAT_INC(CALL, deferred); @@ -3791,11 +3801,11 @@ Py_DECREF(args[i]); } if (res == NULL) { STACK_SHRINK(oparg); goto pop_2_error; } - #line 3795 "Python/generated_cases.c.h" + #line 3805 "Python/generated_cases.c.h" STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = res; - next_instr += 3; + next_instr += 7; CHECK_EVAL_BREAKER(); DISPATCH(); } @@ -3803,7 +3813,7 @@ TARGET(CALL_BOUND_METHOD_EXACT_ARGS) { PyObject *callable = stack_pointer[-(1 + oparg)]; PyObject *method = stack_pointer[-(2 + oparg)]; - #line 2746 "Python/bytecodes.c" + #line 2786 "Python/bytecodes.c" DEOPT_IF(method != NULL, CALL); DEOPT_IF(Py_TYPE(callable) != &PyMethod_Type, CALL); STAT_INC(CALL, hit); @@ -3813,7 +3823,7 @@ PEEK(oparg + 2) = Py_NewRef(meth); // method Py_DECREF(callable); GO_TO_INSTRUCTION(CALL_PY_EXACT_ARGS); - #line 3817 "Python/generated_cases.c.h" + #line 3827 "Python/generated_cases.c.h" } TARGET(CALL_PY_EXACT_ARGS) { @@ -3822,7 +3832,7 @@ PyObject *callable = stack_pointer[-(1 + oparg)]; PyObject *method = stack_pointer[-(2 + oparg)]; uint32_t func_version = read_u32(&next_instr[1].cache); - #line 2758 "Python/bytecodes.c" + #line 2798 "Python/bytecodes.c" assert(kwnames == NULL); DEOPT_IF(tstate->interp->eval_frame, CALL); int is_meth = method != NULL; @@ -3848,7 +3858,7 @@ JUMPBY(INLINE_CACHE_ENTRIES_CALL); frame->return_offset = 0; DISPATCH_INLINED(new_frame); - #line 3852 "Python/generated_cases.c.h" + #line 3862 "Python/generated_cases.c.h" } TARGET(CALL_PY_WITH_DEFAULTS) { @@ -3856,7 +3866,7 @@ PyObject *callable = stack_pointer[-(1 + oparg)]; PyObject *method = stack_pointer[-(2 + oparg)]; uint32_t func_version = read_u32(&next_instr[1].cache); - #line 2786 "Python/bytecodes.c" + 
#line 2826 "Python/bytecodes.c" assert(kwnames == NULL); DEOPT_IF(tstate->interp->eval_frame, CALL); int is_meth = method != NULL; @@ -3892,7 +3902,7 @@ JUMPBY(INLINE_CACHE_ENTRIES_CALL); frame->return_offset = 0; DISPATCH_INLINED(new_frame); - #line 3896 "Python/generated_cases.c.h" + #line 3906 "Python/generated_cases.c.h" } TARGET(CALL_NO_KW_TYPE_1) { @@ -3900,7 +3910,7 @@ PyObject *callable = stack_pointer[-(1 + oparg)]; PyObject *null = stack_pointer[-(2 + oparg)]; PyObject *res; - #line 2824 "Python/bytecodes.c" + #line 2864 "Python/bytecodes.c" assert(kwnames == NULL); assert(oparg == 1); DEOPT_IF(null != NULL, CALL); @@ -3910,11 +3920,11 @@ res = Py_NewRef(Py_TYPE(obj)); Py_DECREF(obj); Py_DECREF(&PyType_Type); // I.e., callable - #line 3914 "Python/generated_cases.c.h" + #line 3924 "Python/generated_cases.c.h" STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = res; - next_instr += 3; + next_instr += 7; DISPATCH(); } @@ -3923,7 +3933,7 @@ PyObject *callable = stack_pointer[-(1 + oparg)]; PyObject *null = stack_pointer[-(2 + oparg)]; PyObject *res; - #line 2836 "Python/bytecodes.c" + #line 2876 "Python/bytecodes.c" assert(kwnames == NULL); assert(oparg == 1); DEOPT_IF(null != NULL, CALL); @@ -3934,11 +3944,11 @@ Py_DECREF(arg); Py_DECREF(&PyUnicode_Type); // I.e., callable if (res == NULL) { STACK_SHRINK(oparg); goto pop_2_error; } - #line 3938 "Python/generated_cases.c.h" + #line 3948 "Python/generated_cases.c.h" STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = res; - next_instr += 3; + next_instr += 7; CHECK_EVAL_BREAKER(); DISPATCH(); } @@ -3948,7 +3958,7 @@ PyObject *callable = stack_pointer[-(1 + oparg)]; PyObject *null = stack_pointer[-(2 + oparg)]; PyObject *res; - #line 2850 "Python/bytecodes.c" + #line 2890 "Python/bytecodes.c" assert(kwnames == NULL); assert(oparg == 1); DEOPT_IF(null != NULL, CALL); @@ -3959,11 +3969,11 @@ Py_DECREF(arg); Py_DECREF(&PyTuple_Type); // I.e., tuple if (res == NULL) { STACK_SHRINK(oparg); goto 
pop_2_error; } - #line 3963 "Python/generated_cases.c.h" + #line 3973 "Python/generated_cases.c.h" STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = res; - next_instr += 3; + next_instr += 7; CHECK_EVAL_BREAKER(); DISPATCH(); } @@ -3973,7 +3983,7 @@ PyObject *callable = stack_pointer[-(1 + oparg)]; PyObject *method = stack_pointer[-(2 + oparg)]; PyObject *res; - #line 2864 "Python/bytecodes.c" + #line 2904 "Python/bytecodes.c" int is_meth = method != NULL; int total_args = oparg; if (is_meth) { @@ -3995,11 +4005,11 @@ } Py_DECREF(tp); if (res == NULL) { STACK_SHRINK(oparg); goto pop_2_error; } - #line 3999 "Python/generated_cases.c.h" + #line 4009 "Python/generated_cases.c.h" STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = res; - next_instr += 3; + next_instr += 7; CHECK_EVAL_BREAKER(); DISPATCH(); } @@ -4009,7 +4019,7 @@ PyObject *callable = stack_pointer[-(1 + oparg)]; PyObject *method = stack_pointer[-(2 + oparg)]; PyObject *res; - #line 2889 "Python/bytecodes.c" + #line 2929 "Python/bytecodes.c" /* Builtin METH_O functions */ assert(kwnames == NULL); int is_meth = method != NULL; @@ -4037,11 +4047,11 @@ Py_DECREF(arg); Py_DECREF(callable); if (res == NULL) { STACK_SHRINK(oparg); goto pop_2_error; } - #line 4041 "Python/generated_cases.c.h" + #line 4051 "Python/generated_cases.c.h" STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = res; - next_instr += 3; + next_instr += 7; CHECK_EVAL_BREAKER(); DISPATCH(); } @@ -4051,7 +4061,7 @@ PyObject *callable = stack_pointer[-(1 + oparg)]; PyObject *method = stack_pointer[-(2 + oparg)]; PyObject *res; - #line 2920 "Python/bytecodes.c" + #line 2960 "Python/bytecodes.c" /* Builtin METH_FASTCALL functions, without keywords */ assert(kwnames == NULL); int is_meth = method != NULL; @@ -4083,11 +4093,11 @@ 'invalid'). In those cases an exception is set, so we must handle it. 
*/ - #line 4087 "Python/generated_cases.c.h" + #line 4097 "Python/generated_cases.c.h" STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = res; - next_instr += 3; + next_instr += 7; CHECK_EVAL_BREAKER(); DISPATCH(); } @@ -4097,7 +4107,7 @@ PyObject *callable = stack_pointer[-(1 + oparg)]; PyObject *method = stack_pointer[-(2 + oparg)]; PyObject *res; - #line 2955 "Python/bytecodes.c" + #line 2995 "Python/bytecodes.c" /* Builtin METH_FASTCALL | METH_KEYWORDS functions */ int is_meth = method != NULL; int total_args = oparg; @@ -4129,11 +4139,11 @@ } Py_DECREF(callable); if (res == NULL) { STACK_SHRINK(oparg); goto pop_2_error; } - #line 4133 "Python/generated_cases.c.h" + #line 4143 "Python/generated_cases.c.h" STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = res; - next_instr += 3; + next_instr += 7; CHECK_EVAL_BREAKER(); DISPATCH(); } @@ -4143,7 +4153,7 @@ PyObject *callable = stack_pointer[-(1 + oparg)]; PyObject *method = stack_pointer[-(2 + oparg)]; PyObject *res; - #line 2990 "Python/bytecodes.c" + #line 3030 "Python/bytecodes.c" assert(kwnames == NULL); /* len(o) */ int is_meth = method != NULL; @@ -4168,11 +4178,11 @@ Py_DECREF(callable); Py_DECREF(arg); if (res == NULL) { STACK_SHRINK(oparg); goto pop_2_error; } - #line 4172 "Python/generated_cases.c.h" + #line 4182 "Python/generated_cases.c.h" STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = res; - next_instr += 3; + next_instr += 7; DISPATCH(); } @@ -4181,7 +4191,7 @@ PyObject *callable = stack_pointer[-(1 + oparg)]; PyObject *method = stack_pointer[-(2 + oparg)]; PyObject *res; - #line 3017 "Python/bytecodes.c" + #line 3057 "Python/bytecodes.c" assert(kwnames == NULL); /* isinstance(o, o2) */ int is_meth = method != NULL; @@ -4208,11 +4218,11 @@ Py_DECREF(cls); Py_DECREF(callable); if (res == NULL) { STACK_SHRINK(oparg); goto pop_2_error; } - #line 4212 "Python/generated_cases.c.h" + #line 4222 "Python/generated_cases.c.h" STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = 
res; - next_instr += 3; + next_instr += 7; DISPATCH(); } @@ -4220,7 +4230,7 @@ PyObject **args = (stack_pointer - oparg); PyObject *self = stack_pointer[-(1 + oparg)]; PyObject *method = stack_pointer[-(2 + oparg)]; - #line 3047 "Python/bytecodes.c" + #line 3087 "Python/bytecodes.c" assert(kwnames == NULL); assert(oparg == 1); PyInterpreterState *interp = _PyInterpreterState_GET(); @@ -4238,14 +4248,14 @@ JUMPBY(INLINE_CACHE_ENTRIES_CALL + 1); assert(next_instr[-1].op.code == POP_TOP); DISPATCH(); - #line 4242 "Python/generated_cases.c.h" + #line 4252 "Python/generated_cases.c.h" } TARGET(CALL_NO_KW_METHOD_DESCRIPTOR_O) { PyObject **args = (stack_pointer - oparg); PyObject *method = stack_pointer[-(2 + oparg)]; PyObject *res; - #line 3067 "Python/bytecodes.c" + #line 3107 "Python/bytecodes.c" assert(kwnames == NULL); int is_meth = method != NULL; int total_args = oparg; @@ -4276,11 +4286,11 @@ Py_DECREF(arg); Py_DECREF(callable); if (res == NULL) { STACK_SHRINK(oparg); goto pop_2_error; } - #line 4280 "Python/generated_cases.c.h" + #line 4290 "Python/generated_cases.c.h" STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = res; - next_instr += 3; + next_instr += 7; CHECK_EVAL_BREAKER(); DISPATCH(); } @@ -4289,7 +4299,7 @@ PyObject **args = (stack_pointer - oparg); PyObject *method = stack_pointer[-(2 + oparg)]; PyObject *res; - #line 3101 "Python/bytecodes.c" + #line 3141 "Python/bytecodes.c" int is_meth = method != NULL; int total_args = oparg; if (is_meth) { @@ -4318,11 +4328,11 @@ } Py_DECREF(callable); if (res == NULL) { STACK_SHRINK(oparg); goto pop_2_error; } - #line 4322 "Python/generated_cases.c.h" + #line 4332 "Python/generated_cases.c.h" STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = res; - next_instr += 3; + next_instr += 7; CHECK_EVAL_BREAKER(); DISPATCH(); } @@ -4331,7 +4341,7 @@ PyObject **args = (stack_pointer - oparg); PyObject *method = stack_pointer[-(2 + oparg)]; PyObject *res; - #line 3133 "Python/bytecodes.c" + #line 3173 
"Python/bytecodes.c" assert(kwnames == NULL); assert(oparg == 0 || oparg == 1); int is_meth = method != NULL; @@ -4360,11 +4370,11 @@ Py_DECREF(self); Py_DECREF(callable); if (res == NULL) { STACK_SHRINK(oparg); goto pop_2_error; } - #line 4364 "Python/generated_cases.c.h" + #line 4374 "Python/generated_cases.c.h" STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = res; - next_instr += 3; + next_instr += 7; CHECK_EVAL_BREAKER(); DISPATCH(); } @@ -4373,7 +4383,7 @@ PyObject **args = (stack_pointer - oparg); PyObject *method = stack_pointer[-(2 + oparg)]; PyObject *res; - #line 3165 "Python/bytecodes.c" + #line 3205 "Python/bytecodes.c" assert(kwnames == NULL); int is_meth = method != NULL; int total_args = oparg; @@ -4401,19 +4411,19 @@ } Py_DECREF(callable); if (res == NULL) { STACK_SHRINK(oparg); goto pop_2_error; } - #line 4405 "Python/generated_cases.c.h" + #line 4415 "Python/generated_cases.c.h" STACK_SHRINK(oparg); STACK_SHRINK(1); stack_pointer[-1] = res; - next_instr += 3; + next_instr += 7; CHECK_EVAL_BREAKER(); DISPATCH(); } TARGET(INSTRUMENTED_CALL_FUNCTION_EX) { - #line 3196 "Python/bytecodes.c" + #line 3236 "Python/bytecodes.c" GO_TO_INSTRUCTION(CALL_FUNCTION_EX); - #line 4417 "Python/generated_cases.c.h" + #line 4427 "Python/generated_cases.c.h" } TARGET(CALL_FUNCTION_EX) { @@ -4422,7 +4432,7 @@ PyObject *callargs = stack_pointer[-(1 + ((oparg & 1) ? 1 : 0))]; PyObject *func = stack_pointer[-(2 + ((oparg & 1) ? 1 : 0))]; PyObject *result; - #line 3200 "Python/bytecodes.c" + #line 3240 "Python/bytecodes.c" // DICT_MERGE is called before this opcode if there are kwargs. // It converts all dict subtypes in kwargs into regular dicts. 
assert(kwargs == NULL || PyDict_CheckExact(kwargs)); @@ -4484,14 +4494,14 @@ } result = PyObject_Call(func, callargs, kwargs); } - #line 4488 "Python/generated_cases.c.h" + #line 4498 "Python/generated_cases.c.h" Py_DECREF(func); Py_DECREF(callargs); Py_XDECREF(kwargs); - #line 3262 "Python/bytecodes.c" + #line 3302 "Python/bytecodes.c" assert(PEEK(3 + (oparg & 1)) == NULL); if (result == NULL) { STACK_SHRINK(((oparg & 1) ? 1 : 0)); goto pop_3_error; } - #line 4495 "Python/generated_cases.c.h" + #line 4505 "Python/generated_cases.c.h" STACK_SHRINK(((oparg & 1) ? 1 : 0)); STACK_SHRINK(2); stack_pointer[-1] = result; @@ -4506,7 +4516,7 @@ PyObject *kwdefaults = (oparg & 0x02) ? stack_pointer[-(1 + ((oparg & 0x08) ? 1 : 0) + ((oparg & 0x04) ? 1 : 0) + ((oparg & 0x02) ? 1 : 0))] : NULL; PyObject *defaults = (oparg & 0x01) ? stack_pointer[-(1 + ((oparg & 0x08) ? 1 : 0) + ((oparg & 0x04) ? 1 : 0) + ((oparg & 0x02) ? 1 : 0) + ((oparg & 0x01) ? 1 : 0))] : NULL; PyObject *func; - #line 3272 "Python/bytecodes.c" + #line 3312 "Python/bytecodes.c" PyFunctionObject *func_obj = (PyFunctionObject *) PyFunction_New(codeobj, GLOBALS()); @@ -4535,14 +4545,14 @@ func_obj->func_version = ((PyCodeObject *)codeobj)->co_version; func = (PyObject *)func_obj; - #line 4539 "Python/generated_cases.c.h" + #line 4549 "Python/generated_cases.c.h" STACK_SHRINK(((oparg & 0x01) ? 1 : 0) + ((oparg & 0x02) ? 1 : 0) + ((oparg & 0x04) ? 1 : 0) + ((oparg & 0x08) ? 
1 : 0)); stack_pointer[-1] = func; DISPATCH(); } TARGET(RETURN_GENERATOR) { - #line 3303 "Python/bytecodes.c" + #line 3343 "Python/bytecodes.c" assert(PyFunction_Check(frame->f_funcobj)); PyFunctionObject *func = (PyFunctionObject *)frame->f_funcobj; PyGenObject *gen = (PyGenObject *)_Py_MakeCoro(func); @@ -4563,7 +4573,7 @@ frame = cframe.current_frame = prev; _PyFrame_StackPush(frame, (PyObject *)gen); goto resume_frame; - #line 4567 "Python/generated_cases.c.h" + #line 4577 "Python/generated_cases.c.h" } TARGET(BUILD_SLICE) { @@ -4571,15 +4581,15 @@ PyObject *stop = stack_pointer[-(1 + ((oparg == 3) ? 1 : 0))]; PyObject *start = stack_pointer[-(2 + ((oparg == 3) ? 1 : 0))]; PyObject *slice; - #line 3326 "Python/bytecodes.c" + #line 3366 "Python/bytecodes.c" slice = PySlice_New(start, stop, step); - #line 4577 "Python/generated_cases.c.h" + #line 4587 "Python/generated_cases.c.h" Py_DECREF(start); Py_DECREF(stop); Py_XDECREF(step); - #line 3328 "Python/bytecodes.c" + #line 3368 "Python/bytecodes.c" if (slice == NULL) { STACK_SHRINK(((oparg == 3) ? 1 : 0)); goto pop_2_error; } - #line 4583 "Python/generated_cases.c.h" + #line 4593 "Python/generated_cases.c.h" STACK_SHRINK(((oparg == 3) ? 1 : 0)); STACK_SHRINK(1); stack_pointer[-1] = slice; @@ -4590,7 +4600,7 @@ PyObject *fmt_spec = ((oparg & FVS_MASK) == FVS_HAVE_SPEC) ? stack_pointer[-((((oparg & FVS_MASK) == FVS_HAVE_SPEC) ? 1 : 0))] : NULL; PyObject *value = stack_pointer[-(1 + (((oparg & FVS_MASK) == FVS_HAVE_SPEC) ? 1 : 0))]; PyObject *result; - #line 3332 "Python/bytecodes.c" + #line 3372 "Python/bytecodes.c" /* Handles f-string value formatting. */ PyObject *(*conv_fn)(PyObject *); int which_conversion = oparg & FVC_MASK; @@ -4625,7 +4635,7 @@ Py_DECREF(value); Py_XDECREF(fmt_spec); if (result == NULL) { STACK_SHRINK((((oparg & FVS_MASK) == FVS_HAVE_SPEC) ? 
1 : 0)); goto pop_1_error; } - #line 4629 "Python/generated_cases.c.h" + #line 4639 "Python/generated_cases.c.h" STACK_SHRINK((((oparg & FVS_MASK) == FVS_HAVE_SPEC) ? 1 : 0)); stack_pointer[-1] = result; DISPATCH(); @@ -4634,10 +4644,10 @@ TARGET(COPY) { PyObject *bottom = stack_pointer[-(1 + (oparg-1))]; PyObject *top; - #line 3369 "Python/bytecodes.c" + #line 3409 "Python/bytecodes.c" assert(oparg > 0); top = Py_NewRef(bottom); - #line 4641 "Python/generated_cases.c.h" + #line 4651 "Python/generated_cases.c.h" STACK_GROW(1); stack_pointer[-1] = top; DISPATCH(); @@ -4645,16 +4655,20 @@ TARGET(BINARY_OP) { PREDICTED(BINARY_OP); - static_assert(INLINE_CACHE_ENTRIES_BINARY_OP == 1, "incorrect cache size"); + static_assert(INLINE_CACHE_ENTRIES_BINARY_OP == 5, "incorrect cache size"); PyObject *rhs = stack_pointer[-1]; PyObject *lhs = stack_pointer[-2]; PyObject *res; - #line 3374 "Python/bytecodes.c" + #line 3414 "Python/bytecodes.c" #if ENABLE_SPECIALIZATION _PyBinaryOpCache *cache = (_PyBinaryOpCache *)next_instr; if (ADAPTIVE_COUNTER_IS_ZERO(cache->counter)) { next_instr--; _Py_Specialize_BinaryOp(lhs, rhs, next_instr, oparg, &GETLOCAL(0)); + int result = _PyExternal_TrySpecialize(next_instr, &stack_pointer, (_PyCache *)cache); + if (result) { + oparg = next_instr->op.arg; + } DISPATCH_SAME_OPARG(); } STAT_INC(BINARY_OP, deferred); @@ -4663,32 +4677,32 @@ assert(0 <= oparg); assert((unsigned)oparg < Py_ARRAY_LENGTH(binary_ops)); assert(binary_ops[oparg]); - res = binary_ops[oparg](lhs, rhs); - #line 4668 "Python/generated_cases.c.h" + CMLQ_PAPI_REGION("binary_op", res = binary_ops[oparg](lhs, rhs)); + #line 4682 "Python/generated_cases.c.h" Py_DECREF(lhs); Py_DECREF(rhs); - #line 3389 "Python/bytecodes.c" + #line 3433 "Python/bytecodes.c" if (res == NULL) goto pop_2_error; - #line 4673 "Python/generated_cases.c.h" + #line 4687 "Python/generated_cases.c.h" STACK_SHRINK(1); stack_pointer[-1] = res; - next_instr += 1; + next_instr += 5; DISPATCH(); } TARGET(SWAP) { 
PyObject *top = stack_pointer[-1]; PyObject *bottom = stack_pointer[-(2 + (oparg-2))]; - #line 3394 "Python/bytecodes.c" + #line 3447 "Python/bytecodes.c" assert(oparg >= 2); - #line 4685 "Python/generated_cases.c.h" + #line 4699 "Python/generated_cases.c.h" stack_pointer[-1] = bottom; stack_pointer[-(2 + (oparg-2))] = top; DISPATCH(); } TARGET(INSTRUMENTED_INSTRUCTION) { - #line 3398 "Python/bytecodes.c" + #line 3451 "Python/bytecodes.c" int next_opcode = _Py_call_instrumentation_instruction( tstate, frame, next_instr-1); if (next_opcode < 0) goto error; @@ -4700,26 +4714,26 @@ assert(next_opcode > 0 && next_opcode < 256); opcode = next_opcode; DISPATCH_GOTO(); - #line 4704 "Python/generated_cases.c.h" + #line 4718 "Python/generated_cases.c.h" } TARGET(INSTRUMENTED_JUMP_FORWARD) { - #line 3412 "Python/bytecodes.c" + #line 3465 "Python/bytecodes.c" INSTRUMENTED_JUMP(next_instr-1, next_instr+oparg, PY_MONITORING_EVENT_JUMP); - #line 4710 "Python/generated_cases.c.h" + #line 4724 "Python/generated_cases.c.h" DISPATCH(); } TARGET(INSTRUMENTED_JUMP_BACKWARD) { - #line 3416 "Python/bytecodes.c" + #line 3469 "Python/bytecodes.c" INSTRUMENTED_JUMP(next_instr-1, next_instr-oparg, PY_MONITORING_EVENT_JUMP); - #line 4717 "Python/generated_cases.c.h" + #line 4731 "Python/generated_cases.c.h" CHECK_EVAL_BREAKER(); DISPATCH(); } TARGET(INSTRUMENTED_POP_JUMP_IF_TRUE) { - #line 3421 "Python/bytecodes.c" + #line 3474 "Python/bytecodes.c" PyObject *cond = POP(); int err = PyObject_IsTrue(cond); Py_DECREF(cond); @@ -4728,12 +4742,12 @@ assert(err == 0 || err == 1); int offset = err*oparg; INSTRUMENTED_JUMP(here, next_instr + offset, PY_MONITORING_EVENT_BRANCH); - #line 4732 "Python/generated_cases.c.h" + #line 4746 "Python/generated_cases.c.h" DISPATCH(); } TARGET(INSTRUMENTED_POP_JUMP_IF_FALSE) { - #line 3432 "Python/bytecodes.c" + #line 3485 "Python/bytecodes.c" PyObject *cond = POP(); int err = PyObject_IsTrue(cond); Py_DECREF(cond); @@ -4742,12 +4756,12 @@ assert(err == 0 || err 
== 1); int offset = (1-err)*oparg; INSTRUMENTED_JUMP(here, next_instr + offset, PY_MONITORING_EVENT_BRANCH); - #line 4746 "Python/generated_cases.c.h" + #line 4760 "Python/generated_cases.c.h" DISPATCH(); } TARGET(INSTRUMENTED_POP_JUMP_IF_NONE) { - #line 3443 "Python/bytecodes.c" + #line 3496 "Python/bytecodes.c" PyObject *value = POP(); _Py_CODEUNIT *here = next_instr-1; int offset; @@ -4759,12 +4773,12 @@ offset = 0; } INSTRUMENTED_JUMP(here, next_instr + offset, PY_MONITORING_EVENT_BRANCH); - #line 4763 "Python/generated_cases.c.h" + #line 4777 "Python/generated_cases.c.h" DISPATCH(); } TARGET(INSTRUMENTED_POP_JUMP_IF_NOT_NONE) { - #line 3457 "Python/bytecodes.c" + #line 3510 "Python/bytecodes.c" PyObject *value = POP(); _Py_CODEUNIT *here = next_instr-1; int offset; @@ -4776,30 +4790,30 @@ offset = oparg; } INSTRUMENTED_JUMP(here, next_instr + offset, PY_MONITORING_EVENT_BRANCH); - #line 4780 "Python/generated_cases.c.h" + #line 4794 "Python/generated_cases.c.h" DISPATCH(); } TARGET(EXTENDED_ARG) { - #line 3471 "Python/bytecodes.c" + #line 3524 "Python/bytecodes.c" assert(oparg); opcode = next_instr->op.code; oparg = oparg << 8 | next_instr->op.arg; PRE_DISPATCH_GOTO(); DISPATCH_GOTO(); - #line 4791 "Python/generated_cases.c.h" + #line 4805 "Python/generated_cases.c.h" } TARGET(CACHE) { - #line 3479 "Python/bytecodes.c" + #line 3532 "Python/bytecodes.c" assert(0 && "Executing a cache."); Py_UNREACHABLE(); - #line 4798 "Python/generated_cases.c.h" + #line 4812 "Python/generated_cases.c.h" } TARGET(RESERVED) { - #line 3484 "Python/bytecodes.c" + #line 3537 "Python/bytecodes.c" assert(0 && "Executing RESERVED instruction."); Py_UNREACHABLE(); - #line 4805 "Python/generated_cases.c.h" + #line 4819 "Python/generated_cases.c.h" } diff --git a/Python/opcode_metadata.h b/Python/opcode_metadata.h index f9b1c928cd4845..661e4ce7233b59 100644 --- a/Python/opcode_metadata.h +++ b/Python/opcode_metadata.h @@ -795,7 +795,7 @@ _PyOpcode_num_pushed(int opcode, int oparg, bool 
jump) { } #endif -enum InstructionFormat { INSTR_FMT_IB, INSTR_FMT_IBC, INSTR_FMT_IBC00, INSTR_FMT_IBC000, INSTR_FMT_IBC00000000, INSTR_FMT_IBIB, INSTR_FMT_IX, INSTR_FMT_IXC, INSTR_FMT_IXC000 }; +enum InstructionFormat { INSTR_FMT_IB, INSTR_FMT_IBC, INSTR_FMT_IBC000, INSTR_FMT_IBC0000, INSTR_FMT_IBC000000, INSTR_FMT_IBC00000000, INSTR_FMT_IBIB, INSTR_FMT_IX, INSTR_FMT_IXC, INSTR_FMT_IXC000, INSTR_FMT_IXC0000, INSTR_FMT_IXC00000000 }; struct opcode_metadata { bool valid_entry; enum InstructionFormat instr_format; @@ -828,21 +828,21 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[256] = { [UNARY_NEGATIVE] = { true, INSTR_FMT_IX }, [UNARY_NOT] = { true, INSTR_FMT_IX }, [UNARY_INVERT] = { true, INSTR_FMT_IX }, - [BINARY_OP_MULTIPLY_INT] = { true, INSTR_FMT_IXC }, - [BINARY_OP_MULTIPLY_FLOAT] = { true, INSTR_FMT_IXC }, - [BINARY_OP_SUBTRACT_INT] = { true, INSTR_FMT_IXC }, - [BINARY_OP_SUBTRACT_FLOAT] = { true, INSTR_FMT_IXC }, - [BINARY_OP_ADD_UNICODE] = { true, INSTR_FMT_IXC }, + [BINARY_OP_MULTIPLY_INT] = { true, INSTR_FMT_IXC0000 }, + [BINARY_OP_MULTIPLY_FLOAT] = { true, INSTR_FMT_IXC0000 }, + [BINARY_OP_SUBTRACT_INT] = { true, INSTR_FMT_IXC0000 }, + [BINARY_OP_SUBTRACT_FLOAT] = { true, INSTR_FMT_IXC0000 }, + [BINARY_OP_ADD_UNICODE] = { true, INSTR_FMT_IXC0000 }, [BINARY_OP_INPLACE_ADD_UNICODE] = { true, INSTR_FMT_IX }, - [BINARY_OP_ADD_FLOAT] = { true, INSTR_FMT_IXC }, - [BINARY_OP_ADD_INT] = { true, INSTR_FMT_IXC }, - [BINARY_SUBSCR] = { true, INSTR_FMT_IXC }, + [BINARY_OP_ADD_FLOAT] = { true, INSTR_FMT_IXC0000 }, + [BINARY_OP_ADD_INT] = { true, INSTR_FMT_IXC0000 }, + [BINARY_SUBSCR] = { true, INSTR_FMT_IBC0000 }, [BINARY_SLICE] = { true, INSTR_FMT_IX }, [STORE_SLICE] = { true, INSTR_FMT_IX }, - [BINARY_SUBSCR_LIST_INT] = { true, INSTR_FMT_IXC }, - [BINARY_SUBSCR_TUPLE_INT] = { true, INSTR_FMT_IXC }, - [BINARY_SUBSCR_DICT] = { true, INSTR_FMT_IXC }, - [BINARY_SUBSCR_GETITEM] = { true, INSTR_FMT_IXC }, + [BINARY_SUBSCR_LIST_INT] = { true, INSTR_FMT_IXC0000 
}, + [BINARY_SUBSCR_TUPLE_INT] = { true, INSTR_FMT_IXC0000 }, + [BINARY_SUBSCR_DICT] = { true, INSTR_FMT_IXC0000 }, + [BINARY_SUBSCR_GETITEM] = { true, INSTR_FMT_IXC0000 }, [LIST_APPEND] = { true, INSTR_FMT_IB }, [SET_ADD] = { true, INSTR_FMT_IB }, [STORE_SUBSCR] = { true, INSTR_FMT_IXC }, @@ -960,24 +960,24 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[256] = { [LOAD_ATTR_METHOD_LAZY_DICT] = { true, INSTR_FMT_IBC00000000 }, [KW_NAMES] = { true, INSTR_FMT_IB }, [INSTRUMENTED_CALL] = { true, INSTR_FMT_IB }, - [CALL] = { true, INSTR_FMT_IBC00 }, - [CALL_BOUND_METHOD_EXACT_ARGS] = { true, INSTR_FMT_IBC00 }, - [CALL_PY_EXACT_ARGS] = { true, INSTR_FMT_IBC00 }, - [CALL_PY_WITH_DEFAULTS] = { true, INSTR_FMT_IBC00 }, - [CALL_NO_KW_TYPE_1] = { true, INSTR_FMT_IBC00 }, - [CALL_NO_KW_STR_1] = { true, INSTR_FMT_IBC00 }, - [CALL_NO_KW_TUPLE_1] = { true, INSTR_FMT_IBC00 }, - [CALL_BUILTIN_CLASS] = { true, INSTR_FMT_IBC00 }, - [CALL_NO_KW_BUILTIN_O] = { true, INSTR_FMT_IBC00 }, - [CALL_NO_KW_BUILTIN_FAST] = { true, INSTR_FMT_IBC00 }, - [CALL_BUILTIN_FAST_WITH_KEYWORDS] = { true, INSTR_FMT_IBC00 }, - [CALL_NO_KW_LEN] = { true, INSTR_FMT_IBC00 }, - [CALL_NO_KW_ISINSTANCE] = { true, INSTR_FMT_IBC00 }, - [CALL_NO_KW_LIST_APPEND] = { true, INSTR_FMT_IBC00 }, - [CALL_NO_KW_METHOD_DESCRIPTOR_O] = { true, INSTR_FMT_IBC00 }, - [CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS] = { true, INSTR_FMT_IBC00 }, - [CALL_NO_KW_METHOD_DESCRIPTOR_NOARGS] = { true, INSTR_FMT_IBC00 }, - [CALL_NO_KW_METHOD_DESCRIPTOR_FAST] = { true, INSTR_FMT_IBC00 }, + [CALL] = { true, INSTR_FMT_IBC000000 }, + [CALL_BOUND_METHOD_EXACT_ARGS] = { true, INSTR_FMT_IBC000000 }, + [CALL_PY_EXACT_ARGS] = { true, INSTR_FMT_IBC000000 }, + [CALL_PY_WITH_DEFAULTS] = { true, INSTR_FMT_IBC000000 }, + [CALL_NO_KW_TYPE_1] = { true, INSTR_FMT_IBC000000 }, + [CALL_NO_KW_STR_1] = { true, INSTR_FMT_IBC000000 }, + [CALL_NO_KW_TUPLE_1] = { true, INSTR_FMT_IBC000000 }, + [CALL_BUILTIN_CLASS] = { true, INSTR_FMT_IBC000000 }, + 
[CALL_NO_KW_BUILTIN_O] = { true, INSTR_FMT_IBC000000 }, + [CALL_NO_KW_BUILTIN_FAST] = { true, INSTR_FMT_IBC000000 }, + [CALL_BUILTIN_FAST_WITH_KEYWORDS] = { true, INSTR_FMT_IBC000000 }, + [CALL_NO_KW_LEN] = { true, INSTR_FMT_IBC000000 }, + [CALL_NO_KW_ISINSTANCE] = { true, INSTR_FMT_IBC000000 }, + [CALL_NO_KW_LIST_APPEND] = { true, INSTR_FMT_IBC000000 }, + [CALL_NO_KW_METHOD_DESCRIPTOR_O] = { true, INSTR_FMT_IBC000000 }, + [CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS] = { true, INSTR_FMT_IBC000000 }, + [CALL_NO_KW_METHOD_DESCRIPTOR_NOARGS] = { true, INSTR_FMT_IBC000000 }, + [CALL_NO_KW_METHOD_DESCRIPTOR_FAST] = { true, INSTR_FMT_IBC000000 }, [INSTRUMENTED_CALL_FUNCTION_EX] = { true, INSTR_FMT_IX }, [CALL_FUNCTION_EX] = { true, INSTR_FMT_IB }, [MAKE_FUNCTION] = { true, INSTR_FMT_IB }, @@ -985,7 +985,7 @@ const struct opcode_metadata _PyOpcode_opcode_metadata[256] = { [BUILD_SLICE] = { true, INSTR_FMT_IB }, [FORMAT_VALUE] = { true, INSTR_FMT_IB }, [COPY] = { true, INSTR_FMT_IB }, - [BINARY_OP] = { true, INSTR_FMT_IBC }, + [BINARY_OP] = { true, INSTR_FMT_IBC0000 }, [SWAP] = { true, INSTR_FMT_IB }, [INSTRUMENTED_INSTRUCTION] = { true, INSTR_FMT_IX }, [INSTRUMENTED_JUMP_FORWARD] = { true, INSTR_FMT_IB }, diff --git a/Python/opcode_targets.h b/Python/opcode_targets.h index 3add06362711c9..752fd0d3269122 100644 --- a/Python/opcode_targets.h +++ b/Python/opcode_targets.h @@ -18,25 +18,27 @@ static void *opcode_targets[256] = { &&TARGET_BINARY_OP_SUBTRACT_FLOAT, &&TARGET_RESERVED, &&TARGET_BINARY_OP_SUBTRACT_INT, + &&TARGET_BINARY_OP_EXTERNAL, &&TARGET_BINARY_SUBSCR_DICT, &&TARGET_BINARY_SUBSCR_GETITEM, &&TARGET_BINARY_SUBSCR_LIST_INT, &&TARGET_BINARY_SUBSCR_TUPLE_INT, - &&TARGET_CALL_PY_EXACT_ARGS, - &&TARGET_CALL_PY_WITH_DEFAULTS, + &&TARGET_BINARY_SUBSCR_EXTERNAL, &&TARGET_BINARY_SUBSCR, &&TARGET_BINARY_SLICE, &&TARGET_STORE_SLICE, - &&TARGET_CALL_BOUND_METHOD_EXACT_ARGS, - &&TARGET_CALL_BUILTIN_CLASS, + &&TARGET_CALL_PY_EXACT_ARGS, + &&TARGET_CALL_PY_WITH_DEFAULTS, 
&&TARGET_GET_LEN, &&TARGET_MATCH_MAPPING, &&TARGET_MATCH_SEQUENCE, &&TARGET_MATCH_KEYS, - &&TARGET_CALL_BUILTIN_FAST_WITH_KEYWORDS, + &&TARGET_CALL_BOUND_METHOD_EXACT_ARGS, &&TARGET_PUSH_EXC_INFO, &&TARGET_CHECK_EXC_MATCH, &&TARGET_CHECK_EG_MATCH, + &&TARGET_CALL_BUILTIN_CLASS, + &&TARGET_CALL_BUILTIN_FAST_WITH_KEYWORDS, &&TARGET_CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS, &&TARGET_CALL_NO_KW_BUILTIN_FAST, &&TARGET_CALL_NO_KW_BUILTIN_O, @@ -46,8 +48,6 @@ static void *opcode_targets[256] = { &&TARGET_CALL_NO_KW_METHOD_DESCRIPTOR_FAST, &&TARGET_CALL_NO_KW_METHOD_DESCRIPTOR_NOARGS, &&TARGET_CALL_NO_KW_METHOD_DESCRIPTOR_O, - &&TARGET_CALL_NO_KW_STR_1, - &&TARGET_CALL_NO_KW_TUPLE_1, &&TARGET_WITH_EXCEPT_START, &&TARGET_GET_AITER, &&TARGET_GET_ANEXT, @@ -55,39 +55,39 @@ static void *opcode_targets[256] = { &&TARGET_BEFORE_WITH, &&TARGET_END_ASYNC_FOR, &&TARGET_CLEANUP_THROW, + &&TARGET_CALL_NO_KW_STR_1, + &&TARGET_CALL_NO_KW_TUPLE_1, &&TARGET_CALL_NO_KW_TYPE_1, + &&TARGET_CALL_EXTERNAL, + &&TARGET_STORE_SUBSCR, + &&TARGET_DELETE_SUBSCR, &&TARGET_COMPARE_OP_FLOAT, &&TARGET_COMPARE_OP_INT, &&TARGET_COMPARE_OP_STR, - &&TARGET_STORE_SUBSCR, - &&TARGET_DELETE_SUBSCR, &&TARGET_FOR_ITER_LIST, &&TARGET_FOR_ITER_TUPLE, &&TARGET_FOR_ITER_RANGE, + &&TARGET_GET_ITER, + &&TARGET_GET_YIELD_FROM_ITER, &&TARGET_FOR_ITER_GEN, + &&TARGET_LOAD_BUILD_CLASS, &&TARGET_LOAD_SUPER_ATTR_ATTR, &&TARGET_LOAD_SUPER_ATTR_METHOD, - &&TARGET_GET_ITER, - &&TARGET_GET_YIELD_FROM_ITER, + &&TARGET_LOAD_ASSERTION_ERROR, + &&TARGET_RETURN_GENERATOR, &&TARGET_LOAD_ATTR_CLASS, - &&TARGET_LOAD_BUILD_CLASS, &&TARGET_LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN, &&TARGET_LOAD_ATTR_INSTANCE_VALUE, - &&TARGET_LOAD_ASSERTION_ERROR, - &&TARGET_RETURN_GENERATOR, &&TARGET_LOAD_ATTR_MODULE, &&TARGET_LOAD_ATTR_PROPERTY, &&TARGET_LOAD_ATTR_SLOT, &&TARGET_LOAD_ATTR_WITH_HINT, - &&TARGET_LOAD_ATTR_METHOD_LAZY_DICT, - &&TARGET_LOAD_ATTR_METHOD_NO_DICT, - &&TARGET_LOAD_ATTR_METHOD_WITH_VALUES, &&TARGET_RETURN_VALUE, - 
&&TARGET_LOAD_CONST__LOAD_FAST, + &&TARGET_LOAD_ATTR_METHOD_LAZY_DICT, &&TARGET_SETUP_ANNOTATIONS, - &&TARGET_LOAD_FAST__LOAD_CONST, + &&TARGET_LOAD_ATTR_METHOD_NO_DICT, &&TARGET_LOAD_LOCALS, - &&TARGET_LOAD_FAST__LOAD_FAST, + &&TARGET_LOAD_ATTR_METHOD_WITH_VALUES, &&TARGET_POP_EXCEPT, &&TARGET_STORE_NAME, &&TARGET_DELETE_NAME, @@ -110,9 +110,9 @@ static void *opcode_targets[256] = { &&TARGET_IMPORT_NAME, &&TARGET_IMPORT_FROM, &&TARGET_JUMP_FORWARD, - &&TARGET_LOAD_GLOBAL_BUILTIN, - &&TARGET_LOAD_GLOBAL_MODULE, - &&TARGET_STORE_ATTR_INSTANCE_VALUE, + &&TARGET_LOAD_CONST__LOAD_FAST, + &&TARGET_LOAD_FAST__LOAD_CONST, + &&TARGET_LOAD_FAST__LOAD_FAST, &&TARGET_POP_JUMP_IF_FALSE, &&TARGET_POP_JUMP_IF_TRUE, &&TARGET_LOAD_GLOBAL, @@ -147,36 +147,36 @@ static void *opcode_targets[256] = { &&TARGET_LIST_APPEND, &&TARGET_SET_ADD, &&TARGET_MAP_ADD, - &&TARGET_STORE_ATTR_SLOT, + &&TARGET_LOAD_GLOBAL_BUILTIN, &&TARGET_COPY_FREE_VARS, &&TARGET_YIELD_VALUE, &&TARGET_RESUME, &&TARGET_MATCH_CLASS, - &&TARGET_STORE_ATTR_WITH_HINT, - &&TARGET_STORE_FAST__LOAD_FAST, + &&TARGET_LOAD_GLOBAL_MODULE, + &&TARGET_STORE_ATTR_INSTANCE_VALUE, &&TARGET_FORMAT_VALUE, &&TARGET_BUILD_CONST_KEY_MAP, &&TARGET_BUILD_STRING, + &&TARGET_STORE_ATTR_SLOT, + &&TARGET_STORE_ATTR_WITH_HINT, + &&TARGET_STORE_FAST__LOAD_FAST, &&TARGET_STORE_FAST__STORE_FAST, - &&TARGET_STORE_SUBSCR_DICT, - &&TARGET_STORE_SUBSCR_LIST_INT, - &&TARGET_UNPACK_SEQUENCE_LIST, &&TARGET_LIST_EXTEND, &&TARGET_SET_UPDATE, &&TARGET_DICT_MERGE, &&TARGET_DICT_UPDATE, + &&TARGET_STORE_SUBSCR_DICT, + &&TARGET_STORE_SUBSCR_LIST_INT, + &&TARGET_UNPACK_SEQUENCE_LIST, &&TARGET_UNPACK_SEQUENCE_TUPLE, &&TARGET_UNPACK_SEQUENCE_TWO_TUPLE, - &&TARGET_SEND_GEN, - &&_unknown_opcode, - &&_unknown_opcode, &&TARGET_CALL, &&TARGET_KW_NAMES, &&TARGET_CALL_INTRINSIC_1, &&TARGET_CALL_INTRINSIC_2, &&TARGET_LOAD_FROM_DICT_OR_GLOBALS, &&TARGET_LOAD_FROM_DICT_OR_DEREF, - &&_unknown_opcode, + &&TARGET_SEND_GEN, &&_unknown_opcode, &&_unknown_opcode, 
&&_unknown_opcode, diff --git a/Python/specialize.c b/Python/specialize.c index 63b44461007c6e..0c874575fd09bd 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -9,8 +9,11 @@ #include "pycore_opcode.h" // _PyOpcode_Caches #include "structmember.h" // struct PyMemberDef, T_OFFSET_EX #include "pycore_descrobject.h" +#include "opcode_metadata.h" #include // rand() +//#include +//#include /* For guidance on adding or extending families of instructions see * ./adaptive.md @@ -265,6 +268,21 @@ do { \ #define SPECIALIZATION_FAIL(opcode, kind) ((void)0) #endif +#if defined(Py_OPT_CMLQ_ENV) || defined(Py_OPT_CMLQ_ALWAYS) + +static void +write_size_byte(PyCodeObject* co, int offset, int val) +{ + ((unsigned char *)PyBytes_AS_STRING(co->co_size_table))[offset] = val&255; +} + +static unsigned char +get_size_byte(PyCodeObject* co, int offset) +{ + return ((unsigned char *)PyBytes_AS_STRING(co->co_size_table))[offset]; +} +#endif + // Initialize warmup counters and insert superinstructions. This cannot fail. void _PyCode_Quicken(PyCodeObject *code) @@ -277,6 +295,19 @@ _PyCode_Quicken(PyCodeObject *code) opcode = _Py_GetBaseOpcode(code, i); assert(opcode < MIN_INSTRUMENTED_OPCODE); int caches = _PyOpcode_Caches[opcode]; + +#if defined(Py_OPT_CMLQ_ENV) || defined(Py_OPT_CMLQ_ALWAYS) + + if (i + 1 < Py_SIZE(code)) { + if (caches) { + // Write the size byte for the next instruction (if any). 
+ write_size_byte(code, i + 1 + caches, caches); + } else { + write_size_byte(code, i + 1, 0); + } + } +#endif + if (caches) { instructions[i + 1].cache = adaptive_counter_warmup(); i += caches; @@ -706,6 +737,7 @@ specialize_dict_access( return 0; } cache->index = (uint16_t)index; + cache->index = (uint16_t)index; write_u32(cache->version, type->tp_version_tag); instr->op.code = hint_op; } @@ -1317,6 +1349,7 @@ function_get_version(PyObject *o, int opcode) return version; } + void _Py_Specialize_BinarySubscr( PyObject *container, PyObject *sub, _Py_CODEUNIT *instr) @@ -1899,6 +1932,487 @@ binary_op_fail_kind(int oparg, PyObject *lhs, PyObject *rhs) } #endif +static PyExternalSpecializer *external_specializer = NULL; + + +#if defined(Py_OPT_CMLQ_ENV) || defined(Py_OPT_CMLQ_ALWAYS) +static PyExternalDeoptInfo* _PyExternal_NewDeoptInfo(_Py_CODEUNIT* instr, _PyInterpreterFrame* frame) { + PyExternalDeoptInfo* deopt_info = PyMem_Malloc(sizeof(PyExternalDeoptInfo)); + deopt_info->data = _PyOpcode_Caches[instr->op.code]; + deopt_info->orig_instr = *instr; + deopt_info->position = instr; + deopt_info->child = NULL; + deopt_info->next = NULL; + deopt_info->prev = NULL; + + PyExternalDeoptInfo* current = frame->f_code->co_deopt_info_head; + if (current == NULL) { + frame->f_code->co_deopt_info_head = deopt_info; + } else { + while (current->next != NULL) { + current = current->next; + } + + current->next = deopt_info; + deopt_info->prev = current; + } + return deopt_info; +} + +static PyExternalDeoptInfo* +_PyExternal_GetDeoptInfo(const _Py_CODEUNIT* instr, _PyInterpreterFrame* frame) { + PyExternalDeoptInfo* current = frame->f_code->co_deopt_info_head; + while (current != NULL) { + if (current->position == instr) { + return current; + } + current = current->next; + } + + return NULL; +} + + +static void +_PyExternal_DropDeoptInfo(PyExternalDeoptInfo* deopt_info, _PyInterpreterFrame* frame) { + assert(deopt_info != NULL); + if (deopt_info->next) { + deopt_info->next->prev = 
deopt_info->prev; + } + + if (deopt_info->prev) { + deopt_info->prev->next = deopt_info->next; + } + else { + frame->f_code->co_deopt_info_head = deopt_info->next; + } + + PyMem_Free(deopt_info); +} + + +static void +_PyExternal_FindStackposOriginator(_PyInterpreterFrame *frame, Py_ssize_t stack_sink_position, Py_ssize_t instruction_offset, + Py_ssize_t *originating_offset, Py_ssize_t *simulated_stack_size) { + // an array of instruction offsets indexed by stack positions + // the array contains the instruction offset of the last calculated originator of a stack element + Py_ssize_t last_origin[frame->f_code->co_stacksize]; + Py_ssize_t current_offset = instruction_offset; + Py_ssize_t current_stack_size = *simulated_stack_size; + + for (int i = 0; i < frame->f_code->co_stacksize; i++) { + last_origin[i] = -1; + } + + while (current_offset >= 0) { + + assert(current_stack_size >= 0); + + // skip to the previous instruction + unsigned char prev_cache_size = get_size_byte(frame->f_code, current_offset); + current_offset -= prev_cache_size; + current_offset--; + + // obtain the stack effect of the previous instruction + int opcode = _Py_GetBaseOpcode(frame->f_code, current_offset); + + if (opcode == FOR_ITER || opcode == JUMP_FORWARD || opcode == JUMP_BACKWARD) { + *originating_offset = -1; + break; + } + + int oparg = _PyCode_CODE(frame->f_code)[current_offset].op.arg; + int popped = _PyOpcode_num_popped(opcode, oparg, false); + int pushed = _PyOpcode_num_pushed(opcode, oparg, false); + + // mark the previous instruction as the originator for the current stack elements + for (int i=0; i= 0); + + Py_ssize_t source_instruction_offset = -1; + short skipped_stack_effect = 0; + + // the number of instructions (and cache elemnts) between the sink instruction and the originator + Py_ssize_t skipped_instructions = 0; + + // find the instruction that creates the stack value at position "position_on_stack" + // also update the "top_of_used_stack" to the value before the 
originating instruction + _PyExternal_FindStackposOriginator(frame, position_on_stack, consuming_instruction_offset, + &source_instruction_offset, simulated_stack_size); + + if (source_instruction_offset < 0) { + // For some reason we could not identify the originator + raise(SIGTRAP); + } + + _Py_CODEUNIT* originator = &_PyCode_CODE(frame->f_code)[source_instruction_offset]; + const int pushed = _PyOpcode_num_pushed(originator->op.code, originator->op.arg, false); + const int popped = _PyOpcode_num_popped(originator->op.code, originator->op.arg, false); + + skipped_instructions += _PyOpcode_Caches[_PyOpcode_Deopt[originator->op.code]] + 1; + skipped_stack_effect += pushed - popped; + + // trace backwards argument dataflow chain + if (popped > 0) { + short arg_stack_effect = 0; + ssize_t lowest_argument_position = *simulated_stack_size - popped; + _PyExternal_FindDataflowSource(source_instruction_offset, frame, lowest_argument_position, + &source_instruction_offset, + &arg_stack_effect, simulated_stack_size); + skipped_stack_effect += arg_stack_effect; + } + *stack_effect_out = skipped_stack_effect; + *instruction_offset_out = source_instruction_offset; +} + +static void +_PyExternal_SkipArgSetup(unsigned char unused_args_bitset, _Py_CODEUNIT* instr, PyObject** stack_pointer, + _PyInterpreterFrame* frame) +{ + Py_ssize_t initial_simulated_stack_size = (int) (stack_pointer - _PyFrame_Stackbase(frame)); + Py_ssize_t consuming_instruction_offset = instr - _PyCode_CODE(frame->f_code); + + for (int arg_position = 0; arg_position < 8; arg_position++) + { + if (unused_args_bitset & (1 << arg_position)) + { + Py_ssize_t arg_chain_start = -1; + short arg_chain_stack_effect = 0; + Py_ssize_t simulated_stack_size = initial_simulated_stack_size; + ssize_t arg_stack_position = initial_simulated_stack_size - arg_position - 1; + _PyExternal_FindDataflowSource(consuming_instruction_offset, frame, arg_stack_position, + &arg_chain_start, &arg_chain_stack_effect, 
&simulated_stack_size); + // TODO: make sure there are no side-effecting instructions in the skipped range + + assert(arg_chain_start >= 0); + PyExternalDeoptInfo *parent_deopt_info = _PyExternal_GetDeoptInfo(instr, frame); + _Py_CODEUNIT *first_in_chain = _PyCode_CODE(frame->f_code) + arg_chain_start; + PyExternalDeoptInfo *chain_start_deopt_info = _PyExternal_NewDeoptInfo(first_in_chain, frame); + + // rewrite to a jump + first_in_chain->op.code = JUMP_FORWARD; + // TODO: handle the case of more than one replaced argument + // a replaced argument chain must skip to the beginning of the argument chain of the next argument + // instead of directly to the consuming instruction as it is done here. In a second step, a chain of + // JUMP_FORWARD instructions can be converted into a single long jump + first_in_chain->op.arg = consuming_instruction_offset - arg_chain_start - 1; + + if (arg_chain_start > 0) + { + // If the previous instruction is a super instruction, we need to deoptimize it, otherwise the + // JUMP_FORWARD will never be called. 
We create a deopt record, however, so the super instruction is + // restored in case of a deopt + unsigned char prev_cache_size = get_size_byte(frame->f_code, arg_chain_start); + _Py_CODEUNIT *predecessor = _PyCode_CODE(frame->f_code) + arg_chain_start - prev_cache_size - 1; + if (predecessor->op.code == LOAD_FAST__LOAD_CONST) + { + PyExternalDeoptInfo *pred_deopt_info = _PyExternal_NewDeoptInfo(predecessor, frame); + chain_start_deopt_info->child = pred_deopt_info; + predecessor->op.code = LOAD_FAST; + } + } + + parent_deopt_info->child = chain_start_deopt_info; + +#ifdef CMLQ_DEBUG + printf("Replaced argument originator in %s at %ld with JUMP(%d)\n", function_name(frame), arg_chain_start, first_in_chain->op.arg); +#endif + + consuming_instruction_offset = arg_chain_start; + } + } +} + +// located in ceval.c +void PyExternal_SetCodeHandler(int slot, PyExternal_CodeHandler handler); + +#ifdef INSTR_STATS +void add_optimized_function(PyCodeObject* co); +#endif + +int +_PyExternal_IsDeoptInfoValid(_PyInterpreterFrame* frame) { + PyExternalDeoptInfo *current = frame->f_code->co_deopt_info_head; + while (current != NULL) { + if (current->next) { + assert(current->next->prev == current); + } + // TODO: if between any two replaced instructions there were non-replaced instructions, + // we need to undo their stack effect here. We don't support this case in _PyExternal_SkipArgSetup yet though. 
+ assert(current->position); + assert(_PyExternal_GetDeoptInfo(current->position, frame) == current); + current = current->next; + } + + return 1; +} + +_Py_CODEUNIT* +_PyExternal_Deoptimize(const _Py_CODEUNIT *instr, _PyInterpreterFrame* frame) { +#ifdef CMLQ_DEBUG + Py_ssize_t offset = instr - _PyCode_CODE(frame->f_code); + printf("Deoptimizing in %s at %ld with slot %d\n", function_name(frame), offset, instr->op.arg); +#endif + + PyExternalDeoptInfo *current = _PyExternal_GetDeoptInfo(instr, frame); + _Py_CODEUNIT *replay_from; + do { + *current->position = current->orig_instr; + replay_from = current->position; + PyExternalDeoptInfo *previous = current; + current = current->child; + _PyExternal_DropDeoptInfo(previous, frame); + } while (current != NULL); + + assert(replay_from); + if (replay_from->op.code == LOAD_FAST__LOAD_CONST) { + // we replaced the super instruction only to reach the second instruction + // replay from this second instruction + replay_from += 1; + } + + return replay_from; +} + + +#endif + +static int PyExternal_SpecializeInstruction(_Py_CODEUNIT *instr, int slot, PyExternal_CodeHandler new_handler, void *external_cache_pointer) +{ +#if !defined(Py_OPT_CMLQ_ENV) && !defined(Py_OPT_CMLQ_ALWAYS) + return 0; +#else + + PyThreadState *tstate = PyThreadState_Get(); + _PyInterpreterFrame *frame = tstate->cframe->current_frame; +#ifdef CMLQ_DEBUG + printf("Quickened in %s at %ld with slot %d (%s)\n", function_name(frame), instr - _PyCode_CODE(frame->f_code), slot, + _PyUnicode_AsString(frame->f_code->co_filename));; +#endif + + +#ifdef INSTR_STATS + add_optimized_function(frame->f_code); +#endif + + PyExternal_SetCodeHandler(slot, new_handler); + _PyExternal_NewDeoptInfo(instr, frame); + + uint8_t orig_opcode = _PyOpcode_Deopt[instr->op.code]; + switch (orig_opcode) + { + case BINARY_OP: + { + instr->op.code = BINARY_OP_EXTERNAL; + _PyBinaryOpCache *cache = (_PyBinaryOpCache *)(instr + 1); + memcpy(cache->external_cache_pointer, 
&external_cache_pointer, sizeof(void*)); + break; + } + case CALL: + { + instr->op.code = CALL_EXTERNAL; + _PyCallCache *cache = (_PyCallCache *)(instr + 1); + memcpy(cache->external_cache_pointer, &external_cache_pointer, sizeof(void*)); + break; + } + case BINARY_SUBSCR: + { + instr->op.code = BINARY_SUBSCR_EXTERNAL; + _PyBinarySubscrCache *cache = (_PyBinarySubscrCache *)(instr + 1); + memcpy(cache->external_cache_pointer, &external_cache_pointer, sizeof(void*)); + break; + } + default: + { + raise(SIGTRAP); + } + } + + instr->op.arg = slot; + + return 0; +#endif +} + +int PyExternal_SpecializeChain(_Py_CODEUNIT *instr, PyObject **stack_pointer, int slot, PyExternal_CodeHandler new_handler, + unsigned char unused_args_bitset, void *external_cache_pointer) +{ +#if !defined(Py_OPT_CMLQ_ENV) && !defined(Py_OPT_CMLQ_ALWAYS) + return 0; +#else + + PyThreadState *tstate = PyThreadState_Get(); + _PyInterpreterFrame *frame = tstate->cframe->current_frame; + int result = PyExternal_SpecializeInstruction(instr, slot, new_handler, external_cache_pointer); + if (result < 0) + { + return result; + } + _PyExternal_SkipArgSetup(unused_args_bitset, instr, stack_pointer, frame); + + return 0; +#endif +} + +int PyExternal_IsConstant(_Py_CODEUNIT *instr, PyObject **stack_pointer, int stack_position) +{ +#if !defined(Py_OPT_CMLQ_ENV) && !defined(Py_OPT_CMLQ_ALWAYS) + return 0; +#else + + PyThreadState *tstate = PyThreadState_Get(); + _PyInterpreterFrame *frame = tstate->cframe->current_frame; + + Py_ssize_t instruction_offset; + Py_ssize_t simulated_stack_size = (int) (stack_pointer - _PyFrame_Stackbase(frame));; + Py_ssize_t consuming_instruction_offset = instr - _PyCode_CODE(frame->f_code); + _PyExternal_FindStackposOriginator(frame, stack_position, consuming_instruction_offset, &instruction_offset, &simulated_stack_size); + + if (instruction_offset < 0) + { + return 0; + } + + return _Py_GetBaseOpcode(frame->f_code, instruction_offset) == LOAD_CONST; + + return 0; +#endif +} + 
+void +PyExternal_SetSpecializer(PyExternalSpecializer *specializer) { + external_specializer = specializer; + specializer->SpecializeInstruction = PyExternal_SpecializeInstruction; + specializer->SpecializeChain = PyExternal_SpecializeChain; + specializer->IsOperandConstant = PyExternal_IsConstant; +} + +int +_PyExternal_TrySpecialize(_Py_CODEUNIT *instr, PyObject ***stack_pointer, _PyCache *cache) { + if (_PyOpcode_Deopt[instr->op.code] != instr->op.code) { + // the instruction was already specialized + return 0; + } + +#if !defined(Py_OPT_CMLQ_ENV) && !defined(Py_OPT_CMLQ_ALWAYS) + return 0; +#else + if (external_specializer && external_specializer->TrySpecialization) { + +#if defined(Py_OPT_CMLQ_ENV) + int should_rewrite = getenv("CMLQ_REWRITE") != NULL; + if (!should_rewrite) { + return 0; + } +#endif + +#ifdef INSTR_STATS + PyThreadState *tstate = PyThreadState_Get(); + _PyInterpreterFrame *frame = tstate->cframe->current_frame; + CMLQStatsElem *stats_elem = get_stats_elem(frame->f_code, instr); + stats_elem->specialization_attempts++; +#endif + int result = external_specializer->TrySpecialization(instr, stack_pointer); + + if (result) { + cache->counter = adaptive_counter_cooldown(); + return result; + } + } + + // don't apply a counter backoff here, it was already applied by the internal specialization attempt + return 0; +#endif +} + +void +_PyExternal_FunctionEnd(_PyInterpreterFrame* frame) { +#if !defined(Py_OPT_CMLQ_ENV) && !defined(Py_OPT_CMLQ_ALWAYS) + return; // void function: must not return a value +#else + if (external_specializer) { +#if defined(Py_OPT_CMLQ_ENV) + int should_rewrite = getenv("CMLQ_REWRITE") != NULL; + if (!should_rewrite) { + return; + } +#endif + + if (frame->f_code->co_deopt_info_head && external_specializer && external_specializer->FunctionEnd) { + PyExternalDeoptInfo* current = frame->f_code->co_deopt_info_head; + + while (current != NULL) { + void* external_cache_pointer = NULL; +#define EXTRACT_POINTER_FROM_CACHE(TYPE) \ + POINTER_FROM_ARRAY(((TYPE
*)(current->position + 1))->external_cache_pointer); + switch (current->position->op.code) { + case BINARY_OP_EXTERNAL: + { + external_cache_pointer = EXTRACT_POINTER_FROM_CACHE(_PyBinaryOpCache); + break; + } + case CALL_EXTERNAL: + { + external_cache_pointer = EXTRACT_POINTER_FROM_CACHE(_PyCallCache); + break; + } + case BINARY_SUBSCR_EXTERNAL: + { + external_cache_pointer = EXTRACT_POINTER_FROM_CACHE(_PyBinarySubscrCache); + break; + } + case JUMP_FORWARD: + case LOAD_FAST: + { + // from optimized chains and split super instructions + break; + } + default: + { + raise(SIGTRAP); + } + } + if (external_cache_pointer != NULL) { + external_specializer->FunctionEnd(current->position, external_cache_pointer); + } + current = current->next; + } +#undef EXTRACT_POINTER_FROM_CACHE + } + } +#endif +} + + void _Py_Specialize_BinaryOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr, int oparg, PyObject **locals) diff --git a/README.md b/README.md new file mode 100644 index 00000000000000..f4b7dd60742816 --- /dev/null +++ b/README.md @@ -0,0 +1,8 @@ +This repository contains the CPython fork for the paper [Cross Module Quickening - The Curious Case of C Extensions](https://ucsrl.de/publications/cmq-ecoop24-preprint.pdf). +The code is a fork of [CPython](https://github.com/python/cpython) version `3.12.0` and adds the optimization interface described in the paper. +The optimization interface is entirely optional. +If no extension registers optimizations, everything should work as usual. + +You can find the most important changes in [specialize.c](Python/specialize.c) (functions prefixed with `_PyExternal`) and in [ceval.c](Python/ceval.c) (opcode definitions ending with `_EXTERNAL`). +For details on how to build CPython, please refer to the [official documentation](https://devguide.python.org/setup/) as well as the [Dockerfile](https://github.com/fberlakovich/cmq-npbench-ae/blob/ae/Dockerfile) in the `NPBench` repository.
+The Dockerfile contains the specific build steps for this fork. diff --git a/README.rst b/README.rst deleted file mode 100644 index 979ca01896a6db..00000000000000 --- a/README.rst +++ /dev/null @@ -1,241 +0,0 @@ -This is Python version 3.12.0 -============================= - -.. image:: https://github.com/python/cpython/workflows/Tests/badge.svg - :alt: CPython build status on GitHub Actions - :target: https://github.com/python/cpython/actions - -.. image:: https://dev.azure.com/python/cpython/_apis/build/status/Azure%20Pipelines%20CI?branchName=main - :alt: CPython build status on Azure DevOps - :target: https://dev.azure.com/python/cpython/_build/latest?definitionId=4&branchName=main - -.. image:: https://img.shields.io/badge/discourse-join_chat-brightgreen.svg - :alt: Python Discourse chat - :target: https://discuss.python.org/ - - -Copyright © 2001-2023 Python Software Foundation. All rights reserved. - -See the end of this file for further copyright and license information. - -.. contents:: - -General Information -------------------- - -- Website: https://www.python.org -- Source code: https://github.com/python/cpython -- Issue tracker: https://github.com/python/cpython/issues -- Documentation: https://docs.python.org -- Developer's Guide: https://devguide.python.org/ - -Contributing to CPython ------------------------ - -For more complete instructions on contributing to CPython development, -see the `Developer Guide`_. - -.. _Developer Guide: https://devguide.python.org/ - -Using Python ------------- - -Installable Python kits, and information about using Python, are available at -`python.org`_. - -.. _python.org: https://www.python.org/ - -Build Instructions ------------------- - -On Unix, Linux, BSD, macOS, and Cygwin:: - - ./configure - make - make test - sudo make install - -This will install Python as ``python3``. - -You can pass many options to the configure script; run ``./configure --help`` -to find out more. 
On macOS case-insensitive file systems and on Cygwin, -the executable is called ``python.exe``; elsewhere it's just ``python``. - -Building a complete Python installation requires the use of various -additional third-party libraries, depending on your build platform and -configure options. Not all standard library modules are buildable or -useable on all platforms. Refer to the -`Install dependencies `_ -section of the `Developer Guide`_ for current detailed information on -dependencies for various Linux distributions and macOS. - -On macOS, there are additional configure and build options related -to macOS framework and universal builds. Refer to `Mac/README.rst -`_. - -On Windows, see `PCbuild/readme.txt -`_. - -If you wish, you can create a subdirectory and invoke configure from there. -For example:: - - mkdir debug - cd debug - ../configure --with-pydebug - make - make test - -(This will fail if you *also* built at the top-level directory. You should do -a ``make clean`` at the top-level first.) - -To get an optimized build of Python, ``configure --enable-optimizations`` -before you run ``make``. This sets the default make targets up to enable -Profile Guided Optimization (PGO) and may be used to auto-enable Link Time -Optimization (LTO) on some platforms. For more details, see the sections -below. - -Profile Guided Optimization -^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -PGO takes advantage of recent versions of the GCC or Clang compilers. If used, -either via ``configure --enable-optimizations`` or by manually running -``make profile-opt`` regardless of configure flags, the optimized build -process will perform the following steps: - -The entire Python directory is cleaned of temporary files that may have -resulted from a previous compilation. - -An instrumented version of the interpreter is built, using suitable compiler -flags for each flavor. Note that this is just an intermediary step. 
The -binary resulting from this step is not good for real-life workloads as it has -profiling instructions embedded inside. - -After the instrumented interpreter is built, the Makefile will run a training -workload. This is necessary in order to profile the interpreter's execution. -Note also that any output, both stdout and stderr, that may appear at this step -is suppressed. - -The final step is to build the actual interpreter, using the information -collected from the instrumented one. The end result will be a Python binary -that is optimized; suitable for distribution or production installation. - - -Link Time Optimization -^^^^^^^^^^^^^^^^^^^^^^ - -Enabled via configure's ``--with-lto`` flag. LTO takes advantage of the -ability of recent compiler toolchains to optimize across the otherwise -arbitrary ``.o`` file boundary when building final executables or shared -libraries for additional performance gains. - - -What's New ----------- - -We have a comprehensive overview of the changes in the `What's New in Python -3.12 `_ document. For a more -detailed change log, read `Misc/NEWS -`_, but a full -accounting of changes can only be gleaned from the `commit history -`_. - -If you want to install multiple versions of Python, see the section below -entitled "Installing multiple versions". - - -Documentation -------------- - -`Documentation for Python 3.12 `_ is online, -updated daily. - -It can also be downloaded in many formats for faster access. The documentation -is downloadable in HTML, PDF, and reStructuredText formats; the latter version -is primarily for documentation authors, translators, and people with special -formatting requirements. - -For information about building Python's documentation, refer to `Doc/README.rst -`_. 
- - -Converting From Python 2.x to 3.x ---------------------------------- - -Significant backward incompatible changes were made for the release of Python -3.0, which may cause programs written for Python 2 to fail when run with Python -3. For more information about porting your code from Python 2 to Python 3, see -the `Porting HOWTO `_. - - -Testing -------- - -To test the interpreter, type ``make test`` in the top-level directory. The -test set produces some output. You can generally ignore the messages about -skipped tests due to optional features which can't be imported. If a message -is printed about a failed test or a traceback or core dump is produced, -something is wrong. - -By default, tests are prevented from overusing resources like disk space and -memory. To enable these tests, run ``make testall``. - -If any tests fail, you can re-run the failing test(s) in verbose mode. For -example, if ``test_os`` and ``test_gdb`` failed, you can run:: - - make test TESTOPTS="-v test_os test_gdb" - -If the failure persists and appears to be a problem with Python rather than -your environment, you can `file a bug report -`_ and include relevant output from -that command to show the issue. - -See `Running & Writing Tests `_ -for more on running tests. - -Installing multiple versions ----------------------------- - -On Unix and Mac systems if you intend to install multiple versions of Python -using the same installation prefix (``--prefix`` argument to the configure -script) you must take care that your primary python executable is not -overwritten by the installation of a different version. All files and -directories installed using ``make altinstall`` contain the major and minor -version and can thus live side-by-side. ``make install`` also creates -``${prefix}/bin/python3`` which refers to ``${prefix}/bin/python3.X``. If you -intend to install multiple versions using the same prefix you must decide which -version (if any) is your "primary" version. 
Install that version using ``make -install``. Install all other versions using ``make altinstall``. - -For example, if you want to install Python 2.7, 3.6, and 3.12 with 3.12 being the -primary version, you would execute ``make install`` in your 3.12 build directory -and ``make altinstall`` in the others. - - -Release Schedule ----------------- - -See :pep:`693` for Python 3.12 release details. - - -Copyright and License Information ---------------------------------- - - -Copyright © 2001-2023 Python Software Foundation. All rights reserved. - -Copyright © 2000 BeOpen.com. All rights reserved. - -Copyright © 1995-2001 Corporation for National Research Initiatives. All -rights reserved. - -Copyright © 1991-1995 Stichting Mathematisch Centrum. All rights reserved. - -See the `LICENSE `_ for -information on the history of this software, terms & conditions for usage, and a -DISCLAIMER OF ALL WARRANTIES. - -This Python distribution contains *no* GNU General Public License (GPL) code, -so it may be used in proprietary projects. There are interfaces to some GNU -code but these are entirely optional. - -All trademarks referenced herein are property of their respective holders. 
diff --git a/Tools/build/generate_opcode_h.py b/Tools/build/generate_opcode_h.py index 5be981005725bf..e09a42d7e0545d 100644 --- a/Tools/build/generate_opcode_h.py +++ b/Tools/build/generate_opcode_h.py @@ -209,7 +209,7 @@ def main(opcode_py, outfile='Include/opcode.h', fobj.write(f"#define ENABLE_SPECIALIZATION {int(ENABLE_SPECIALIZATION)}") iobj.write("\n") - iobj.write("#ifdef Py_DEBUG\n") + iobj.write("#if defined(Py_DEBUG) || defined(INSTR_STATS)\n") iobj.write(f"static const char *const _PyOpcode_OpName[{NUM_OPCODES}] = {{\n") for op, name in enumerate(opname_including_specialized): if name[0] != "<": diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py index 62ddeac0265ad8..6b6497669a6fa1 100644 --- a/Tools/cases_generator/generate_cases.py +++ b/Tools/cases_generator/generate_cases.py @@ -855,6 +855,14 @@ def stack_analysis( "which are not supported in super- or macro instructions", instr.inst, # TODO: Pass name+location of super/macro ) + if any( + eff.cond for eff in instr.input_effects + instr.output_effects + ): + self.error( + f"Instruction {instr.name!r} has conditional stack effect, " + "which are not supported in super- or macro instructions", + instr.inst, + ) current -= len(instr.input_effects) lowest = min(lowest, current) current += len(instr.output_effects) diff --git a/aclocal.m4 b/aclocal.m4 index da8ee95b9c7f6b..78933baf2f651e 100644 --- a/aclocal.m4 +++ b/aclocal.m4 @@ -1,4 +1,4 @@ -# generated automatically by aclocal 1.16.4 -*- Autoconf -*- +# generated automatically by aclocal 1.16.5 -*- Autoconf -*- # Copyright (C) 1996-2021 Free Software Foundation, Inc. @@ -275,8 +275,8 @@ AC_DEFUN([AX_CHECK_OPENSSL], [ AC_SUBST([OPENSSL_LDFLAGS]) ]) -# pkg.m4 - Macros to locate and utilise pkg-config. -*- Autoconf -*- -# serial 11 (pkg-config-0.29.1) +# pkg.m4 - Macros to locate and use pkg-config. -*- Autoconf -*- +# serial 12 (pkg-config-0.29.2) dnl Copyright © 2004 Scott James Remnant . 
dnl Copyright © 2012-2015 Dan Nicholson @@ -318,7 +318,7 @@ dnl dnl See the "Since" comment for each macro you use to see what version dnl of the macros you require. m4_defun([PKG_PREREQ], -[m4_define([PKG_MACROS_VERSION], [0.29.1]) +[m4_define([PKG_MACROS_VERSION], [0.29.2]) m4_if(m4_version_compare(PKG_MACROS_VERSION, [$1]), -1, [m4_fatal([pkg.m4 version $1 or higher is required but ]PKG_MACROS_VERSION[ found])]) ])dnl PKG_PREREQ @@ -363,7 +363,7 @@ dnl Check to see whether a particular set of modules exists. Similar to dnl PKG_CHECK_MODULES(), but does not set variables or print errors. dnl dnl Please remember that m4 expands AC_REQUIRE([PKG_PROG_PKG_CONFIG]) -dnl only at the first occurence in configure.ac, so if the first place +dnl only at the first occurrence in configure.ac, so if the first place dnl it's called might be skipped (such as if it is within an "if", you dnl have to call PKG_CHECK_EXISTS manually AC_DEFUN([PKG_CHECK_EXISTS], @@ -419,7 +419,7 @@ AC_ARG_VAR([$1][_CFLAGS], [C compiler flags for $1, overriding pkg-config])dnl AC_ARG_VAR([$1][_LIBS], [linker flags for $1, overriding pkg-config])dnl pkg_failed=no -AC_MSG_CHECKING([for $1]) +AC_MSG_CHECKING([for $2]) _PKG_CONFIG([$1][_CFLAGS], [cflags], [$2]) _PKG_CONFIG([$1][_LIBS], [libs], [$2]) @@ -429,17 +429,17 @@ and $1[]_LIBS to avoid the need to call pkg-config. 
See the pkg-config man page for more details.]) if test $pkg_failed = yes; then - AC_MSG_RESULT([no]) + AC_MSG_RESULT([no]) _PKG_SHORT_ERRORS_SUPPORTED if test $_pkg_short_errors_supported = yes; then - $1[]_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "$2" 2>&1` - else - $1[]_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "$2" 2>&1` + $1[]_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "$2" 2>&1` + else + $1[]_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "$2" 2>&1` fi - # Put the nasty error message in config.log where it belongs - echo "$$1[]_PKG_ERRORS" >&AS_MESSAGE_LOG_FD + # Put the nasty error message in config.log where it belongs + echo "$$1[]_PKG_ERRORS" >&AS_MESSAGE_LOG_FD - m4_default([$4], [AC_MSG_ERROR( + m4_default([$4], [AC_MSG_ERROR( [Package requirements ($2) were not met: $$1_PKG_ERRORS @@ -450,8 +450,8 @@ installed software in a non-standard prefix. _PKG_TEXT])[]dnl ]) elif test $pkg_failed = untried; then - AC_MSG_RESULT([no]) - m4_default([$4], [AC_MSG_FAILURE( + AC_MSG_RESULT([no]) + m4_default([$4], [AC_MSG_FAILURE( [The pkg-config script could not be found or is too old. Make sure it is in your PATH or set the PKG_CONFIG environment variable to the full path to pkg-config. 
@@ -461,10 +461,10 @@ _PKG_TEXT To get pkg-config, see .])[]dnl ]) else - $1[]_CFLAGS=$pkg_cv_[]$1[]_CFLAGS - $1[]_LIBS=$pkg_cv_[]$1[]_LIBS + $1[]_CFLAGS=$pkg_cv_[]$1[]_CFLAGS + $1[]_LIBS=$pkg_cv_[]$1[]_LIBS AC_MSG_RESULT([yes]) - $3 + $3 fi[]dnl ])dnl PKG_CHECK_MODULES diff --git a/cmlq_examples/adi.py b/cmlq_examples/adi.py new file mode 100644 index 00000000000000..c1d06750e94a68 --- /dev/null +++ b/cmlq_examples/adi.py @@ -0,0 +1,82 @@ +import numpy as np + +__rewrite__ = False + +import contextlib + + +@contextlib.contextmanager +def disabled_cmlq(): + global __rewrite__ + before = __rewrite__ + __rewrite__ = False + yield + __rewrite__ = before + + +counter = 0 + + +def check_minus_result(left, right, result): + global counter + counter += 1 + with disabled_cmlq(): + if not np.allclose(left - right, result): + print(f"+++++++++++++++ OPERATION DOES NOT MATCH (Iteration {counter} +++++++++++++++") + print("Left:", left) + print() + print("Right:", right) + print() + print() + + +def kernel(TSTEPS, N): + global counter + u = np.fromfunction(lambda i, j: (i + N - j) / N, (N, N), dtype=np.float64) + + v = np.empty(u.shape, dtype=u.dtype) + p = np.empty(u.shape, dtype=u.dtype) + q = np.empty(u.shape, dtype=u.dtype) + + DX = 1.0 / N + DY = 1.0 / N + DT = 1.0 / TSTEPS + B1 = 2.0 + B2 = 1.0 + mul1 = B1 * DT / (DX * DX) + mul2 = B2 * DT / (DY * DY) + + a = -mul1 / 2.0 + b = 1.0 + mul2 + c = a + d = -mul2 / 2.0 + e = 1.0 + mul2 + f = d + + for t in range(1, TSTEPS + 1): + v[0, 1:N - 1] = 1.0 + p[1:N - 1, 0] = 0.0 + q[1:N - 1, 0] = v[0, 1:N - 1] + for j in range(1, N - 1): + p[1:N - 1, j] = -c / (a * p[1:N - 1, j - 1] + b) + q[1:N - 1, j] = (-d * u[j, 0:N - 2] + + (1.0 + 2.0 * d) * u[j, 1:N - 1] - f * u[j, 2:N] - + a * q[1:N - 1, j - 1]) / (a * p[1:N - 1, j - 1] + b) + v[N - 1, 1:N - 1] = 1.0 + for j in range(N - 2, 0, -1): + v[j, 1:N - 1] = p[1:N - 1, j] * v[j + 1, 1:N - 1] + q[1:N - 1, j] + + breakpoint() + u[1:N - 1, 0] = 1.0 + p[1:N - 1, 0] = 0.0 + q[1:N - 1, 0] = 
u[1:N - 1, 0] + for j in range(1, N - 1): + p[1:N - 1, j] = -f / (d * p[1:N - 1, j - 1] + e) + q[1:N - 1, j] = (-a * v[0:N - 2, j] + + (1.0 + 2.0 * a) * v[1:N - 1, j] - c * v[2:N, j] - + d * q[1:N - 1, j - 1]) / (d * p[1:N - 1, j - 1] + e) + u[1:N - 1, N - 1] = 1.0 + for j in range(N - 2, 0, -1): + u[1:N - 1, j] = p[1:N - 1, j] * u[1:N - 1, j + 1] + q[1:N - 1, j] + + return u diff --git a/cmlq_examples/adi_numpy.py b/cmlq_examples/adi_numpy.py new file mode 100644 index 00000000000000..3e32a8cae0bd87 --- /dev/null +++ b/cmlq_examples/adi_numpy.py @@ -0,0 +1,57 @@ +import numpy as np + + +def kernel(TSTEPS, N, u): + v = np.empty(u.shape, dtype=u.dtype) + p = np.empty(u.shape, dtype=u.dtype) + q = np.empty(u.shape, dtype=u.dtype) + + DX = 1.0 / N + DY = 1.0 / N + DT = 1.0 / TSTEPS + B1 = 2.0 + B2 = 1.0 + mul1 = B1 * DT / (DX * DX) + mul2 = B2 * DT / (DY * DY) + + a = -mul1 / 2.0 + b = 1.0 + mul2 + c = a + d = -mul2 / 2.0 + e = 1.0 + mul2 + f = d + + for t in range(1, TSTEPS + 1): + v[0, 1:N - 1] = 1.0 + p[1:N - 1, 0] = 0.0 + q[1:N - 1, 0] = v[0, 1:N - 1] + for j in range(1, N - 1): + p[1:N - 1, j] = -c / (a * p[1:N - 1, j - 1] + b) + q[1:N - 1, + j] = (-d * u[j, 0:N - 2] + + (1.0 + 2.0 * d) * u[j, 1:N - 1] - f * u[j, 2:N] - + a * q[1:N - 1, j - 1]) / (a * p[1:N - 1, j - 1] + b) + v[N - 1, 1:N - 1] = 1.0 + for j in range(N - 2, 0, -1): + v[j, 1:N - 1] = p[1:N - 1, j] * v[j + 1, 1:N - 1] + q[1:N - 1, j] + + u[1:N - 1, 0] = 1.0 + p[1:N - 1, 0] = 0.0 + q[1:N - 1, 0] = u[1:N - 1, 0] + for j in range(1, N - 1): + p[1:N - 1, j] = -f / (d * p[1:N - 1, j - 1] + e) + q[1:N - 1, + j] = (-a * v[0:N - 2, j] + + (1.0 + 2.0 * a) * v[1:N - 1, j] - c * v[2:N, j] - + d * q[1:N - 1, j - 1]) / (d * p[1:N - 1, j - 1] + e) + u[1:N - 1, N - 1] = 1.0 + for j in range(N - 2, 0, -1): + u[1:N - 1, j] = p[1:N - 1, j] * u[1:N - 1, j + 1] + q[1:N - 1, j] + + +def call_kernel(TSTEPS, N): + u = np.fromfunction(lambda i, j: (i + N - j) / N, (N, N), dtype=np.float64) + kernel(TSTEPS, N, u) + + 
+call_kernel(50, 500) diff --git a/cmlq_examples/adist.py b/cmlq_examples/adist.py new file mode 100644 index 00000000000000..fe852a24578022 --- /dev/null +++ b/cmlq_examples/adist.py @@ -0,0 +1,81 @@ +import numpy as np + +__rewrite__ = False + +import contextlib +import inspect + + +@contextlib.contextmanager +def disabled_cmlq(): + global __rewrite__ + before = __rewrite__ + __rewrite__ = False + yield + __rewrite__ = before + + +counter = 0 + + +def report_error(left, result, correct_result, wrong_result, operator): + global counter + if not np.allclose(correct_result, result): + print(f"+++++++++++++++ {operator} DOES NOT MATCH (Iteration {counter}) +++++++++++++++") + print("Left:", left) + print() + print("Right:", wrong_result) + print() + print("Correct Result:", correct_result) + print() + print("Wrong Result:", result) + print("Line:", inspect.currentframe().f_back.f_back.f_lineno) + +def check_power_result(left, right, result): + global counter + counter += 1 + with disabled_cmlq(): + correct_result = left ** right + report_error(left, result, correct_result, right, "power") + + +def check_subtract_result(left, right, result): + global counter + with disabled_cmlq(): + correct_result = left - right + report_error(left, result, correct_result, right, "subtract") + + +def check_div_result(left, right, result): + global counter + with disabled_cmlq(): + correct_result = left / right + report_error(left, result, correct_result, right, "div") + + +def arc_distance(N): + from numpy.random import default_rng + rng = default_rng(42) + theta_1, phi_1, theta_2, phi_2 = rng.random((N,)), rng.random((N,)), rng.random((N,)), rng.random((N,)) + + """ + Calculates the pairwise arc distance between all points in vector a and b. 
+ """ + subtraction = theta_2 - theta_1 + check_subtract_result(theta_2, theta_1, subtraction) + div_result = (subtraction) / 2 + check_div_result(subtraction, 2, div_result) + left = np.sin(div_result) + right = 2 + power_result1 = left ** right + check_power_result(left, right, power_result1) + subtraction = (phi_2 - phi_1) + check_subtract_result(phi_2, phi_1, subtraction) + div_result = subtraction / 2 + check_div_result(subtraction, 2, div_result) + left = np.sin(div_result) + power_result2 = left ** right + check_power_result(left, right, power_result2) + temp = power_result1 + np.cos(theta_1) * np.cos(theta_2) * power_result2 + distance_matrix = 2 * (np.arctan2(np.sqrt(temp), np.sqrt(1 - temp))) + return distance_matrix diff --git a/cmlq_examples/bench_test.py b/cmlq_examples/bench_test.py new file mode 100644 index 00000000000000..d2d5100c2f5ffd --- /dev/null +++ b/cmlq_examples/bench_test.py @@ -0,0 +1,144 @@ +import copy +import dis +import sys +from io import BytesIO +from itertools import * + +import benchbase +from benchbase import (with_attributes, with_text, onlylib, + serialized, children, nochange) + +TEXT = "some ASCII text" +UTEXT = u"some klingon: \uF8D2" + + +############################################################ +# Benchmarks +############################################################ + +class BenchMark(benchbase.TreeBenchMark): + + def bench_create_subelements(self, root): + global __rewrite__ + SubElement = self.etree.SubElement + sum = 0 + for r in range(300): + __rewrite__ = True + for child in root: + SubElement(child, '{test}test') + sum = len(child) + sum + __rewrite__ = False + + # print(sum) + + +def writeinst(opc: str, arg: int = 0): + "Makes life easier in writing python bytecode" + nb = max(1, -(-arg.bit_length() // 8)) + ab = arg.to_bytes(nb, sys.byteorder) + ext_arg = dis._all_opmap['EXTENDED_ARG'] + inst = bytearray() + for i in range(nb - 1): + inst.append(ext_arg) + inst.append(ab[i]) + inst.append(dis._all_opmap[opc]) 
+ inst.append(ab[-1]) + + return bytes(inst) + +def patch(patches): + code = dis.Bytecode(BenchMark.bench_create_subelements, show_caches=True) + bytelist = [] + for instr in code: + name = instr.opname + arg = instr.arg + if instr.offset in patches: + patch = patches[instr.offset] + name = patch[0] + if patch[1] is not None: + arg = patch[1] + + if arg is None: + arg = 0 + bytelist.append(writeinst(name, arg)) + + bytes = b"".join(bytelist) + + orig = BenchMark.bench_create_subelements.__code__ + BenchMark.bench_create_subelements.__code__ = orig.replace(co_code=bytes, co_consts=orig.co_consts, co_names=orig.co_names, + co_flags=orig.co_flags) + +if __name__ == '__main__': + # print("===== BEFORE PATCH =====") + # dis.dis(BenchMark.bench_create_subelements, show_caches=True) + # patch({ + # 86: ("LXML_FOR_ITER", None), + # 90: ("LXML_STORE_FAST", None), + # 92: ("NOP", None), + # 94: ("NOP", None), + # 96: ("LXML_LOAD_FAST", None), + # 100: ("LXML_SUBELEMENT", None), + # 108: ("LXML_POP_TOP", None), + # 110: ("LXML_LEN", None), + # 120: ("NOP", None), # will be skipped + # 122: ("NOP", None), # will be skipped + # }) + # print("===== AFTER PATCH =====") + # dis.dis(BenchMark.bench_create_subelements, show_caches=True) + + benchbase.main(BenchMark) + + +# 21 0 RESUME 0 +# +# 23 2 LOAD_FAST 0 (self) +# 4 LOAD_ATTR 0 (etree) +# 24 LOAD_ATTR 2 (SubElement) +# 44 STORE_FAST 2 (SubElement) +# +# 24 46 LOAD_CONST 1 (0) +# 48 STORE_FAST 3 (sum) +# +# 25 50 LOAD_GLOBAL 5 (NULL + range) +# 60 LOAD_CONST 2 (100) +# 62 CALL 1 +# 70 GET_ITER +# >> 72 FOR_ITER 36 (to 148) +# 76 STORE_FAST 4 (r) +# +# 26 78 LOAD_CONST 3 (True) +# 80 STORE_GLOBAL 3 (__rewrite__) +# +# 27 82 LOAD_FAST 1 (root) +# 84 GET_ITER +# >> 86 FOR_ITER 25 (to 140) +# 90 STORE_FAST 5 (child) +# +# 28 92 PUSH_NULL +# 94 LOAD_FAST 2 (SubElement) +# 96 LOAD_FAST 5 (child) +# 98 LOAD_CONST 4 ('{test}test') +# 100 CALL 2 +# 108 POP_TOP +# +# 29 110 LOAD_GLOBAL 9 (NULL + len) +# 120 LOAD_FAST 5 (child) +# 122 CALL 
1 +# 130 LOAD_FAST 3 (sum) +# 132 BINARY_OP 0 (+) +# 136 STORE_FAST 3 (sum) +# 138 JUMP_BACKWARD 27 (to 86) +# +# 27 >> 140 END_FOR +# +# 30 142 LOAD_CONST 5 (False) +# 144 STORE_GLOBAL 3 (__rewrite__) +# 146 JUMP_BACKWARD 38 (to 72) +# +# 25 >> 148 END_FOR +# +# 32 150 LOAD_GLOBAL 11 (NULL + print) +# 160 LOAD_FAST 3 (sum) +# 162 CALL 1 +# 170 POP_TOP +# 172 RETURN_CONST 0 (None) \ No newline at end of file diff --git a/cmlq_examples/channel_flow.py b/cmlq_examples/channel_flow.py new file mode 100644 index 00000000000000..ecadd0c3cb6511 --- /dev/null +++ b/cmlq_examples/channel_flow.py @@ -0,0 +1,138 @@ +import numpy as np + +def build_up_b(rho, dt, dx, dy, u, v): + b = np.zeros_like(u) + b[1:-1, + 1:-1] = (rho * (1 / dt * ((u[1:-1, 2:] - u[1:-1, 0:-2]) / (2 * dx) + + (v[2:, 1:-1] - v[0:-2, 1:-1]) / (2 * dy)) - + ((u[1:-1, 2:] - u[1:-1, 0:-2]) / (2 * dx)) ** 2 - 2 * + ((u[2:, 1:-1] - u[0:-2, 1:-1]) / (2 * dy) * + (v[1:-1, 2:] - v[1:-1, 0:-2]) / (2 * dx)) - + ((v[2:, 1:-1] - v[0:-2, 1:-1]) / (2 * dy)) ** 2)) + + # Periodic BC Pressure @ x = 2 + b[1:-1, -1] = (rho * (1 / dt * ((u[1:-1, 0] - u[1:-1, -2]) / (2 * dx) + + (v[2:, -1] - v[0:-2, -1]) / (2 * dy)) - + ((u[1:-1, 0] - u[1:-1, -2]) / (2 * dx)) ** 2 - 2 * + ((u[2:, -1] - u[0:-2, -1]) / (2 * dy) * + (v[1:-1, 0] - v[1:-1, -2]) / (2 * dx)) - + ((v[2:, -1] - v[0:-2, -1]) / (2 * dy)) ** 2)) + + # Periodic BC Pressure @ x = 0 + b[1:-1, 0] = (rho * (1 / dt * ((u[1:-1, 1] - u[1:-1, -1]) / (2 * dx) + + (v[2:, 0] - v[0:-2, 0]) / (2 * dy)) - + ((u[1:-1, 1] - u[1:-1, -1]) / (2 * dx)) ** 2 - 2 * + ((u[2:, 0] - u[0:-2, 0]) / (2 * dy) * + (v[1:-1, 1] - v[1:-1, -1]) / + (2 * dx)) - ((v[2:, 0] - v[0:-2, 0]) / (2 * dy)) ** 2)) + + return b + + +def pressure_poisson_periodic(nit, p, dx, dy, b): + pn = np.empty_like(p) + + for q in range(nit): + pn = p.copy() + p[1:-1, 1:-1] = (((pn[1:-1, 2:] + pn[1:-1, 0:-2]) * dy ** 2 + + (pn[2:, 1:-1] + pn[0:-2, 1:-1]) * dx ** 2) / + (2 * (dx ** 2 + dy ** 2)) - dx ** 2 * dy ** 2 / + (2 * (dx ** 
2 + dy ** 2)) * b[1:-1, 1:-1]) + + # Periodic BC Pressure @ x = 2 + p[1:-1, -1] = (((pn[1:-1, 0] + pn[1:-1, -2]) * dy ** 2 + + (pn[2:, -1] + pn[0:-2, -1]) * dx ** 2) / + (2 * (dx ** 2 + dy ** 2)) - dx ** 2 * dy ** 2 / + (2 * (dx ** 2 + dy ** 2)) * b[1:-1, -1]) + + # Periodic BC Pressure @ x = 0 + p[1:-1, + 0] = (((pn[1:-1, 1] + pn[1:-1, -1]) * dy ** 2 + + (pn[2:, 0] + pn[0:-2, 0]) * dx ** 2) / (2 * (dx ** 2 + dy ** 2)) - + dx ** 2 * dy ** 2 / (2 * (dx ** 2 + dy ** 2)) * b[1:-1, 0]) + + # Wall boundary conditions, pressure + p[-1, :] = p[-2, :] # dp/dy = 0 at y = 2 + p[0, :] = p[1, :] # dp/dy = 0 at y = 0 + + +def channel_flow(nit, u, v, dt, dx, dy, p, rho, nu, F): + udiff = 1 + stepcount = 0 + + while udiff > .001: + un = u.copy() + vn = v.copy() + + b = build_up_b(rho, dt, dx, dy, u, v) + pressure_poisson_periodic(nit, p, dx, dy, b) + + u[1:-1, 1:-1] = (un[1:-1, 1:-1] - un[1:-1, 1:-1] * dt / dx * + (un[1:-1, 1:-1] - un[1:-1, 0:-2]) - + vn[1:-1, 1:-1] * dt / dy * + (un[1:-1, 1:-1] - un[0:-2, 1:-1]) - dt / (2 * rho * dx) * + (p[1:-1, 2:] - p[1:-1, 0:-2]) + nu * + (dt / dx ** 2 * + (un[1:-1, 2:] - 2 * un[1:-1, 1:-1] + un[1:-1, 0:-2]) + + dt / dy ** 2 * + (un[2:, 1:-1] - 2 * un[1:-1, 1:-1] + un[0:-2, 1:-1])) + + F * dt) + + v[1:-1, 1:-1] = (vn[1:-1, 1:-1] - un[1:-1, 1:-1] * dt / dx * + (vn[1:-1, 1:-1] - vn[1:-1, 0:-2]) - + vn[1:-1, 1:-1] * dt / dy * + (vn[1:-1, 1:-1] - vn[0:-2, 1:-1]) - dt / (2 * rho * dy) * + (p[2:, 1:-1] - p[0:-2, 1:-1]) + nu * + (dt / dx ** 2 * + (vn[1:-1, 2:] - 2 * vn[1:-1, 1:-1] + vn[1:-1, 0:-2]) + + dt / dy ** 2 * + (vn[2:, 1:-1] - 2 * vn[1:-1, 1:-1] + vn[0:-2, 1:-1]))) + + # Periodic BC u @ x = 2 + u[1:-1, -1] = ( + un[1:-1, -1] - un[1:-1, -1] * dt / dx * + (un[1:-1, -1] - un[1:-1, -2]) - vn[1:-1, -1] * dt / dy * + (un[1:-1, -1] - un[0:-2, -1]) - dt / (2 * rho * dx) * + (p[1:-1, 0] - p[1:-1, -2]) + nu * + (dt / dx ** 2 * + (un[1:-1, 0] - 2 * un[1:-1, -1] + un[1:-1, -2]) + dt / dy ** 2 * + (un[2:, -1] - 2 * un[1:-1, -1] + un[0:-2, -1])) + F * 
dt) + + # Periodic BC u @ x = 0 + u[1:-1, 0] = (un[1:-1, 0] - un[1:-1, 0] * dt / dx * + (un[1:-1, 0] - un[1:-1, -1]) - vn[1:-1, 0] * dt / dy * + (un[1:-1, 0] - un[0:-2, 0]) - dt / (2 * rho * dx) * + (p[1:-1, 1] - p[1:-1, -1]) + nu * + (dt / dx ** 2 * + (un[1:-1, 1] - 2 * un[1:-1, 0] + un[1:-1, -1]) + dt / dy ** 2 * + (un[2:, 0] - 2 * un[1:-1, 0] + un[0:-2, 0])) + F * dt) + + # Periodic BC v @ x = 2 + v[1:-1, -1] = ( + vn[1:-1, -1] - un[1:-1, -1] * dt / dx * + (vn[1:-1, -1] - vn[1:-1, -2]) - vn[1:-1, -1] * dt / dy * + (vn[1:-1, -1] - vn[0:-2, -1]) - dt / (2 * rho * dy) * + (p[2:, -1] - p[0:-2, -1]) + nu * + (dt / dx ** 2 * + (vn[1:-1, 0] - 2 * vn[1:-1, -1] + vn[1:-1, -2]) + dt / dy ** 2 * + (vn[2:, -1] - 2 * vn[1:-1, -1] + vn[0:-2, -1]))) + + # Periodic BC v @ x = 0 + v[1:-1, 0] = (vn[1:-1, 0] - un[1:-1, 0] * dt / dx * + (vn[1:-1, 0] - vn[1:-1, -1]) - vn[1:-1, 0] * dt / dy * + (vn[1:-1, 0] - vn[0:-2, 0]) - dt / (2 * rho * dy) * + (p[2:, 0] - p[0:-2, 0]) + nu * + (dt / dx ** 2 * + (vn[1:-1, 1] - 2 * vn[1:-1, 0] + vn[1:-1, -1]) + dt / dy ** 2 * + (vn[2:, 0] - 2 * vn[1:-1, 0] + vn[0:-2, 0]))) + + # Wall BC: u,v = 0 @ y = 0,2 + u[0, :] = 0 + u[-1, :] = 0 + v[0, :] = 0 + v[-1, :] = 0 + + udiff = (np.sum(u) - np.sum(un)) / np.sum(u) + stepcount += 1 + + return stepcount diff --git a/cmlq_examples/floyd_warshall.py b/cmlq_examples/floyd_warshall.py new file mode 100644 index 00000000000000..4b994ec5fc1541 --- /dev/null +++ b/cmlq_examples/floyd_warshall.py @@ -0,0 +1,19 @@ +# Copyright 2021 ETH Zurich and the NPBench authors. All rights reserved. 
+ +import numpy as np +def kernel(N, datatype=np.int32): + + path = np.fromfunction(lambda i, j: i * j % 7 + 1, (N, N), dtype=datatype) + for i in range(N): + for j in range(N): + if (i + j) % 13 == 0 or (i + j) % 7 == 0 or (i + j) % 11 == 0: + path[i, j] = 999 + + for k in range(path.shape[0]): + left = path[:, k] + right = path[k, :] + breakpoint() + outer = np.add.outer(left, right) + path[:] = np.minimum(path[:], outer) + + return path \ No newline at end of file diff --git a/cmlq_examples/gramschmidt.py b/cmlq_examples/gramschmidt.py new file mode 100644 index 00000000000000..d57dd505cba69c --- /dev/null +++ b/cmlq_examples/gramschmidt.py @@ -0,0 +1,28 @@ +import numpy as np +import util + +def kernel(M, N, datatype=np.float64): + from numpy.random import default_rng + rng = default_rng(42) + + A = rng.random((M, N), dtype=datatype) + while np.linalg.matrix_rank(A) < N: + A = rng.random((M, N), dtype=datatype) + + Q = np.zeros_like(A) + R = np.zeros((A.shape[1], A.shape[1]), dtype=A.dtype) + + for k in range(A.shape[1]): + nrm = np.dot(A[:, k], A[:, k]) + R[k, k] = np.sqrt(nrm) + Q[:, k] = A[:, k] / R[k, k] + for j in range(k + 1, A.shape[1]): + R[k, j] = np.dot(Q[:, k], A[:, j]) + + mult = Q[:, k] * R[k, j] + before = A[:, j].copy() + breakpoint() + A[:, j] -= mult + util.check_sub_result(before, mult, A[:, j]) + + return Q, R \ No newline at end of file diff --git a/cmlq_examples/grayscott.py b/cmlq_examples/grayscott.py new file mode 100644 index 00000000000000..78bdfd25814460 --- /dev/null +++ b/cmlq_examples/grayscott.py @@ -0,0 +1,38 @@ +import numpy as np + +import os +import util + +counter = 0 + +def grayscott(counts, Du, Dv, F, k): + np.random.seed(4711) + n = 300 + U = np.zeros((n+2,n+2), dtype=np.float32) + V = np.zeros((n+2,n+2), dtype=np.float32) + u, v = U[1:-1,1:-1], V[1:-1,1:-1] + + r = 20 + u[:] = 1.0 + U[n//2-r:n//2+r,n//2-r:n//2+r] = 0.50 + V[n//2-r:n//2+r,n//2-r:n//2+r] = 0.25 + u += 0.15*np.random.random((n,n)) + v += 
0.15*np.random.random((n,n)) + + for i in range(counts): + long_mul = 4 * U[1:-1, 1:-1] + util.check_mul_result(4, U[1:-1, 1:-1], long_mul) + Lu = ( U[0:-2,1:-1] + + U[1:-1,0:-2] - long_mul + U[1:-1, 2:] + + U[2: ,1:-1]) + Lv = ( V[0:-2,1:-1] + + V[1:-1,0:-2] - 4*V[1:-1,1:-1] + V[1:-1,2:] + + V[2: ,1:-1] ) + uvv = u*v*v + sub = 1 - u + + mul = Du * Lu + u += mul - uvv + F * (sub) + v += Dv*Lv + uvv - (F + k)*v + + return V \ No newline at end of file diff --git a/cmlq_examples/grouping.py b/cmlq_examples/grouping.py new file mode 100644 index 00000000000000..dba475f0ef8bdd --- /dev/null +++ b/cmlq_examples/grouping.py @@ -0,0 +1,20 @@ +import numpy as np +values = None + +def initialize(): + global values + if values is not None: + return values + + print("Initializing values") + N = 500000 + np.random.seed(0) + values = np.array(np.random.randint(0,3298,size=N),dtype='u4') + values.sort() + return values + +def grouping(values): + import numpy as np + diff = np.concatenate(([1], np.diff(values))) + idx = np.concatenate((np.where(diff)[0], [len(values)])) + return values[idx[:-1]], np.diff(idx) \ No newline at end of file diff --git a/cmlq_examples/jacobi_2d.py b/cmlq_examples/jacobi_2d.py new file mode 100644 index 00000000000000..025918ca150aff --- /dev/null +++ b/cmlq_examples/jacobi_2d.py @@ -0,0 +1,56 @@ +import numpy as np + +__rewrite__ = False + +import contextlib + + +@contextlib.contextmanager +def disabled_cmlq(): + global __rewrite__ + before = __rewrite__ + __rewrite__ = False + yield + __rewrite__ = before + + +counter = 0 + + +def check_add_result(left, right, result): + global counter + counter += 1 + with disabled_cmlq(): + correct_result = left + right + if not np.allclose(correct_result, result): + print(f"+++++++++++++++ OPERATION DOES NOT MATCH (Iteration {counter} +++++++++++++++") + print("Left:", left) + print() + print("Right:", right) + print() + print("Correct Result:", correct_result) + print() + print("Wrong Result:", result) + + +def 
kernel(TSTEPS, N, datatype=np.float64): + A = np.fromfunction(lambda i, j: i * (j + 2) / N, (N, N), dtype=datatype) + B = np.fromfunction(lambda i, j: i * (j + 3) / N, (N, N), dtype=datatype) + + for t in range(1, TSTEPS): + # add1 = A[1:-1, 1:-1] + A[1:-1, :-2] + # add2 = add1 + A[1:-1, 2:] + # add3 = add2 + A[2:, 1:-1] + # add4 = add3 + A[:-2, 1:-1] + # B[1:-1, 1:-1] = 0.2 * (add4) + # add1 = B[1:-1, 1:-1] + B[1:-1, :-2] + # add2 = add1 + B[1:-1, 2:] + # add3 = add2 + B[2:, 1:-1] + # add4 = (add3 + B[:-2, 1:-1]) + # A[1:-1, 1:-1] = 0.2 * add4 + B[1:-1, 1:-1] = 0.2 * (A[1:-1, 1:-1] + A[1:-1, :-2] + A[1:-1, 2:] + + A[2:, 1:-1] + A[:-2, 1:-1]) + A[1:-1, 1:-1] = 0.2 * (B[1:-1, 1:-1] + B[1:-1, :-2] + B[1:-1, 2:] + + B[2:, 1:-1] + B[:-2, 1:-1]) + + return A, B diff --git a/cmlq_examples/l1norm.py b/cmlq_examples/l1norm.py new file mode 100644 index 00000000000000..a7192ef5a56f58 --- /dev/null +++ b/cmlq_examples/l1norm.py @@ -0,0 +1,3 @@ +import numpy as np +def l1norm(x, y): + return np.sum(np.abs(x[:, None, :] - y), axis=-1) \ No newline at end of file diff --git a/cmlq_examples/l2norm.py b/cmlq_examples/l2norm.py new file mode 100644 index 00000000000000..a3995d87b073a9 --- /dev/null +++ b/cmlq_examples/l2norm.py @@ -0,0 +1,7 @@ +import numpy as np + +def l2norm(): + N = 1000 + np.random.seed(10) + x = np.random.rand(N,N) + return np.sqrt(np.sum(np.abs(x)**2, 1)) \ No newline at end of file diff --git a/cmlq_examples/laplace.py b/cmlq_examples/laplace.py new file mode 100644 index 00000000000000..25f584acbe5e66 --- /dev/null +++ b/cmlq_examples/laplace.py @@ -0,0 +1,111 @@ +import sys + +import numpy +# import numba + +test_numba = len(sys.argv) > 1 and sys.argv[1] == "numba" + + +# @numba.experimental.jitclass +class Grid: + """A simple grid class that stores the details and solution of the + computational grid.""" + + xmin: float + xmax: float + ymin: float + ymax: float + dx: float + dy: float + + def __init__(self, nx=10, ny=10, xmin=0.0, xmax=1.0, + ymin=0.0, 
ymax=1.0): + self.xmin, self.xmax, self.ymin, self.ymax = xmin, xmax, ymin, ymax + self.dx = float(xmax - xmin) / (nx - 1) + self.dy = float(ymax - ymin) / (ny - 1) + self.u = numpy.zeros((nx, ny), 'd') + # used to compute the change in solution in some of the methods. + self.old_u = self.u.copy() + + def set_boundary_condition(self, func): + """Sets the BC given a function of two variables.""" + xmin, ymin = self.xmin, self.ymin + xmax, ymax = self.xmax, self.ymax + x = numpy.arange(xmin, xmax + self.dx * 0.5, self.dx) + y = numpy.arange(ymin, ymax + self.dy * 0.5, self.dy) + self.u[0, :] = func(xmin, y) + self.u[-1, :] = func(xmax, y) + self.u[:, 0] = func(x, ymin) + self.u[:, -1] = func(x, ymax) + + def computeError(self): + """Computes absolute error using an L2 norm for the solution. + This requires that self.u and self.old_u must be appropriately + setup.""" + v = (self.u - self.old_u).flat + return numpy.sqrt(numpy.dot(v, v)) + + +# @numba.experimental.jitclass +class LaplaceSolver: + """A simple Laplacian solver that can use different schemes to + solve the problem.""" + + def __init__(self, grid): + self.grid = grid + + def step(self, dt=0.0): + """Takes a time step using a NumPy expression.""" + g = self.grid + dx2, dy2 = g.dx ** 2, g.dy ** 2 + dnr_inv = 0.5 / (dx2 + dy2) + u = g.u + g.old_u = u.copy() # needed to compute the error. + + # The actual iteration + u[1:-1, 1:-1] = ((u[0:-2, 1:-1] + u[2:, 1:-1]) * dy2 + + (u[1:-1, 0:-2] + u[1:-1, 2:]) * dx2) * dnr_inv + + v = (g.u - g.old_u).flat + return numpy.sqrt(numpy.dot(v, v)) + + def solve(self, n_iter=0, eps=1.0e-16): + err = self.step() + count = 1 + + while err > eps: + if n_iter and count >= n_iter: + return err + err = self.step() + count = count + 1 + + return count + + +def boundary_condition(x, y): + """Used to set the boundary condition for the grid of points. 
+ Change this as you see fit.""" + return (x ** 2 - y ** 2) + + +# def test(nmin=5, nmax=30, dn=5, eps=1.0e-16, n_iter=0, stepper='numeric'): +# iters = [] +# n_grd = numpy.arange(nmin, nmax, dn) +# times = [] +# for i in n_grd: +# g = Grid(nx=i, ny=i) +# g.set_boundary_condition(boundary_condition) +# s = LaplaceSolver(g, stepper) +# t1 = time.clock() +# iters.append(s.solve(n_iter=n_iter, eps=eps)) +# dt = time.clock() - t1 +# times.append(dt) +# print("Solution for nx = ny = %d, took %f seconds" % (i, dt)) +# return (n_grd ** 2, iters, times) + + +def solve_laplace(nx=500, ny=500, eps=1.0e-16, n_iter=1000): + g = Grid(nx, ny) + g.set_boundary_condition(boundary_condition) + s = LaplaceSolver(g) + s.solve(n_iter=n_iter, eps=eps) diff --git a/cmlq_examples/load_papi_to_db.py b/cmlq_examples/load_papi_to_db.py new file mode 100644 index 00000000000000..37344ac1457d78 --- /dev/null +++ b/cmlq_examples/load_papi_to_db.py @@ -0,0 +1,26 @@ +import json +from collections import defaultdict +import sys +import numpy as np + +import sqlite3 +import json +from datetime import datetime + +data = None +with open(sys.argv[1], 'r') as f: + data = json.load(f) + +dbname = f'perfdata-{datetime.now().strftime("%Y-%m-%d-%H:%M:%S")}.db' if len(sys.argv) <= 2 else sys.argv[2] + +conn = sqlite3.connect(dbname) +conn.execute( + "CREATE TABLE regions (region_id int not null, name text not null, parent_id int, real_time int, cycles int);") +regions = data["threads"]["0"]["regions"] + +for region_id in regions: + region = regions[region_id] + conn.execute("INSERT INTO regions (region_id, name, parent_id, real_time, cycles) VALUES (?, ?, ?, ?, ?)", + (int(region_id), region["name"], int(region["parent_region_id"]), int(region["real_time_nsec"]), + int(region["cycles"]))) +conn.commit() \ No newline at end of file diff --git a/cmlq_examples/nbody.py b/cmlq_examples/nbody.py new file mode 100644 index 00000000000000..7be8e84ccea4ec --- /dev/null +++ b/cmlq_examples/nbody.py @@ -0,0 +1,163 
@@ +# Adapted from https://github.com/pmocz/nbody-python/blob/master/nbody.py +# TODO: Add GPL-3.0 License + +import numpy as np +""" +Create Your Own N-body Simulation (With Python) +Philip Mocz (2020) Princeton Univeristy, @PMocz +Simulate orbits of stars interacting due to gravity +Code calculates pairwise forces according to Newton's Law of Gravity +""" + + +__rewrite__ = False + +import contextlib + + +@contextlib.contextmanager +def disabled_cmlq(): + global __rewrite__ + before = __rewrite__ + __rewrite__ = False + yield + __rewrite__ = before + + +counter = 0 + +def check_div_result(left, right, result): + global counter + counter += 1 + with disabled_cmlq(): + correct_result = left / right + if not np.allclose(correct_result, result): + print(f"+++++++++++++++ OPERATION DOES NOT MATCH (Iteration {counter} +++++++++++++++") + print("Left:", left) + print() + print("Right:", right) + print() + print("Correct Result:", correct_result) + print() + print("Wrong Result:", result) + + + +def getAcc(pos, mass, G, softening): + # print("getAcc") + """ + Calculate the acceleration on each particle due to Newton's Law + pos is an N x 3 matrix of positions + mass is an N x 1 vector of masses + G is Newton's Gravitational constant + softening is the softening length + a is N x 3 matrix of accelerations + """ + # positions r = [x,y,z] for all particles + x = pos[:, 0:1] + y = pos[:, 1:2] + z = pos[:, 2:3] + + # matrix that stores all pairwise particle separations: r_j - r_i + dx = x.T - x + dy = y.T - y + dz = z.T - z + + # matrix that stores 1/r^3 for all particle pairwise particle separations + inv_r3 = (dx**2 + dy**2 + dz**2 + softening**2) + inv_r3[inv_r3 > 0] = inv_r3[inv_r3 > 0]**(-1.5) + + ax = G * (dx * inv_r3) @ mass + ay = G * (dy * inv_r3) @ mass + az = G * (dz * inv_r3) @ mass + + # pack together the acceleration components + a = np.hstack((ax, ay, az)) + + return a + + +def getEnergy(pos, vel, mass, G): + global counter + # print("getEnergy") + """ + Get 
kinetic energy (KE) and potential energy (PE) of simulation + pos is N x 3 matrix of positions + vel is N x 3 matrix of velocities + mass is an N x 1 vector of masses + G is Newton's Gravitational constant + KE is the kinetic energy of the system + PE is the potential energy of the system + """ + # Kinetic Energy: + # KE = 0.5 * np.sum(np.sum( mass * vel**2 )) + # breakpoint() + KE = 0.5 * np.sum(mass * vel**2) + + # Potential Energy: + + # positions r = [x,y,z] for all particles + x = pos[:, 0:1] + y = pos[:, 1:2] + z = pos[:, 2:3] + + # matrix that stores all pairwise particle separations: r_j - r_i + dx = x.T - x + dy = y.T - y + dz = z.T - z + + # matrix that stores 1/r for all particle pairwise particle separations + inv_r = np.sqrt(dx**2 + dy**2 + dz**2) + left = 1.0 + right = inv_r[inv_r > 0] + if counter == 1: + breakpoint() + division = left / right + check_div_result(left, right, division) + inv_r[inv_r > 0] = division + + # sum over upper triangle, to count each interaction only once + # PE = G * np.sum(np.sum(np.triu(-(mass*mass.T)*inv_r,1))) + PE = G * np.sum(np.triu(-(mass * mass.T) * inv_r, 1)) + + return KE, PE + + +def nbody(mass, pos, vel, N, Nt, dt, G, softening): + + # Convert to Center-of-Mass frame + vel -= np.mean(mass * vel, axis=0) / np.mean(mass) + + # calculate initial gravitational accelerations + acc = getAcc(pos, mass, G, softening) + + # calculate initial energy of system + KE = np.ndarray(Nt + 1, dtype=np.float64) + PE = np.ndarray(Nt + 1, dtype=np.float64) + KE[0], PE[0] = getEnergy(pos, vel, mass, G) + + t = 0.0 + + # Simulation Main Loop + for i in range(Nt): + # print("loop") + # (1/2) kick + vel += acc * dt / 2.0 + + # drift + pos += vel * dt + + # update accelerations + acc = getAcc(pos, mass, G, softening) + + # (1/2) kick + vel += acc * dt / 2.0 + + # update time + t += dt + + # get energy of system + KE[i + 1], PE[i + 1] = getEnergy(pos, vel, mass, G) + + return KE, PE + diff --git a/cmlq_examples/pairwise.py 
b/cmlq_examples/pairwise.py new file mode 100644 index 00000000000000..ceabcff04a7960 --- /dev/null +++ b/cmlq_examples/pairwise.py @@ -0,0 +1,15 @@ +import numpy as np + +from util import check_mul_result + + +def pairwise(): + # for i in range(10): + # test = np.array(0, dtype=np.int64) + # sum = test * 1.0 + # check_mul_result(test, 1.0, sum) + # + # return sum + + pts = np.linspace(0,10,20000).reshape(200,100) + return np.sum((pts[None,:] - pts[:, None])**2, -1)**0.5 \ No newline at end of file diff --git a/cmlq_examples/ph_arc_distance.py b/cmlq_examples/ph_arc_distance.py new file mode 100644 index 00000000000000..ce855a30b7223e --- /dev/null +++ b/cmlq_examples/ph_arc_distance.py @@ -0,0 +1,15 @@ +import numpy as np + +def arc_distance(): + N = 10000 + import numpy as np + np.random.seed(0) + theta_1, phi_1, theta_2, phi_2 = np.random.randn(N), np.random.randn(N), np.random.randn(N), np.random.randn(N) + + + """ + Calculates the pairwise arc distance between all points in vector a and b. 
+ """ + temp = np.sin((theta_2-theta_1)/2)**2+np.cos(theta_1)*np.cos(theta_2)*np.sin((phi_2-phi_1)/2)**2 + distance_matrix = 2 * (np.arctan2(np.sqrt(temp),np.sqrt(1-temp))) + return distance_matrix \ No newline at end of file diff --git a/cmlq_examples/print_papi.py b/cmlq_examples/print_papi.py new file mode 100644 index 00000000000000..16b70bbb7a3c8a --- /dev/null +++ b/cmlq_examples/print_papi.py @@ -0,0 +1,58 @@ +import json +from collections import defaultdict +import sys +import numpy as np + +import sqlite3 +import json +from datetime import datetime + +import functools + +conn = sqlite3.connect(sys.argv[1]) + + +def calculate_data(region_data): + count = len(region_data) + assert count > 0 + + time_median = np.median([int(region["real_time_nsec"]) for region in region_data]) + cycles_median = np.median([int(region["cycles"]) for region in region_data]) + + return time_median, cycles_median, count + + +indexed = defaultdict(list) + +children = defaultdict(lambda: defaultdict(list)) +region_names = ("add_f_f_iterator_loop", "cmlq_adouble_add_adouble_iterator_loop") + +for region in region_names: + cursor = conn.execute("SELECT * FROM regions WHERE name = ?", (region,)) + region_id = None + for data in cursor: + region_id = data[0] + indexed[region].append({"cycles": data[4], "real_time_nsec": data[3]}) + + cursor = conn.execute("SELECT * FROM regions WHERE parent_id = ?", (region_id,)) + for data in cursor: + children[region][data[1]].append({"cycles": data[4], "real_time_nsec": data[3]}) + +baseline, other = region_names + + +def print_data(baseline, other, data_baseline, data_other): + time_baseline, cycles_baseline, count_baseline = calculate_data(data_baseline[baseline]) + time_other, cycles_other, count_other = calculate_data(data_other[other]) + print(f"{baseline} ({count_baseline}): {time_baseline} ns, {cycles_baseline} cycles") + print(f"{other} ({count_other}): {time_other} ns, {cycles_other} cycles") + print(f"Difference: {time_baseline / 
time_other:.2f}x time, {cycles_baseline / cycles_other:.2f}x cycles") + print() + + +print_data(baseline, other, indexed, indexed) +print("Children:") +for child_region in children[baseline]: + print_data(child_region, child_region, children[baseline], children[other]) + + diff --git a/cmlq_examples/resnet.py b/cmlq_examples/resnet.py new file mode 100644 index 00000000000000..6bf36b2ebcdbd5 --- /dev/null +++ b/cmlq_examples/resnet.py @@ -0,0 +1,64 @@ +import numpy as np + + +def relu(x): + return np.maximum(x, 0) + + +# Deep learning convolutional operator (stride = 1) +def conv2d(input, weights): + K = weights.shape[0] # Assuming square kernel + N = input.shape[0] + H_out = input.shape[1] - K + 1 + W_out = input.shape[2] - K + 1 + C_out = weights.shape[3] + output = np.empty((N, H_out, W_out, C_out), dtype=np.float32) + + # Loop structure adapted from https://github.com/SkalskiP/ILearnDeepLearning.py/blob/ba0b5ba589d4e656141995e8d1a06d44db6ce58d/01_mysteries_of_neural_networks/06_numpy_convolutional_neural_net/src/layers/convolutional.py#L88 + for i in range(H_out): + for j in range(W_out): + output[:, i, j, :] = np.sum( + input[:, i:i + K, j:j + K, :, np.newaxis] * + weights[np.newaxis, :, :, :], + axis=(1, 2, 3), + ) + + return output + + +# Batch normalization operator, as used in ResNet +def batchnorm2d(x, eps=1e-5): + mean = np.mean(x, axis=0, keepdims=True) + std = np.std(x, axis=0, keepdims=True) + return (x - mean) / np.sqrt(std + eps) + + +# Bottleneck residual block (after initial convolution, without downsampling) +# in the ResNet-50 CNN (inference) +def resnet(N, W, H, C1, C2): + print("Running resenet") + from numpy.random import default_rng + rng = default_rng(42) + + # Input + input = rng.random((N, H, W, C1), dtype=np.float32) + # Weights + conv1 = rng.random((1, 1, C1, C2), dtype=np.float32) + conv2 = rng.random((3, 3, C2, C2), dtype=np.float32) + conv3 = rng.random((1, 1, C2, C1), dtype=np.float32) + return (input, conv1, conv2, conv3) + + # Pad 
output of first convolution for second convolution + padded = np.zeros((input.shape[0], input.shape[1] + 2, input.shape[2] + 2, + conv1.shape[3])) + + padded[:, 1:-1, 1:-1, :] = conv2d(input, conv1) + x = batchnorm2d(padded) + x = relu(x) + + x = conv2d(x, conv2) + x = batchnorm2d(x) + x = relu(x) + x = conv2d(x, conv3) + x = batchnorm2d(x) + return relu(x + input) diff --git a/cmlq_examples/simulate_stack_effect.py b/cmlq_examples/simulate_stack_effect.py new file mode 100644 index 00000000000000..39018a9b0ad4c3 --- /dev/null +++ b/cmlq_examples/simulate_stack_effect.py @@ -0,0 +1,49 @@ +def find_provenance_backward_stack_position(instructions, initial_stack, initial_ip, stack_position): + last_provenance = {} + + model_stack = initial_stack[:] + stack_ptr = len(model_stack) - 1 + ip = initial_ip + + while ip >= 0: + instruction = instructions[ip] + operation, pop_count, push_count = instruction + + # Update the sources of the model_stack positions + for i in range(push_count): + if push_count > 0: + last_provenance[stack_ptr - i] = instruction + + stack_ptr = stack_ptr - push_count + pop_count + + if stack_position in last_provenance: + return last_provenance[stack_position] + + + + # Update the instruction pointer + ip -= 1 + + return None # Stack position not found + +# Example instructions: (operation, pop_count, push_count) +instructions = [ + ('LOAD_GLOBAL', 0, 1), + ('LOAD_ATTR', 1, 1), + ('LOAD_FAST', 0, 1), + ('LOAD_FAST', 0, 1), + ('LOAD_CONST', 0, 1), + ('BINARY_OP', 2, 1), + ('BINARY_OP', 2, 1), +] + +initial_stack = [5, 4, 3, 2, 1] +initial_ip = len(instructions) - 1 # Current instruction pointer +stack_position_to_find = 3 # Indexing from the top of the stack + +provenance = find_provenance_backward_stack_position(instructions, initial_stack, initial_ip, stack_position_to_find) + +if provenance: + print(f"The stack position {stack_position_to_find} came from instruction: {provenance}") +else: + print(f"The stack position {stack_position_to_find} does 
not have a known provenance.") diff --git a/cmlq_examples/specialconvolve.py b/cmlq_examples/specialconvolve.py new file mode 100644 index 00000000000000..40bab58474542a --- /dev/null +++ b/cmlq_examples/specialconvolve.py @@ -0,0 +1,7 @@ + +def specialconvolve(): + import numpy as np + a = np.arange(100*10000, dtype=np.uint32).reshape(1000,1000) + rowconvol = a[1:-1,:] + a[:-2,:] + a[2:,:] + colconvol = rowconvol[:,1:-1] + rowconvol[:,:-2] + rowconvol[:,2:] - 9*a[1:-1,1:-1] + return colconvol diff --git a/cmlq_examples/test_opt.py b/cmlq_examples/test_opt.py new file mode 100644 index 00000000000000..a616ae5db86aa6 --- /dev/null +++ b/cmlq_examples/test_opt.py @@ -0,0 +1,535 @@ +import sys +from pprint import pprint + +import numpy as np +import numpy.typing as npt +import laplace + +import dis +import os +from timeit import timeit + +import channel_flow +import nbody +import floyd_warshall +import adi +import jacobi_2d +import adist +import vadv +import correlat +import azimhist +import mandelbrot +import fdtd2d +import gramschmidt + +import better_exceptions +profile = "PROFILE" in os.environ +test_numba = len(sys.argv) > 1 and sys.argv[1] == "numba" + +def writeinst(opc: str, arg: int = 0): + "Makes life easier in writing python bytecode" + nb = max(1, -(-arg.bit_length() // 8)) + ab = arg.to_bytes(nb, sys.byteorder) + ext_arg = dis._all_opmap['EXTENDED_ARG'] + inst = bytearray() + for i in range(nb - 1): + inst.append(ext_arg) + inst.append(ab[i]) + inst.append(dis._all_opmap[opc]) + inst.append(ab[-1]) + + return bytes(inst) + + +def patch(function, patches, stack_adjustment=0): + code = dis.Bytecode(function, show_caches=True) + bytelist = [] + for instr in code: + name = instr.opname + arg = instr.arg + if instr.offset in patches: + patch = patches[instr.offset] + name = patch[0] + if patch[1] is not None: + arg = patch[1] + + if arg is None: + arg = 0 + bytelist.append(writeinst(name, arg)) + + bytes = b"".join(bytelist) + + orig = function.__code__ + 
function.__code__ = orig.replace(co_code=bytes, co_consts=orig.co_consts, co_names=orig.co_names, + co_flags=orig.co_flags, co_stacksize=orig.co_stacksize + stack_adjustment) + + +state1 = np.random.RandomState() +state2 = np.random.RandomState() + +n = 10 +rng = np.random.default_rng() +rand_array = rng.standard_normal(n) + + +def normal_fast(): + global n + global rand_array + noise = rand_array * 3 + arange = np.arange(n) + constant = (n / 23) + div = arange / constant + breakpoint() + sin = np.sin(div) + sin_minus = sin - 0.3 + pulses = np.maximum(sin_minus, -5.0) + pulses_multiply = pulses * 300 + pulses_plus_noise = (pulses_multiply) + noise + waveform = (pulses_plus_noise).astype(np.int16) + return waveform + + +def patch_normal_fast(): + patch(normal_fast, { + # 14: ("NP_AFLOAT_MULTIPLY_FLOAT", 1), # replace BINARY_OP multiply + # 92: ("NP_ALONG_DIVIDE_FLOAT", 1), # replace BINARY_OP divide + # 98: ("JUMP_FORWARD", 14), + # 130: ("NP_AFLOAT_SIN", 3), # replace CALL sin + # 144: ("NP_AFLOAT_SUBTRACT_FLOAT", 1), # replace subtract + # 150: ("JUMP_FORWARD", 14), + # 184: ("NP_AFLOAT_MAXIMUM", 3), # replace CALL maximum + # 198: ("NP_AFLOAT_MULTIPLY_FLOAT", 1), # replace pulses multiplication + # 208: ("NP_AFLOAT_ADD_AFLOAT", 1), # replace pulses plus noise + }, 1) + global __rewrite__ + global __lltrace__ + __rewrite__ = True + __lltrace__ = True + + +def normal_legacy(): + global n + global rand_array + noise = rand_array * 3 + arange = np.arange(n) + constant = (n / 23) + div = arange / constant + sin = np.sin(div) + sin_minus = sin - 0.3 + pulses = np.maximum(sin_minus, -5.0) + pulses_multiply = pulses * 300 + pulses_plus_noise = (pulses_multiply) + noise + waveform = (pulses_plus_noise).astype(np.int16) + return waveform + + +def laplace_solve_fast(): + laplace.solve_laplace() + + +def patch_laplace(): + os.environ["CMLQ_REWRITE"] = "1" + + # patch(laplace.LaplaceSolver.step, { + # 208: ("NP_AFLOAT_ADD_AFLOAT", 1), + # 214: ("NP_AFLOAT_MULTIPLY_FLOAT", 1), 
+ # 258: ("NP_AFLOAT_ADD_AFLOAT", 1), + # 264: ("NP_AFLOAT_MULTIPLY_FLOAT", 1), + # 268: ("NP_AFLOAT_ADD_AFLOAT", 1), + # 274: ("NP_AFLOAT_MULTIPLY_FLOAT", 1), + # 342: ("NP_AFLOAT_SUBTRACT_FLOAT", 1), + # # 362: ("NP_AFLOAT_SUBTRACT_FLOAT", 1), + # }, 0) + + +def laplace_solve_baseline(): + laplace.solve_laplace() + + +def test_channel_flow(): + nx = 201 + ny = 201 + + u = np.zeros((ny, nx), dtype=np.float64) + v = np.zeros((ny, nx), dtype=np.float64) + p = np.ones((ny, nx), dtype=np.float64) + dx = 2 / (nx - 1) + dy = 2 / (ny - 1) + dt = .1 / ((nx - 1) * (ny - 1)) + + nit = 20 + rho = 1.0 + nu = 0.1 + F = 1.0 + channel_flow.channel_flow(nit, u, v, dt, dx, dy, p, rho, nu, F) + + +def patch_channel_flow(): + pass + + +def test_nbody(): + N = 100 + tEnd = 9.0 + dt = 0.01 + G = 1.0 + softening = 0.1 + from numpy.random import default_rng + rng = default_rng(42) + breakpoint() + mass = 20.0 * np.ones((N, 1)) / N # total mass of particles is 20 + pos = rng.random((N, 3)) # randomly selected positions and velocities + vel = rng.random((N, 3)) + Nt = int(np.ceil(tEnd / dt)) + return nbody.nbody(mass, pos, vel, N, Nt, dt, G, softening) + + +__rewrite__ = False + + +def patch_nbody(): + os.environ["CMLQ_REWRITE"] = "1" + + patch(nbody.getAcc, { + # 92: ("NP_AFLOAT_SUBTRACT_AFLOAT", 1), + # 122: ("NP_AFLOAT_SUBTRACT_AFLOAT", 1), + # 152: ("NP_AFLOAT_SUBTRACT_AFLOAT", 1), + }, 0) + + patch(nbody.getEnergy, { + # 56: ("NP_FLOAT_MULTIPLY_NPFLOAT", 1), + # 152: ("NP_AFLOAT_SUBTRACT_AFLOAT", 1), + # 182: ("NP_AFLOAT_SUBTRACT_AFLOAT", 1), + # 212: ("NP_AFLOAT_SUBTRACT_AFLOAT", 1), + # 264: ("NP_AFLOAT_ADD_AFLOAT", 1), + # 276: ("NP_AFLOAT_ADD_AFLOAT", 1), + # 440: ("NP_FLOAT_MULTIPLY_NPFLOAT", 1), + }, 0) + + +def test_floyd_marshall(): + return floyd_warshall.kernel(850) + + +def test_adi(): + return adi.kernel(50, 500) + + +def test_jacobi2d(): + return jacobi_2d.kernel(1000, 2800) + + +def test_adist(): + return adist.arc_distance(10000000) + + +def test_vadv(): + return 
vadv.vadv(180, 180, 160) + + +def test_correlat(): + return correlat.kernel(3200, 4000) + + +def test_gramschmidt(): + return gramschmidt.kernel(600, 500) + +def test_azimhist(): + return azimhist.kernel(1000000, 1000) + + +def test_mandelbrot(): + return mandelbrot.mandelbrot(-2.25, 0.75, -1.50, 1.50, 1000, 1000, 100, horizon=2.0) + + +def test_fdtd2d(): + return fdtd2d.kernel(500, 1000, 1200) + + +def enable_cmlq(): + if "BASELINE" not in os.environ: + os.environ["CMLQ_REWRITE"] = "1" + + +def test_subscript(): + a = np.array([[1, 2, 3], [4, 5, 6]], np.float64) + + for i in range(100): + breakpoint() + a[1:, :] = a[1:, :] - 0.5 + + return a; + +def do_nothing(): + pass + + +N = 20000 + +reps = 3 + +# baseline = normal_legacy +# cmlq = normal_fast +# dis_fun = normal_fast +# patch_cmlq = enable_cmlq + +# baseline = laplace_solve_baseline +# cmlq = laplace_solve_fast +# dis_fun = laplace.LaplaceSolver.step +# patch_cmlq = enable_cmlq + +# baseline = test_channel_flow +# cmlq = test_channel_flow +# dis_fun = channel_flow.pressure_poisson_periodic +# patch_cmlq = enable_cmlq + +# baseline = test_nbody +# cmlq = test_nbody +# dis_fun = nbody.getEnergy +# patch_cmlq = enable_cmlq + +# baseline = test_floyd_marshall +# cmlq = test_floyd_marshall +# dis_fun = floyd_warshall.kernel +# patch_cmlq = enable_cmlq + +baseline = test_adi +cmlq = test_adi +dis_fun = adi.kernel +patch_cmlq = enable_cmlq + +# baseline = test_jacobi2d +# cmlq = test_jacobi2d +# dis_fun = jacobi_2d.kernel +# patch_cmlq = enable_cmlq + +# baseline = test_fdtd2d +# cmlq = test_fdtd2d +# dis_fun = fdtd2d.kernel +# patch_cmlq = enable_cmlq + + +# baseline = test_adist +# cmlq = test_adist +# dis_fun = adist.arc_distance +# patch_cmlq = enable_cmlq + +# baseline = test_vadv +# cmlq = test_vadv +# dis_fun = vadv.vadv +# patch_cmlq = enable_cmlq + +# baseline = test_correlat +# cmlq = test_correlat +# dis_fun = correlat.kernel +# patch_cmlq = enable_cmlq + + +# baseline = test_azimhist +# cmlq = 
test_azimhist +# dis_fun = azimhist.kernel +# patch_cmlq = enable_cmlq + + +# baseline = test_mandelbrot +# cmlq = test_mandelbrot +# dis_fun = mandelbrot.mandelbrot +# patch_cmlq = enable_cmlq + +# baseline = test_gramschmidt +# cmlq = test_gramschmidt +# dis_fun = gramschmidt.kernel +# patch_cmlq = enable_cmlq + +# baseline = test_subscript +# cmlq = test_subscript +# dis_fun = test_subscript +# patch_cmlq = enable_cmlq + + +def check_array_equality(a, b): + print(a.dtype) + if a.dtype == np.float64: + return np.allclose(a, b) + if a.dtype == np.int32: + return np.all(np.equal(a, b)) + if a.dtype == np.complex128: + return np.all(np.equal(a, b)) + + assert False, "Unsupported dtype" + + +cmlq_name = cmlq.__name__ + +if not profile: + print("===== BASELINE =====") + time_old = timeit(f"{baseline.__name__}()", globals=globals(), number=reps) + baseline_result = baseline() + # time_old = 1 +else: + print("Profiling enabled") + +if not test_numba: + + print(f"===== {cmlq_name} BEFORE PATCH =====") + dis.dis(dis_fun, show_caches=False) + patch_cmlq() + + print("===== CMLQ =====") + try: + time_after_patch = timeit(f"{cmlq_name}()", globals=globals(), number=reps) + except Exception as e: + print("===== BROKEN REWRITE =====") + dis.dis(dis_fun, show_caches=False, adaptive=True) + raise e + time_new = time_after_patch + + print(f"===== {cmlq_name} AFTER RUN =====") + dis.dis(dis_fun, show_caches=False, adaptive=True) + + if not profile: + cmlq_result = cmlq() + num_instructions = len([i for i in dis.get_instructions(dis_fun)]) + num_quickened = len(list([i for i in dis.get_instructions(dis_fun) if i.opname == "EXTERNAL"])) + print(f"{num_quickened} / {num_instructions} instructions quickened") + + + if baseline_result is not None: + print(type(baseline_result)) + equal = True + if isinstance(baseline_result, tuple): + for baseline, cmlq in zip(baseline_result, cmlq_result): + equality = check_array_equality(baseline, cmlq) + equal = equal and equality + else: + equal = 
check_array_equality(baseline_result, cmlq_result) + if not equal: + print("Results do not match") + print("Baseline:", baseline_result) + print("CMLQ:", cmlq_result) + else: + print("No baseline result reported") + +else: + import numba + + + @numba.jit(nopython=True) + def normal_jit(): + global n + # with numba.objmode(rand_array="float64[:]"): + # rng = np.random.default_rng() + # rand_array = rng.standard_normal(n) + rand_array = np.arange(n) + noise = rand_array * 3 + print(noise) + arange = np.arange(n) + constant = (n / 23) + div = arange / constant + print(div) + sin = np.sin(div) + print(sin) + sin_minus = sin - 0.3 + pulses = np.maximum(sin_minus, 0.0) + print(pulses) + pulses_multiply = pulses * 300 + pulses_plus_noise = (pulses_multiply) + noise + waveform = (pulses_plus_noise).astype(np.int16) + return waveform + + + @numba.jit(nopython=True, parallel=False, fastmath=True) + def kernel(TSTEPS, N, u): + + v = np.empty(u.shape, dtype=u.dtype) + p = np.empty(u.shape, dtype=u.dtype) + q = np.empty(u.shape, dtype=u.dtype) + + DX = 1.0 / N + DY = 1.0 / N + DT = 1.0 / TSTEPS + B1 = 2.0 + B2 = 1.0 + mul1 = B1 * DT / (DX * DX) + mul2 = B2 * DT / (DY * DY) + + a = -mul1 / 2.0 + b = 1.0 + mul2 + c = a + d = -mul2 / 2.0 + e = 1.0 + mul2 + f = d + + for t in range(1, TSTEPS + 1): + v[0, 1:N - 1] = 1.0 + p[1:N - 1, 0] = 0.0 + q[1:N - 1, 0] = v[0, 1:N - 1] + for j in range(1, N - 1): + p[1:N - 1, j] = -c / (a * p[1:N - 1, j - 1] + b) + q[1:N - 1, + j] = (-d * u[j, 0:N - 2] + + (1.0 + 2.0 * d) * u[j, 1:N - 1] - f * u[j, 2:N] - + a * q[1:N - 1, j - 1]) / (a * p[1:N - 1, j - 1] + b) + v[N - 1, 1:N - 1] = 1.0 + for j in range(N - 2, 0, -1): + v[j, 1:N - 1] = p[1:N - 1, j] * v[j + 1, 1:N - 1] + q[1:N - 1, j] + + u[1:N - 1, 0] = 1.0 + p[1:N - 1, 0] = 0.0 + q[1:N - 1, 0] = u[1:N - 1, 0] + for j in range(1, N - 1): + p[1:N - 1, j] = -f / (d * p[1:N - 1, j - 1] + e) + q[1:N - 1, + j] = (-a * v[0:N - 2, j] + + (1.0 + 2.0 * a) * v[1:N - 1, j] - c * v[2:N, j] - + d * q[1:N - 1, 
j - 1]) / (d * p[1:N - 1, j - 1] + e) + u[1:N - 1, N - 1] = 1.0 + for j in range(N - 2, 0, -1): + u[1:N - 1, j] = p[1:N - 1, j] * u[1:N - 1, j + 1] + q[1:N - 1, j] + + + print("Running with numba") + + + def call_kernel(): + u = np.fromfunction(lambda i, j: (i + N - j) / N, (N, N), dtype=np.float64) + kernel(5, 100, u) + + + # jit = normal_jit() + jit = call_kernel + + # warmup + jit() + time_new = timeit(f"{jit.__name__}()", globals=globals(), number=reps) + llvm_code = kernel.inspect_llvm() + asm_code = kernel.inspect_asm() + compile_result = kernel.overloads[kernel.signatures[0]] + +if not profile: + print(f"OLD: {time_old}") +print(f"NEW: {time_new}") + +if not profile: + print(f"OLD/NEW: {time_old / time_new}") + +pprint(np.core.multiarray.get_cmlq_stats()) +pprint(get_cmlq_functions()) +pprint(get_cmlq_stats(dis_fun.__code__)) + +if test_numba and len(sys.argv) > 2: + if sys.argv[2] == "show-llvm": + for v, k in llvm_code.items(): + print(v, k) + + if sys.argv[2] == "show-asm": + for v, k in asm_code.items(): + print(v, k) + + if sys.argv[2] == "show-passes": + nopython_times = compile_result.metadata['pipeline_times']['nopython'] + for k in nopython_times.keys(): + print(k) + +# print(b) +# print((b - a) / a) diff --git a/cmlq_examples/util.py b/cmlq_examples/util.py new file mode 100644 index 00000000000000..9afb2f072f2051 --- /dev/null +++ b/cmlq_examples/util.py @@ -0,0 +1,77 @@ +from datetime import datetime +import os +import contextlib +import inspect +import numpy as np + +from collections import defaultdict + +counters = defaultdict(int) + +@contextlib.contextmanager +def disabled_cmlq(): + before = os.environ["CMLQ_REWRITE"] if "CMLQ_REWRITE" in os.environ else None + if before: + del os.environ["CMLQ_REWRITE"] + yield + if before is not None: + os.environ["CMLQ_REWRITE"] = before + + +def report_error(left, right, opt_result, correct_result, operator): + global counters + counter = counters[operator] + if not np.allclose(correct_result, 
opt_result): + print(f"+++++++++++++++ {operator} DOES NOT MATCH (Iteration {counter}) +++++++++++++++") + if getattr(left, "dtype", None) is None: + print("Left Type:", type(left)) + else: + print("Left Type:", left.dtype) + print("Left:", left) + print() + if getattr(right, "dtype", None) is None: + print("Right Type:", type(right)) + else: + print("Right Type:", right.dtype) + print("Right:", right) + print() + print("Correct Result:", correct_result) + print() + print("Wrong Result:", opt_result) + print("Line:", inspect.currentframe().f_back.f_back.f_lineno) + raise(Exception) + + +def check_div_result(left, right, opt_result): + global counters + operator = "div" + counters[operator] += 1 + with disabled_cmlq(): + correct_result = left / right + report_error(left, right, opt_result, correct_result, operator) + +def check_add_result(left, right, opt_result): + global counters + operator = "add" + counters[operator] += 1 + with disabled_cmlq(): + correct_result = left + right + report_error(left, right, opt_result, correct_result, operator) + +def check_sub_result(left, right, opt_result): + global counters + operator = "minus" + counters[operator] += 1 + with disabled_cmlq(): + correct_result = left - right + report_error(left, right, opt_result, correct_result, operator) + +def check_mul_result(left, right, opt_result): + global counters + operator = "multiply" + if "CMLQ_REWRITE" not in os.environ or os.environ["CMLQ_REWRITE"] == "0": + return + counters[operator] += 1 + with disabled_cmlq(): + correct_result = left * right + report_error(left, right, opt_result, correct_result, operator) \ No newline at end of file diff --git a/cmlq_examples/vadv.py b/cmlq_examples/vadv.py new file mode 100644 index 00000000000000..50dd8e49a9bfe7 --- /dev/null +++ b/cmlq_examples/vadv.py @@ -0,0 +1,90 @@ +import numpy as np + +# Sample constants +BET_M = 0.5 +BET_P = 0.5 + + +# Adapted from 
https://github.com/GridTools/gt4py/blob/1caca893034a18d5df1522ed251486659f846589/tests/test_integration/stencil_definitions.py#L111 +def vadv(I, J, K): + from numpy.random import default_rng + rng = default_rng(42) + + dtr_stage = 3. / 20. + + # Define arrays + utens_stage = rng.random((I, J, K)) + u_stage = rng.random((I, J, K)) + wcon = rng.random((I + 1, J, K)) + u_pos = rng.random((I, J, K)) + utens = rng.random((I, J, K)) + + I, J, K = utens_stage.shape[0], utens_stage.shape[1], utens_stage.shape[2] + ccol = np.ndarray((I, J, K), dtype=utens_stage.dtype) + dcol = np.ndarray((I, J, K), dtype=utens_stage.dtype) + data_col = np.ndarray((I, J), dtype=utens_stage.dtype) + + for k in range(1): + gcv = 0.25 * (wcon[1:, :, k + 1] + wcon[:-1, :, k + 1]) + cs = gcv * BET_M + + ccol[:, :, k] = gcv * BET_P + bcol = dtr_stage - ccol[:, :, k] + + # update the d column + correction_term = -cs * (u_stage[:, :, k + 1] - u_stage[:, :, k]) + dcol[:, :, k] = (dtr_stage * u_pos[:, :, k] + utens[:, :, k] + + utens_stage[:, :, k] + correction_term) + + # Thomas forward + divided = 1.0 / bcol + ccol[:, :, k] = ccol[:, :, k] * divided + dcol[:, :, k] = dcol[:, :, k] * divided + + for k in range(1, K - 1): + gav = -0.25 * (wcon[1:, :, k] + wcon[:-1, :, k]) + gcv = 0.25 * (wcon[1:, :, k + 1] + wcon[:-1, :, k + 1]) + + as_ = gav * BET_M + cs = gcv * BET_M + + acol = gav * BET_P + ccol[:, :, k] = gcv * BET_P + bcol = dtr_stage - acol - ccol[:, :, k] + + # update the d column + correction_term = -as_ * (u_stage[:, :, k - 1] - + u_stage[:, :, k]) - cs * ( + u_stage[:, :, k + 1] - u_stage[:, :, k]) + dcol[:, :, k] = (dtr_stage * u_pos[:, :, k] + utens[:, :, k] + + utens_stage[:, :, k] + correction_term) + + # Thomas forward + divided = 1.0 / (bcol - ccol[:, :, k - 1] * acol) + ccol[:, :, k] = ccol[:, :, k] * divided + dcol[:, :, k] = (dcol[:, :, k] - (dcol[:, :, k - 1]) * acol) * divided + + for k in range(K - 1, K): + gav = -0.25 * (wcon[1:, :, k] + wcon[:-1, :, k]) + as_ = gav * BET_M + 
acol = gav * BET_P + bcol = dtr_stage - acol + + # update the d column + correction_term = -as_ * (u_stage[:, :, k - 1] - u_stage[:, :, k]) + dcol[:, :, k] = (dtr_stage * u_pos[:, :, k] + utens[:, :, k] + + utens_stage[:, :, k] + correction_term) + + # Thomas forward + divided = 1.0 / (bcol - ccol[:, :, k - 1] * acol) + dcol[:, :, k] = (dcol[:, :, k] - (dcol[:, :, k - 1]) * acol) * divided + + for k in range(K - 1, K - 2, -1): + datacol = dcol[:, :, k] + data_col[:] = datacol + utens_stage[:, :, k] = dtr_stage * (datacol - u_pos[:, :, k]) + + for k in range(K - 2, -1, -1): + datacol = dcol[:, :, k] - ccol[:, :, k] * data_col[:, :] + data_col[:] = datacol + utens_stage[:, :, k] = dtr_stage * (datacol - u_pos[:, :, k]) diff --git a/config.guess b/config.guess index e81d3ae7c210ba..7f76b6228f73d6 100755 --- a/config.guess +++ b/config.guess @@ -1,14 +1,14 @@ #! /bin/sh # Attempt to guess a canonical system name. -# Copyright 1992-2021 Free Software Foundation, Inc. +# Copyright 1992-2022 Free Software Foundation, Inc. # shellcheck disable=SC2006,SC2268 # see below for rationale -timestamp='2021-06-03' +timestamp='2022-01-09' # This file is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 3 of the License, or +# the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but @@ -60,7 +60,7 @@ version="\ GNU config.guess ($timestamp) Originally written by Per Bothner. -Copyright 1992-2021 Free Software Foundation, Inc. +Copyright 1992-2022 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." 
@@ -437,7 +437,7 @@ case $UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION in # This test works for both compilers. if test "$CC_FOR_BUILD" != no_compiler_found; then if (echo '#ifdef __amd64'; echo IS_64BIT_ARCH; echo '#endif') | \ - (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ + (CCOPTS="" $CC_FOR_BUILD -m64 -E - 2>/dev/null) | \ grep IS_64BIT_ARCH >/dev/null then SUN_ARCH=x86_64 @@ -929,6 +929,9 @@ EOF i*:PW*:*) GUESS=$UNAME_MACHINE-pc-pw32 ;; + *:SerenityOS:*:*) + GUESS=$UNAME_MACHINE-pc-serenity + ;; *:Interix*:*) case $UNAME_MACHINE in x86) @@ -1522,6 +1525,9 @@ EOF i*86:rdos:*:*) GUESS=$UNAME_MACHINE-pc-rdos ;; + i*86:Fiwix:*:*) + GUESS=$UNAME_MACHINE-pc-fiwix + ;; *:AROS:*:*) GUESS=$UNAME_MACHINE-unknown-aros ;; diff --git a/config.sub b/config.sub index d74fb6deac942a..dba16e84c77c7d 100755 --- a/config.sub +++ b/config.sub @@ -1,14 +1,14 @@ #! /bin/sh # Configuration validation subroutine script. -# Copyright 1992-2021 Free Software Foundation, Inc. +# Copyright 1992-2022 Free Software Foundation, Inc. # shellcheck disable=SC2006,SC2268 # see below for rationale -timestamp='2021-08-14' +timestamp='2022-01-03' # This file is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 3 of the License, or +# the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, but @@ -76,7 +76,7 @@ Report bugs and patches to ." version="\ GNU config.sub ($timestamp) -Copyright 1992-2021 Free Software Foundation, Inc. +Copyright 1992-2022 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." 
@@ -1020,6 +1020,11 @@ case $cpu-$vendor in ;; # Here we normalize CPU types with a missing or matching vendor + armh-unknown | armh-alt) + cpu=armv7l + vendor=alt + basic_os=${basic_os:-linux-gnueabihf} + ;; dpx20-unknown | dpx20-bull) cpu=rs6000 vendor=bull @@ -1121,7 +1126,7 @@ case $cpu-$vendor in xscale-* | xscalee[bl]-*) cpu=`echo "$cpu" | sed 's/^xscale/arm/'` ;; - arm64-*) + arm64-* | aarch64le-*) cpu=aarch64 ;; @@ -1304,7 +1309,7 @@ esac if test x$basic_os != x then -# First recognize some ad-hoc caes, or perhaps split kernel-os, or else just +# First recognize some ad-hoc cases, or perhaps split kernel-os, or else just # set os. case $basic_os in gnu/linux*) @@ -1748,7 +1753,8 @@ case $os in | skyos* | haiku* | rdos* | toppers* | drops* | es* \ | onefs* | tirtos* | phoenix* | fuchsia* | redox* | bme* \ | midnightbsd* | amdhsa* | unleashed* | emscripten* | wasi* \ - | nsk* | powerunix* | genode* | zvmoe* | qnx* | emx* | zephyr*) + | nsk* | powerunix* | genode* | zvmoe* | qnx* | emx* | zephyr* \ + | fiwix* ) ;; # This one is extra strict with allowed versions sco3.2v2 | sco3.2v[4-9]* | sco5v6*) diff --git a/configure b/configure index b6f90bcd8c7300..40f2507cab4ca3 100755 --- a/configure +++ b/configure @@ -882,6 +882,7 @@ LIBNSL_LIBS LIBNSL_CFLAGS LIBMPDEC_INTERNAL LIBMPDEC_CFLAGS +PAPI_LIBS MODULE__CTYPES_MALLOC_CLOSURE LIBFFI_LIBS LIBFFI_CFLAGS @@ -1082,6 +1083,8 @@ with_trace_refs enable_pystats with_assertions enable_optimizations +with_instr_stats +with_cmlq with_lto enable_bolt with_strict_overflow @@ -1093,6 +1096,7 @@ with_hash_algorithm with_tzpath with_libs with_system_expat +with_cmlq_papi with_system_libmpdec with_decimal_contextvar enable_loadable_sqlite_extensions @@ -1853,6 +1857,11 @@ Optional Packages: --with-trace-refs enable tracing references for debugging purpose (default is no) --with-assertions build with C assertions enabled (default is no) + --with(out)-instr-stats[=yes|no] + build with per instruction statistics (default is + no) 
+ --with(out)-cmlq[=always|env|no] + use CMLQ optimization (default is no) --with-lto=[full|thin|no|yes] enable Link-Time-Optimization in any build (default is no) @@ -1877,6 +1886,8 @@ Optional Packages: --with-libs='lib1 ...' link against additional libs (default is no) --with-system-expat build pyexpat module using an installed expat library, see Doc/library/pyexpat.rst (default is no) + --with(out)-cmlq-papi[=yes|no] + build with CMLQ PAPI measurements (default is no) --with-system-libmpdec build _decimal module using an installed libmpdec library, see Doc/library/decimal.rst (default is no) --with-decimal-contextvar @@ -8242,6 +8253,85 @@ else fi + +# Check whether --with-instr-stats was given. +if test ${with_instr_stats+y} +then : + withval=$with_instr_stats; +else $as_nop + with_instr_stats=no + +fi + + +if test "x$with_instr_stats" = xyes +then : + + +printf "%s\n" "#define INSTR_STATS 1" >>confdefs.h + + +fi + + + +# Check whether --with-cmlq was given. +if test ${with_cmlq+y} +then : + withval=$with_cmlq; + case $with_cmlq in #( + always) : + with_cmlq=always ;; #( + env) : + with_cmlq=env ;; #( + no) : + ;; #( + *) : + as_fn_error $? "proper usage is --with(out)-cmlq[=always|env|no]" "$LINENO" 5 + ;; +esac + +else $as_nop + with_cmlq=no + +fi + + +if test "x$with_cmlq" = xalways +then : + + +printf "%s\n" "#define Py_OPT_CMLQ_ALWAYS 1" >>confdefs.h + + +fi + +if test "x$with_cmlq" = xenv +then : + + +printf "%s\n" "#define Py_OPT_CMLQ_ENV 1" >>confdefs.h + + +fi + +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether to use cmlq" >&5 +printf %s "checking whether to use cmlq... 
" >&6; } +if test "x$with_cmlq" = xno +then : + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } + +else $as_nop + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $with_cmlq" >&5 +printf "%s\n" "$with_cmlq" >&6; } + +fi + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking PROFILE_TASK" >&5 printf %s "checking PROFILE_TASK... " >&6; } if test -z "$PROFILE_TASK" @@ -13183,8 +13273,8 @@ then : pkg_failed=no -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for LIBUUID" >&5 -printf %s "checking for LIBUUID... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for uuid >= 2.20" >&5 +printf %s "checking for uuid >= 2.20... " >&6; } if test -n "$LIBUUID_CFLAGS"; then pkg_cv_LIBUUID_CFLAGS="$LIBUUID_CFLAGS" @@ -13224,7 +13314,7 @@ fi if test $pkg_failed = yes; then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then @@ -13233,12 +13323,12 @@ else _pkg_short_errors_supported=no fi if test $_pkg_short_errors_supported = yes; then - LIBUUID_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "uuid >= 2.20" 2>&1` + LIBUUID_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "uuid >= 2.20" 2>&1` else - LIBUUID_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "uuid >= 2.20" 2>&1` + LIBUUID_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "uuid >= 2.20" 2>&1` fi - # Put the nasty error message in config.log where it belongs - echo "$LIBUUID_PKG_ERRORS" >&5 + # Put the nasty error message in config.log where it belongs + echo "$LIBUUID_PKG_ERRORS" >&5 save_CFLAGS=$CFLAGS @@ -13364,7 +13454,7 @@ LIBS=$save_LIBS elif test $pkg_failed = untried; then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } 
save_CFLAGS=$CFLAGS @@ -13490,11 +13580,11 @@ LIBS=$save_LIBS else - LIBUUID_CFLAGS=$pkg_cv_LIBUUID_CFLAGS - LIBUUID_LIBS=$pkg_cv_LIBUUID_LIBS + LIBUUID_CFLAGS=$pkg_cv_LIBUUID_CFLAGS + LIBUUID_LIBS=$pkg_cv_LIBUUID_LIBS { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } - have_uuid=yes + have_uuid=yes printf "%s\n" "#define HAVE_UUID_H 1" >>confdefs.h printf "%s\n" "#define HAVE_UUID_GENERATE_TIME_SAFE 1" >>confdefs.h @@ -14072,8 +14162,8 @@ then : pkg_failed=no -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for LIBFFI" >&5 -printf %s "checking for LIBFFI... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for libffi" >&5 +printf %s "checking for libffi... " >&6; } if test -n "$LIBFFI_CFLAGS"; then pkg_cv_LIBFFI_CFLAGS="$LIBFFI_CFLAGS" @@ -14113,7 +14203,7 @@ fi if test $pkg_failed = yes; then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then @@ -14122,12 +14212,12 @@ else _pkg_short_errors_supported=no fi if test $_pkg_short_errors_supported = yes; then - LIBFFI_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "libffi" 2>&1` + LIBFFI_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "libffi" 2>&1` else - LIBFFI_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "libffi" 2>&1` + LIBFFI_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "libffi" 2>&1` fi - # Put the nasty error message in config.log where it belongs - echo "$LIBFFI_PKG_ERRORS" >&5 + # Put the nasty error message in config.log where it belongs + echo "$LIBFFI_PKG_ERRORS" >&5 save_CFLAGS=$CFLAGS @@ -14200,7 +14290,7 @@ LIBS=$save_LIBS elif test $pkg_failed = untried; then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } 
save_CFLAGS=$CFLAGS @@ -14273,11 +14363,11 @@ LIBS=$save_LIBS else - LIBFFI_CFLAGS=$pkg_cv_LIBFFI_CFLAGS - LIBFFI_LIBS=$pkg_cv_LIBFFI_LIBS + LIBFFI_CFLAGS=$pkg_cv_LIBFFI_CFLAGS + LIBFFI_LIBS=$pkg_cv_LIBFFI_LIBS { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } - have_libffi=yes + have_libffi=yes fi fi @@ -14447,6 +14537,87 @@ LIBS=$save_LIBS fi + + + +# Check whether --with-cmlq-papi was given. +if test ${with_cmlq_papi+y} +then : + withval=$with_cmlq_papi; +else $as_nop + with_cmlq_papi=no + +fi + + +have_papi=no +if test "x$with_cmlq_papi" = xyes +then : + + +printf "%s\n" "#define CMLQ_PAPI 1" >>confdefs.h +, + save_CFLAGS=$CFLAGS +save_CPPFLAGS=$CPPFLAGS +save_LDFLAGS=$LDFLAGS +save_LIBS=$LIBS + + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for PAPI_library_init in -lpapi" >&5 +printf %s "checking for PAPI_library_init in -lpapi... " >&6; } +if test ${ac_cv_lib_papi_PAPI_library_init+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_check_lib_save_LIBS=$LIBS +LIBS="-lpapi $LIBS" +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ + +/* Override any GCC internal prototype to avoid an error. + Use char because int might match the return type of a GCC + builtin and then its argument prototype would still apply. 
*/ +char PAPI_library_init (); +int +main (void) +{ +return PAPI_library_init (); + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO" +then : + ac_cv_lib_papi_PAPI_library_init=yes +else $as_nop + ac_cv_lib_papi_PAPI_library_init=no +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam \ + conftest$ac_exeext conftest.$ac_ext +LIBS=$ac_check_lib_save_LIBS +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_papi_PAPI_library_init" >&5 +printf "%s\n" "$ac_cv_lib_papi_PAPI_library_init" >&6; } +if test "x$ac_cv_lib_papi_PAPI_library_init" = xyes +then : + + printf "%s\n" "#define HAVE_PAPI 1" >>confdefs.h + + PAPI_CFLAGS=${PAPI_CFLAGS-""} + PAPI_LIBS=${PAPI_LIBS-"-lpapi"} + +fi + + +CFLAGS=$save_CFLAGS +CPPFLAGS=$save_CPPFLAGS +LDFLAGS=$save_LDFLAGS +LIBS=$save_LIBS + +, + +fi + # Check for use of the system libmpdec library { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for --with-system-libmpdec" >&5 printf %s "checking for --with-system-libmpdec... " >&6; } @@ -14590,8 +14761,8 @@ fi pkg_failed=no -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for LIBNSL" >&5 -printf %s "checking for LIBNSL... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for libnsl" >&5 +printf %s "checking for libnsl... 
" >&6; } if test -n "$LIBNSL_CFLAGS"; then pkg_cv_LIBNSL_CFLAGS="$LIBNSL_CFLAGS" @@ -14631,7 +14802,7 @@ fi if test $pkg_failed = yes; then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then @@ -14640,12 +14811,12 @@ else _pkg_short_errors_supported=no fi if test $_pkg_short_errors_supported = yes; then - LIBNSL_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "libnsl" 2>&1` + LIBNSL_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "libnsl" 2>&1` else - LIBNSL_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "libnsl" 2>&1` + LIBNSL_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "libnsl" 2>&1` fi - # Put the nasty error message in config.log where it belongs - echo "$LIBNSL_PKG_ERRORS" >&5 + # Put the nasty error message in config.log where it belongs + echo "$LIBNSL_PKG_ERRORS" >&5 LIBNSL_CFLAGS=${LIBNSL_CFLAGS-""} @@ -14735,7 +14906,7 @@ esac LIBNSL_LIBS=${LIBNSL_LIBS-$libnsl} elif test $pkg_failed = untried; then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } LIBNSL_CFLAGS=${LIBNSL_CFLAGS-""} @@ -14825,11 +14996,11 @@ esac LIBNSL_LIBS=${LIBNSL_LIBS-$libnsl} else - LIBNSL_CFLAGS=$pkg_cv_LIBNSL_CFLAGS - LIBNSL_LIBS=$pkg_cv_LIBNSL_LIBS + LIBNSL_CFLAGS=$pkg_cv_LIBNSL_CFLAGS + LIBNSL_LIBS=$pkg_cv_LIBNSL_LIBS { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } - have_nis=yes + have_nis=yes fi if test "x$have_nis" = xyes @@ -14883,8 +15054,8 @@ fi pkg_failed=no -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for LIBSQLITE3" >&5 -printf %s "checking for LIBSQLITE3... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for sqlite3 >= 3.7.15" >&5 +printf %s "checking for sqlite3 >= 3.7.15... 
" >&6; } if test -n "$LIBSQLITE3_CFLAGS"; then pkg_cv_LIBSQLITE3_CFLAGS="$LIBSQLITE3_CFLAGS" @@ -14924,7 +15095,7 @@ fi if test $pkg_failed = yes; then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then @@ -14933,12 +15104,12 @@ else _pkg_short_errors_supported=no fi if test $_pkg_short_errors_supported = yes; then - LIBSQLITE3_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "sqlite3 >= 3.7.15" 2>&1` + LIBSQLITE3_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "sqlite3 >= 3.7.15" 2>&1` else - LIBSQLITE3_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "sqlite3 >= 3.7.15" 2>&1` + LIBSQLITE3_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "sqlite3 >= 3.7.15" 2>&1` fi - # Put the nasty error message in config.log where it belongs - echo "$LIBSQLITE3_PKG_ERRORS" >&5 + # Put the nasty error message in config.log where it belongs + echo "$LIBSQLITE3_PKG_ERRORS" >&5 LIBSQLITE3_CFLAGS=${LIBSQLITE3_CFLAGS-""} @@ -14946,7 +15117,7 @@ fi elif test $pkg_failed = untried; then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } LIBSQLITE3_CFLAGS=${LIBSQLITE3_CFLAGS-""} @@ -14954,8 +15125,8 @@ printf "%s\n" "no" >&6; } else - LIBSQLITE3_CFLAGS=$pkg_cv_LIBSQLITE3_CFLAGS - LIBSQLITE3_LIBS=$pkg_cv_LIBSQLITE3_LIBS + LIBSQLITE3_CFLAGS=$pkg_cv_LIBSQLITE3_CFLAGS + LIBSQLITE3_LIBS=$pkg_cv_LIBSQLITE3_LIBS { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } @@ -15647,8 +15818,8 @@ for _QUERY in \ pkg_failed=no -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for TCLTK" >&5 -printf %s "checking for TCLTK... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $_QUERY" >&5 +printf %s "checking for $_QUERY... 
" >&6; } if test -n "$TCLTK_CFLAGS"; then pkg_cv_TCLTK_CFLAGS="$TCLTK_CFLAGS" @@ -15688,7 +15859,7 @@ fi if test $pkg_failed = yes; then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then @@ -15697,24 +15868,24 @@ else _pkg_short_errors_supported=no fi if test $_pkg_short_errors_supported = yes; then - TCLTK_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "$_QUERY" 2>&1` + TCLTK_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "$_QUERY" 2>&1` else - TCLTK_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "$_QUERY" 2>&1` + TCLTK_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "$_QUERY" 2>&1` fi - # Put the nasty error message in config.log where it belongs - echo "$TCLTK_PKG_ERRORS" >&5 + # Put the nasty error message in config.log where it belongs + echo "$TCLTK_PKG_ERRORS" >&5 - found_tcltk=no + found_tcltk=no elif test $pkg_failed = untried; then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } - found_tcltk=no + found_tcltk=no else - TCLTK_CFLAGS=$pkg_cv_TCLTK_CFLAGS - TCLTK_LIBS=$pkg_cv_TCLTK_LIBS + TCLTK_CFLAGS=$pkg_cv_TCLTK_CFLAGS + TCLTK_LIBS=$pkg_cv_TCLTK_LIBS { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } - found_tcltk=yes + found_tcltk=yes fi fi @@ -15744,8 +15915,8 @@ case $ac_sys_system in #( pkg_failed=no -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for X11" >&5 -printf %s "checking for X11... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for x11" >&5 +printf %s "checking for x11... 
" >&6; } if test -n "$X11_CFLAGS"; then pkg_cv_X11_CFLAGS="$X11_CFLAGS" @@ -15785,7 +15956,7 @@ fi if test $pkg_failed = yes; then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then @@ -15794,14 +15965,14 @@ else _pkg_short_errors_supported=no fi if test $_pkg_short_errors_supported = yes; then - X11_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "x11" 2>&1` + X11_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "x11" 2>&1` else - X11_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "x11" 2>&1` + X11_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "x11" 2>&1` fi - # Put the nasty error message in config.log where it belongs - echo "$X11_PKG_ERRORS" >&5 + # Put the nasty error message in config.log where it belongs + echo "$X11_PKG_ERRORS" >&5 - as_fn_error $? "Package requirements (x11) were not met: + as_fn_error $? "Package requirements (x11) were not met: $X11_PKG_ERRORS @@ -15812,9 +15983,9 @@ Alternatively, you may set the environment variables X11_CFLAGS and X11_LIBS to avoid the need to call pkg-config. See the pkg-config man page for more details." "$LINENO" 5 elif test $pkg_failed = untried; then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } - { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "The pkg-config script could not be found or is too old. Make sure it is in your PATH or set the PKG_CONFIG environment variable to the full @@ -15827,8 +15998,8 @@ See the pkg-config man page for more details. To get pkg-config, see . 
See \`config.log' for more details" "$LINENO" 5; } else - X11_CFLAGS=$pkg_cv_X11_CFLAGS - X11_LIBS=$pkg_cv_X11_LIBS + X11_CFLAGS=$pkg_cv_X11_CFLAGS + X11_LIBS=$pkg_cv_X11_LIBS { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } @@ -19699,8 +19870,8 @@ fi pkg_failed=no -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for ZLIB" >&5 -printf %s "checking for ZLIB... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for zlib >= 1.2.0" >&5 +printf %s "checking for zlib >= 1.2.0... " >&6; } if test -n "$ZLIB_CFLAGS"; then pkg_cv_ZLIB_CFLAGS="$ZLIB_CFLAGS" @@ -19740,7 +19911,7 @@ fi if test $pkg_failed = yes; then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then @@ -19749,12 +19920,12 @@ else _pkg_short_errors_supported=no fi if test $_pkg_short_errors_supported = yes; then - ZLIB_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "zlib >= 1.2.0" 2>&1` + ZLIB_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "zlib >= 1.2.0" 2>&1` else - ZLIB_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "zlib >= 1.2.0" 2>&1` + ZLIB_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "zlib >= 1.2.0" 2>&1` fi - # Put the nasty error message in config.log where it belongs - echo "$ZLIB_PKG_ERRORS" >&5 + # Put the nasty error message in config.log where it belongs + echo "$ZLIB_PKG_ERRORS" >&5 save_CFLAGS=$CFLAGS @@ -19883,7 +20054,7 @@ LIBS=$save_LIBS elif test $pkg_failed = untried; then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } save_CFLAGS=$CFLAGS @@ -20012,8 +20183,8 @@ LIBS=$save_LIBS else - ZLIB_CFLAGS=$pkg_cv_ZLIB_CFLAGS - ZLIB_LIBS=$pkg_cv_ZLIB_LIBS + ZLIB_CFLAGS=$pkg_cv_ZLIB_CFLAGS + 
ZLIB_LIBS=$pkg_cv_ZLIB_LIBS { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } @@ -20047,8 +20218,8 @@ fi pkg_failed=no -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for BZIP2" >&5 -printf %s "checking for BZIP2... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for bzip2" >&5 +printf %s "checking for bzip2... " >&6; } if test -n "$BZIP2_CFLAGS"; then pkg_cv_BZIP2_CFLAGS="$BZIP2_CFLAGS" @@ -20088,7 +20259,7 @@ fi if test $pkg_failed = yes; then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then @@ -20097,12 +20268,12 @@ else _pkg_short_errors_supported=no fi if test $_pkg_short_errors_supported = yes; then - BZIP2_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "bzip2" 2>&1` + BZIP2_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "bzip2" 2>&1` else - BZIP2_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "bzip2" 2>&1` + BZIP2_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "bzip2" 2>&1` fi - # Put the nasty error message in config.log where it belongs - echo "$BZIP2_PKG_ERRORS" >&5 + # Put the nasty error message in config.log where it belongs + echo "$BZIP2_PKG_ERRORS" >&5 save_CFLAGS=$CFLAGS @@ -20184,7 +20355,7 @@ LIBS=$save_LIBS elif test $pkg_failed = untried; then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } save_CFLAGS=$CFLAGS @@ -20266,17 +20437,17 @@ LIBS=$save_LIBS else - BZIP2_CFLAGS=$pkg_cv_BZIP2_CFLAGS - BZIP2_LIBS=$pkg_cv_BZIP2_LIBS + BZIP2_CFLAGS=$pkg_cv_BZIP2_CFLAGS + BZIP2_LIBS=$pkg_cv_BZIP2_LIBS { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } - have_bzip2=yes + have_bzip2=yes fi pkg_failed=no -{ printf "%s\n" 
"$as_me:${as_lineno-$LINENO}: checking for LIBLZMA" >&5 -printf %s "checking for LIBLZMA... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for liblzma" >&5 +printf %s "checking for liblzma... " >&6; } if test -n "$LIBLZMA_CFLAGS"; then pkg_cv_LIBLZMA_CFLAGS="$LIBLZMA_CFLAGS" @@ -20316,7 +20487,7 @@ fi if test $pkg_failed = yes; then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then @@ -20325,12 +20496,12 @@ else _pkg_short_errors_supported=no fi if test $_pkg_short_errors_supported = yes; then - LIBLZMA_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "liblzma" 2>&1` + LIBLZMA_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "liblzma" 2>&1` else - LIBLZMA_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "liblzma" 2>&1` + LIBLZMA_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "liblzma" 2>&1` fi - # Put the nasty error message in config.log where it belongs - echo "$LIBLZMA_PKG_ERRORS" >&5 + # Put the nasty error message in config.log where it belongs + echo "$LIBLZMA_PKG_ERRORS" >&5 save_CFLAGS=$CFLAGS @@ -20412,7 +20583,7 @@ LIBS=$save_LIBS elif test $pkg_failed = untried; then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } save_CFLAGS=$CFLAGS @@ -20494,11 +20665,11 @@ LIBS=$save_LIBS else - LIBLZMA_CFLAGS=$pkg_cv_LIBLZMA_CFLAGS - LIBLZMA_LIBS=$pkg_cv_LIBLZMA_LIBS + LIBLZMA_CFLAGS=$pkg_cv_LIBLZMA_CFLAGS + LIBLZMA_LIBS=$pkg_cv_LIBLZMA_LIBS { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } - have_liblzma=yes + have_liblzma=yes fi @@ -21743,8 +21914,8 @@ fi pkg_failed=no -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for LIBCRYPT" >&5 -printf %s "checking for LIBCRYPT... 
" >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for libxcrypt >= 3.1.1" >&5 +printf %s "checking for libxcrypt >= 3.1.1... " >&6; } if test -n "$LIBCRYPT_CFLAGS"; then pkg_cv_LIBCRYPT_CFLAGS="$LIBCRYPT_CFLAGS" @@ -21784,7 +21955,7 @@ fi if test $pkg_failed = yes; then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then @@ -21793,12 +21964,12 @@ else _pkg_short_errors_supported=no fi if test $_pkg_short_errors_supported = yes; then - LIBCRYPT_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "libxcrypt >= 3.1.1" 2>&1` + LIBCRYPT_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "libxcrypt >= 3.1.1" 2>&1` else - LIBCRYPT_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "libxcrypt >= 3.1.1" 2>&1` + LIBCRYPT_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "libxcrypt >= 3.1.1" 2>&1` fi - # Put the nasty error message in config.log where it belongs - echo "$LIBCRYPT_PKG_ERRORS" >&5 + # Put the nasty error message in config.log where it belongs + echo "$LIBCRYPT_PKG_ERRORS" >&5 save_CFLAGS=$CFLAGS @@ -21884,7 +22055,7 @@ LIBS=$save_LIBS elif test $pkg_failed = untried; then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } save_CFLAGS=$CFLAGS @@ -21970,8 +22141,8 @@ LIBS=$save_LIBS else - LIBCRYPT_CFLAGS=$pkg_cv_LIBCRYPT_CFLAGS - LIBCRYPT_LIBS=$pkg_cv_LIBCRYPT_LIBS + LIBCRYPT_CFLAGS=$pkg_cv_LIBCRYPT_CFLAGS + LIBCRYPT_LIBS=$pkg_cv_LIBCRYPT_LIBS { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } @@ -24552,8 +24723,8 @@ then : pkg_failed=no -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for LIBREADLINE" >&5 -printf %s "checking for LIBREADLINE... 
" >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for readline" >&5 +printf %s "checking for readline... " >&6; } if test -n "$LIBREADLINE_CFLAGS"; then pkg_cv_LIBREADLINE_CFLAGS="$LIBREADLINE_CFLAGS" @@ -24593,7 +24764,7 @@ fi if test $pkg_failed = yes; then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then @@ -24602,12 +24773,12 @@ else _pkg_short_errors_supported=no fi if test $_pkg_short_errors_supported = yes; then - LIBREADLINE_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "readline" 2>&1` + LIBREADLINE_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "readline" 2>&1` else - LIBREADLINE_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "readline" 2>&1` + LIBREADLINE_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "readline" 2>&1` fi - # Put the nasty error message in config.log where it belongs - echo "$LIBREADLINE_PKG_ERRORS" >&5 + # Put the nasty error message in config.log where it belongs + echo "$LIBREADLINE_PKG_ERRORS" >&5 save_CFLAGS=$CFLAGS @@ -24686,7 +24857,7 @@ LIBS=$save_LIBS elif test $pkg_failed = untried; then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } save_CFLAGS=$CFLAGS @@ -24765,8 +24936,8 @@ LIBS=$save_LIBS else - LIBREADLINE_CFLAGS=$pkg_cv_LIBREADLINE_CFLAGS - LIBREADLINE_LIBS=$pkg_cv_LIBREADLINE_LIBS + LIBREADLINE_CFLAGS=$pkg_cv_LIBREADLINE_CFLAGS + LIBREADLINE_LIBS=$pkg_cv_LIBREADLINE_LIBS { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } @@ -24783,8 +24954,8 @@ then : pkg_failed=no -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for LIBEDIT" >&5 -printf %s "checking for LIBEDIT... 
" >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for libedit" >&5 +printf %s "checking for libedit... " >&6; } if test -n "$LIBEDIT_CFLAGS"; then pkg_cv_LIBEDIT_CFLAGS="$LIBEDIT_CFLAGS" @@ -24824,7 +24995,7 @@ fi if test $pkg_failed = yes; then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then @@ -24833,12 +25004,12 @@ else _pkg_short_errors_supported=no fi if test $_pkg_short_errors_supported = yes; then - LIBEDIT_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "libedit" 2>&1` + LIBEDIT_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "libedit" 2>&1` else - LIBEDIT_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "libedit" 2>&1` + LIBEDIT_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "libedit" 2>&1` fi - # Put the nasty error message in config.log where it belongs - echo "$LIBEDIT_PKG_ERRORS" >&5 + # Put the nasty error message in config.log where it belongs + echo "$LIBEDIT_PKG_ERRORS" >&5 save_CFLAGS=$CFLAGS @@ -24919,7 +25090,7 @@ LIBS=$save_LIBS elif test $pkg_failed = untried; then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } save_CFLAGS=$CFLAGS @@ -25000,8 +25171,8 @@ LIBS=$save_LIBS else - LIBEDIT_CFLAGS=$pkg_cv_LIBEDIT_CFLAGS - LIBEDIT_LIBS=$pkg_cv_LIBEDIT_LIBS + LIBEDIT_CFLAGS=$pkg_cv_LIBEDIT_CFLAGS + LIBEDIT_LIBS=$pkg_cv_LIBEDIT_LIBS { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } @@ -25652,8 +25823,8 @@ then : if test "$ac_sys_system" != "Darwin"; then pkg_failed=no -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for CURSES" >&5 -printf %s "checking for CURSES... 
" >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for ncursesw" >&5 +printf %s "checking for ncursesw... " >&6; } if test -n "$CURSES_CFLAGS"; then pkg_cv_CURSES_CFLAGS="$CURSES_CFLAGS" @@ -25693,7 +25864,7 @@ fi if test $pkg_failed = yes; then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then @@ -25702,12 +25873,12 @@ else _pkg_short_errors_supported=no fi if test $_pkg_short_errors_supported = yes; then - CURSES_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "ncursesw" 2>&1` + CURSES_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "ncursesw" 2>&1` else - CURSES_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "ncursesw" 2>&1` + CURSES_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "ncursesw" 2>&1` fi - # Put the nasty error message in config.log where it belongs - echo "$CURSES_PKG_ERRORS" >&5 + # Put the nasty error message in config.log where it belongs + echo "$CURSES_PKG_ERRORS" >&5 save_CFLAGS=$CFLAGS @@ -25771,7 +25942,7 @@ LIBS=$save_LIBS elif test $pkg_failed = untried; then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } save_CFLAGS=$CFLAGS @@ -25835,8 +26006,8 @@ LIBS=$save_LIBS else - CURSES_CFLAGS=$pkg_cv_CURSES_CFLAGS - CURSES_LIBS=$pkg_cv_CURSES_LIBS + CURSES_CFLAGS=$pkg_cv_CURSES_CFLAGS + CURSES_LIBS=$pkg_cv_CURSES_LIBS { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } @@ -25852,8 +26023,8 @@ then : pkg_failed=no -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for CURSES" >&5 -printf %s "checking for CURSES... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for ncurses" >&5 +printf %s "checking for ncurses... 
" >&6; } if test -n "$CURSES_CFLAGS"; then pkg_cv_CURSES_CFLAGS="$CURSES_CFLAGS" @@ -25893,7 +26064,7 @@ fi if test $pkg_failed = yes; then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then @@ -25902,12 +26073,12 @@ else _pkg_short_errors_supported=no fi if test $_pkg_short_errors_supported = yes; then - CURSES_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "ncurses" 2>&1` + CURSES_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "ncurses" 2>&1` else - CURSES_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "ncurses" 2>&1` + CURSES_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "ncurses" 2>&1` fi - # Put the nasty error message in config.log where it belongs - echo "$CURSES_PKG_ERRORS" >&5 + # Put the nasty error message in config.log where it belongs + echo "$CURSES_PKG_ERRORS" >&5 save_CFLAGS=$CFLAGS @@ -25969,7 +26140,7 @@ LIBS=$save_LIBS elif test $pkg_failed = untried; then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } save_CFLAGS=$CFLAGS @@ -26031,8 +26202,8 @@ LIBS=$save_LIBS else - CURSES_CFLAGS=$pkg_cv_CURSES_CFLAGS - CURSES_LIBS=$pkg_cv_CURSES_LIBS + CURSES_CFLAGS=$pkg_cv_CURSES_CFLAGS + CURSES_LIBS=$pkg_cv_CURSES_LIBS { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } @@ -26087,8 +26258,8 @@ then : pkg_failed=no -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for PANEL" >&5 -printf %s "checking for PANEL... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for panelw" >&5 +printf %s "checking for panelw... 
" >&6; } if test -n "$PANEL_CFLAGS"; then pkg_cv_PANEL_CFLAGS="$PANEL_CFLAGS" @@ -26128,7 +26299,7 @@ fi if test $pkg_failed = yes; then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then @@ -26137,12 +26308,12 @@ else _pkg_short_errors_supported=no fi if test $_pkg_short_errors_supported = yes; then - PANEL_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "panelw" 2>&1` + PANEL_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "panelw" 2>&1` else - PANEL_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "panelw" 2>&1` + PANEL_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "panelw" 2>&1` fi - # Put the nasty error message in config.log where it belongs - echo "$PANEL_PKG_ERRORS" >&5 + # Put the nasty error message in config.log where it belongs + echo "$PANEL_PKG_ERRORS" >&5 save_CFLAGS=$CFLAGS @@ -26204,7 +26375,7 @@ LIBS=$save_LIBS elif test $pkg_failed = untried; then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } save_CFLAGS=$CFLAGS @@ -26266,8 +26437,8 @@ LIBS=$save_LIBS else - PANEL_CFLAGS=$pkg_cv_PANEL_CFLAGS - PANEL_LIBS=$pkg_cv_PANEL_LIBS + PANEL_CFLAGS=$pkg_cv_PANEL_CFLAGS + PANEL_LIBS=$pkg_cv_PANEL_LIBS { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } @@ -26283,8 +26454,8 @@ then : pkg_failed=no -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for PANEL" >&5 -printf %s "checking for PANEL... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for panel" >&5 +printf %s "checking for panel... 
" >&6; } if test -n "$PANEL_CFLAGS"; then pkg_cv_PANEL_CFLAGS="$PANEL_CFLAGS" @@ -26324,7 +26495,7 @@ fi if test $pkg_failed = yes; then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then @@ -26333,12 +26504,12 @@ else _pkg_short_errors_supported=no fi if test $_pkg_short_errors_supported = yes; then - PANEL_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "panel" 2>&1` + PANEL_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "panel" 2>&1` else - PANEL_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "panel" 2>&1` + PANEL_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "panel" 2>&1` fi - # Put the nasty error message in config.log where it belongs - echo "$PANEL_PKG_ERRORS" >&5 + # Put the nasty error message in config.log where it belongs + echo "$PANEL_PKG_ERRORS" >&5 save_CFLAGS=$CFLAGS @@ -26400,7 +26571,7 @@ LIBS=$save_LIBS elif test $pkg_failed = untried; then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } save_CFLAGS=$CFLAGS @@ -26462,8 +26633,8 @@ LIBS=$save_LIBS else - PANEL_CFLAGS=$pkg_cv_PANEL_CFLAGS - PANEL_LIBS=$pkg_cv_PANEL_LIBS + PANEL_CFLAGS=$pkg_cv_PANEL_CFLAGS + PANEL_LIBS=$pkg_cv_PANEL_LIBS { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } @@ -28390,8 +28561,8 @@ then : pkg_failed=no -{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for LIBB2" >&5 -printf %s "checking for LIBB2... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for libb2" >&5 +printf %s "checking for libb2... 
" >&6; } if test -n "$LIBB2_CFLAGS"; then pkg_cv_LIBB2_CFLAGS="$LIBB2_CFLAGS" @@ -28431,7 +28602,7 @@ fi if test $pkg_failed = yes; then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then @@ -28440,21 +28611,21 @@ else _pkg_short_errors_supported=no fi if test $_pkg_short_errors_supported = yes; then - LIBB2_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "libb2" 2>&1` + LIBB2_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "libb2" 2>&1` else - LIBB2_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "libb2" 2>&1` + LIBB2_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "libb2" 2>&1` fi - # Put the nasty error message in config.log where it belongs - echo "$LIBB2_PKG_ERRORS" >&5 + # Put the nasty error message in config.log where it belongs + echo "$LIBB2_PKG_ERRORS" >&5 - have_libb2=no + have_libb2=no elif test $pkg_failed = untried; then - { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 printf "%s\n" "no" >&6; } - have_libb2=no + have_libb2=no else - LIBB2_CFLAGS=$pkg_cv_LIBB2_CFLAGS - LIBB2_LIBS=$pkg_cv_LIBB2_LIBS + LIBB2_CFLAGS=$pkg_cv_LIBB2_CFLAGS + LIBB2_LIBS=$pkg_cv_LIBB2_LIBS { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 printf "%s\n" "yes" >&6; } diff --git a/configure.ac b/configure.ac index ba768aea930714..bad9ddc461894e 100644 --- a/configure.ac +++ b/configure.ac @@ -1788,6 +1788,54 @@ else DEF_MAKE_RULE="all" fi + +AC_ARG_WITH( + [instr-stats], + [AS_HELP_STRING([--with(out)-instr-stats@<:@=yes|no@:>@], + [build with per instruction statistics (default is no)])], + [], + [with_instr_stats=no] +) + +AS_VAR_IF([with_instr_stats], [yes], [ + AC_DEFINE([INSTR_STATS], [1], + [Per instruction statistics.]) +]) + + +AC_ARG_WITH( + [cmlq], + 
[AS_HELP_STRING([--with(out)-cmlq@<:@=always|env|no@:>@], + [use CMLQ optimization (default is no)])], + [ + AS_CASE([$with_cmlq], + [always], [with_cmlq=always], + [env], [with_cmlq=env], + [no], [], + [AC_MSG_ERROR([proper usage is --with(out)-cmlq@<:@=always|env|no@:>@])] + ) + ], + [with_cmlq=no] +) + +AS_VAR_IF([with_cmlq], [always], [ + AC_DEFINE([Py_OPT_CMLQ_ALWAYS], [1], + [Use CMLQ unconditionally.]) +]) + +AS_VAR_IF([with_cmlq], [env], [ + AC_DEFINE([Py_OPT_CMLQ_ENV], [1], + [Use CMLQ if an environment variable is set.]) +]) + +AC_MSG_CHECKING([whether to use cmlq]) +AS_VAR_IF([with_cmlq], [no], [ + AC_MSG_RESULT([no]) +], [ + AC_MSG_RESULT([$with_cmlq]) +]) + + AC_ARG_VAR([PROFILE_TASK], [Python args for PGO generation task]) AC_MSG_CHECKING([PROFILE_TASK]) if test -z "$PROFILE_TASK" @@ -3858,6 +3906,28 @@ AS_VAR_IF([have_libffi], [yes], [ ]) ]) +AH_TEMPLATE([HAVE_PAPI], [Define to 1 if you have the `papi' library.]) +AC_SUBST([PAPI_LIBS]) +AC_ARG_WITH( + [cmlq-papi], + [AS_HELP_STRING([--with(out)-cmlq-papi@<:@=yes|no@:>@], + [build with CMLQ PAPI measurements (default is no)])], + [], + [with_cmlq_papi=no] +) + +have_papi=no +AS_VAR_IF([with_cmlq_papi], [yes], [ + AC_DEFINE([CMLQ_PAPI], [1], [Build with CMLQ PAPI measurements.]), + WITH_SAVE_ENV([ + AC_CHECK_LIB(papi, [PAPI_library_init], [ + AC_DEFINE([HAVE_PAPI], [1]) + PAPI_CFLAGS=${PAPI_CFLAGS-""} + PAPI_LIBS=${PAPI_LIBS-"-lpapi"} + ]) + ]), +]) + # Check for use of the system libmpdec library AC_MSG_CHECKING([for --with-system-libmpdec]) AC_ARG_WITH( diff --git a/insights.org b/insights.org new file mode 100644 index 00000000000000..331c4b5046e000 --- /dev/null +++ b/insights.org @@ -0,0 +1,148 @@ +#+title: Insights + +* cgen default instructions +`DefaultInstr`s store a `function_id`. +If no specific template is registered and `add_default` is activated for the instruction set, cgen generates code with the `DefaultInstr` template. 
+The `DefaultInstr` template is chosen based on the instruction class name. +If no specific template for the opname can be found, cgen uses the class name instead to look up the template. +Since the class is also called `DefaultInstr`, cgen will use the `DefaultInstr` template for all `DefaultInstr` objects without an explicit template. +The `function_id` is stored in `DEFAULT_INSTR_IMPL_FUNCTIONS`, which stores for each opcode name the C function name that implements the corresponding opcode. +For example, `BINARY_ADD` is implemented by `PyNumber_Add`. +This mapping is no longer useful in Python 3.11 because e.g. all binary operation opcodes have been collapsed into `BINARY_OP`. +The actual instruction implementations are added in `add_operation_implementation`. + +* cgen derivative instructions +The derivatives are created in the `derive` function (called via the multiplexer) of the corresponding class, e.g. `InlineCaching`. + +* cgen inline caching derivatives from gdb dump +The inline caching class uses a C structure dump from gdb (stored in `typedefs.TYPE_DATA`) to automatically create inline caching derivatives for each type and operation function. +For example, it generates a derivative for Long together with the concrete instantiation of `tp_richcompare`. + +* the interaction between the optimized and unoptimized interpreter routine became trickier with Python 3.11 +** Python 3.11 uses gotos at certain points (e.g. in RETURN_VALUE) to keep the C stack frame, but executes with a different Python stack frame +** If this happens in the optimized interpreter and the previous frame is an unoptimized frame (i.e. the Python frame to be executed next) we need to deopt properly into the unoptimized interpreter routine. 
+Otherwise the optimized interpreter routine works on a Python stack frame that is not properly initialized/optimized + +* coroutines vs await/async vs generators +** generators allow pausing/resuming a function with `yield` +*** calling code can `send` info back to the yield call into the function +** coroutines are special generators (marked with e.g. `asyncio`) +*** asyncio uses the same mechanisms (`yield` and `send`) to handle coroutines, but accepts only coroutines, not generic generators +** async/await uses the same principles, but accepts only `Awaitable`s. Coroutines are `Awaitable`s by default +*** calling await (the `GET_AWAITABLE` bytecode) calls the `__await__` function of the object to retrieve an iterable (generator) +*** from that point on, `await` essentially does the same as `yield from`, i.e. it reads from the generator (awaitable object) and passes the result down to the calling function as if it was a `yield` call +** conceptually +*** generators produce values +*** coroutines consume and sometimes also produce values +*** coroutines are built on generators internally, but the concepts are different + +* 16 bits of cache storage in the 64 bit instruction format is too little +** e.g. precludes optimizations that need to remember the type version (and deopt if the type was changed) +** information could only be stored out of band, i.e., not inline +** there is an ongoing discussion to reduce the size of version numbers, but the problem will likely reappear elsewhere +https://github.com/faster-cpython/ideas/issues/533 +** version tags for e.g. dictionary keys +*** if the keys object structure has not changed, the quickened version can access a value via an index in an array + +* things I learned about Numpy + +** adding a debug version of numpy to the virtualenv +The following command builds numpy (with meson somehow) and passes the `buildtype=debug` and `build-dir=build-dbg` parameters to meson. 
+`no-build-isolation` means that the pip package uses the binaries/sources in the `build-dbg` directory or the numpy directory respectively. + +#+begin_src bash +pip install -v -e ../numpy --no-build-isolation --config-settings="setup-args=-Dbuildtype=debug" --config-settings="build-dir=build-dbg" +#+end_src + +Also (I think) the following patch is necessary in the `meson.build` file of Numpy +#+begin_src patch +Subject: [PATCH] patch +--- +Index: meson.build +IDEA additional info: +Subsystem: com.intellij.openapi.diff.impl.patch.CharsetEP +<+>ISO-8859-1 +=================================================================== +diff --git a/meson.build b/meson.build +--- a/meson.build (revision b0371ef240560e78b651a5d7c9407ae3212a3d56) ++++ b/meson.build (date 1705913997667) +@@ -37,8 +37,13 @@ + error('NumPy requires Cython >= 3.0.6') + endif + +-py = import('python').find_installation(pure: false) ++py = import('python').find_installation('/home/felixl/repositories/cpython/python', pure: false) + py_dep = py.dependency() ++# ++# ++#py_inc = include_directories('/home/felixl/repositories/cpython/Include') ++#py_dep = declare_dependency(include_directories : py_inc) ++ + + if not cc.has_header('Python.h', dependencies: py_dep) + error('Cannot compile `Python.h`. Perhaps you need to install python-dev|python-devel') +#+end_src + +** calling Numpy C functions directly +*** depends on what kind of C function should be called +*** if the function is a Python function defined in Numpy (either with `def` or `cdef`), Numpy creates a wrapper with Cython +**** without modification, the wrapper would be called with the [[https://peps.python.org/pep-0590/][vector call protocol]] +**** since we know that the target callable supports the vector call protocol, we can skip all steps of the ceremony but the last +***** packaging the call and its context (e.g. 
self) into a vectorcall object +***** calling the Cython wrapper +***** unpacking the vectorcall object and shifting the args pointer +***** calling the actual callee +**** TODO not sure yet how to properly expose the callee, I just forward declared it in Python +***** Cython does not allow `cdef api` for functions with optional (i.e. `param=None`) parameters for some reason +*** if the function is a C function +**** mark it as an API function with a comment containing `NUMPY_API` +**** register a slot number in `numpy_api.py` +**** the Numpy build will then include it in the corresponding __XXX_api.h file (e.g. `__multiarray_api.h`) +***** the file does not expose the function itself, but adds it to an array and makes it accessible via a macro +***** TODO not sure if that causes a performance penalty because the array is local and the compiler should be able to constant fold it +**** need to call `import_array` to initialize the Numpy array of API pointers somewhere +***** NOTE must not happen in bootstrap Python because it simulates Python's import and breaks bootstrapping +***** TODO currently part of the super instruction, should instead happen in Python/in the future extension +***** TODO `NPY_FEATURE_VERSION` is not set properly, causing the API header to not define the accessor macro. 
For now I patched it manually +*** NOTE the fact that Numpy uses Cython might help us in the future +**** maybe we can implement the envisioned automatic generation of a CMLQ extension with Cython + +** numpy ufuncs +*** differentiate between inner func and outer func +**** inner funcs operate element wise, e.g., pairwise add the elements of two arrays +**** outer funcs operate on the structure of arrays, e.g., matrix multiplication +**** when applying an inner func, the ufunc efficiently loops over the array(s) +**** not sure yet how it works for outer funcs +**** outer ufuncs +***** have special argument preparation +****** for legacy reasons, check if arguments are matrices +****** the resulting number of dimensions is dims(a) + dims(b) +****** each result dimension is 1 +****** +*** represented as Python objects as well +**** contains multiple `PyArrayMethodObject`s that know how to loop over different array types and apply different operations +**** implementations are mostly autogenerated with code in `__umath_generated.c.src` +**** also exploits hardware features such as SSE or AVX if available +** value promotion +*** when operating on arrays of different datatypes, Numpy has to find a common datatype to perform the operation and represent the result +*** the old system, ~value-based promotion~, uses a set of predefined rules and a predefined datatype hierarchy +**** more importantly, the actual values (as opposed to their types) of an operand can decide the promotion (see https://numpy.org/neps/nep-0050-scalar-promotion.html) +#+begin_src Python +np.result_type(np.int8, 1) == np.int8 +np.result_type(np.int8, 255) == np.int16 + +int64_0d_array = np.array(1, dtype=np.int64) +np.result_type(np.int8, int64_0d_array) == np.int8 # note how the dtype is ignored and the array's value is considered instead +#+end_src +*** the new scheme never considers the values and considers Python datatypes to be "weakly" typed, requiring promotion to a Numpy type in an 
operation +- the environment variable `NPY_PROMOTION_STATE` configures the promotion behaviour + +* implementation notes external specialization +** could use a custom optimizer/executor pair +*** optimizer rewrites instructions to a regular instruction format +*** executor uses a custom dispatch loop that +**** whenever a frame is entered, sets some thread local storage of the involved extensions to pointers to e.g. the instruction pointer +**** allows bytecode handlers to reside fully in the extension code and use the TLS pointers to alter the instruction pointer / stack pointer +** alternative solution +*** keep using the `EXTERNAL` bytecode and encode the necessary information (e.g. size of the inline cache to skip) either in the oparg or somewhere in the frame +*** could also allocate some cache for each external instruction in the frame diff --git a/pyconfig.h.in b/pyconfig.h.in index ada9dccfef1084..05aa56d2c149ee 100644 --- a/pyconfig.h.in +++ b/pyconfig.h.in @@ -32,6 +32,9 @@ /* The Android API level. */ #undef ANDROID_API_LEVEL +/* Build with CMLQ PAPI measurements. */ +#undef CMLQ_PAPI + /* Define if C doubles are 64-bit IEEE 754 binary format, stored in ARM mixed-endian order (byte order 45670123) */ #undef DOUBLE_IS_ARM_MIXED_ENDIAN_IEEE754 @@ -875,6 +878,9 @@ /* Define to 1 if you have the <panel.h> header file. */ #undef HAVE_PANEL_H +/* Define to 1 if you have the `papi' library. */ +#undef HAVE_PAPI + /* Define to 1 if you have the `pathconf' function. */ #undef HAVE_PATHCONF @@ -1539,6 +1545,9 @@ /* Define to 1 if you have the `_getpty' function. */ #undef HAVE__GETPTY +/* Per instruction statistics. */ +#undef INSTR_STATS + /* Define to 1 if `major', `minor', and `makedev' are declared in <mkdev.h>. */ #undef MAJOR_IN_MKDEV @@ -1615,6 +1624,12 @@ SipHash13: 3, externally defined: 0 */ #undef Py_HASH_ALGORITHM +/* Use CMLQ unconditionally. */ +#undef Py_OPT_CMLQ_ALWAYS + +/* Use CMLQ if an environment variable is set. 
*/ +#undef Py_OPT_CMLQ_ENV + /* Define if you want to enable internal statistics gathering. */ #undef Py_STATS