Skip to content

Commit

Permalink
cmq
Browse files Browse the repository at this point in the history
  • Loading branch information
Felix Berlakovich committed Sep 3, 2024
1 parent 0fb18b0 commit c408bcb
Show file tree
Hide file tree
Showing 65 changed files with 4,253 additions and 1,164 deletions.
1 change: 1 addition & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ Parser/token.c generated
Programs/test_frozenmain.h generated
Python/Python-ast.c generated
Python/generated_cases.c.h generated
Python/opcode_metadata.h generated
Python/opcode_targets.h generated
Python/stdlib_module_names.h generated
Tools/peg_generator/pegen/grammar_parser.py generated
Expand Down
21 changes: 21 additions & 0 deletions Include/Python.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,27 @@
#include <assert.h> // assert()
#include <wchar.h> // wchar_t

/*
 * Optional PAPI high-level region instrumentation.
 *
 * With CMLQ_PAPI defined:
 *   CMLQ_PAPI_BEGIN(NAME)        calls PAPI_hl_region_begin(NAME) and asserts
 *                                that it returned PAPI_OK.
 *   CMLQ_PAPI_END(NAME)          calls PAPI_hl_region_end(NAME) and asserts
 *                                that it returned PAPI_OK.
 *   CMLQ_PAPI_REGION(NAME, CODE) wraps CODE between BEGIN and END.
 * Without CMLQ_PAPI, BEGIN/END expand to nothing and REGION expands to
 * just `CODE;`.
 *
 * Usage constraints:
 *   - BEGIN declares a local `int papi_retval` that END assigns to, so END
 *     needs a BEGIN earlier in the same or an enclosing scope, and BEGIN
 *     can appear at most once per scope.
 *   - The expansions already end in ';' and consist of several statements,
 *     so they must not be used as the unbraced body of an if/else/loop.
 *   - When NDEBUG is defined assert() expands to nothing, so the PAPI
 *     status is not checked; the (void) cast keeps the variable "used" and
 *     avoids set-but-unused warnings in such builds.
 *   - NOTE(review): the PAPI declarations are not included by this block;
 *     confirm the PAPI header is pulled in wherever CMLQ_PAPI is enabled.
 */
#ifdef CMLQ_PAPI
#define CMLQ_PAPI_BEGIN(NAME) \
    int papi_retval = PAPI_hl_region_begin(NAME); \
    assert(papi_retval == PAPI_OK); \
    (void)papi_retval;

#define CMLQ_PAPI_END(NAME) \
    papi_retval = PAPI_hl_region_end(NAME); \
    assert(papi_retval == PAPI_OK); \
    (void)papi_retval;

#define CMLQ_PAPI_REGION(NAME, CODE) \
    CMLQ_PAPI_BEGIN(NAME) \
    CODE; \
    CMLQ_PAPI_END(NAME)
#else
#define CMLQ_PAPI_REGION(NAME, CODE) \
    CODE;
#define CMLQ_PAPI_BEGIN(NAME)
#define CMLQ_PAPI_END(NAME)
#endif


#include "pyport.h"
#include "pymacro.h"
#include "pymath.h"
Expand Down
72 changes: 72 additions & 0 deletions Include/cpython/code.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ typedef union {
} _Py_CODEUNIT;



/* These macros only remain defined for compatibility. */
/* _Py_OPCODE(word) selects the `op.code` member of the code unit `word`. */
#define _Py_OPCODE(word) ((word).op.code)
/* _Py_OPARG(word) selects the `op.arg` member of the code unit `word`. */
#define _Py_OPARG(word) ((word).op.arg)
Expand Down Expand Up @@ -104,6 +105,57 @@ typedef struct {
uint8_t *per_instruction_tools;
} _PyCoMonitoringData;

/*
 * Bookkeeping node for an instruction touched by external specialization.
 *
 * NOTE(review): the field meanings below are inferred from their names and
 * the one original comment; confirm against the code that fills them in.
 */
typedef struct _deoptInfo PyExternalDeoptInfo;

struct _deoptInfo {
    /* Set if a whole chain of instructions is deoptimized. */
    PyExternalDeoptInfo *child;
    /* Presumably the code unit as it was before being rewritten. */
    _Py_CODEUNIT orig_instr;
    /* Presumably where that code unit lives in the bytecode array. */
    _Py_CODEUNIT *position;
    /* Auxiliary value; its meaning is not visible in this header. */
    short data;
    /* Sibling links (the node is doubly linked via next/prev). */
    PyExternalDeoptInfo *next;
    PyExternalDeoptInfo *prev;
};

/*
 * Hook type stored in PyExternalSpecializer.TrySpecialization; it receives
 * the instruction and the address of the stack pointer. The declaration is
 * the same for C and C++, so it lives outside the conditional.
 */
typedef int (*ExternalSpecializationHook)(_Py_CODEUNIT *old_instr, PyObject ***stack_pointer);

/*
 * Signature of an externally supplied instruction handler.
 *
 * NOTE(review): the C and C++ declarations disagree on parameter types
 * (void * vs. _Py_CODEUNIT **, and PyObject *** vs. PyObject **). Only the
 * C form can use `restrict`, but the differing types look unintentional;
 * confirm which signature the interpreter actually calls.
 */
#ifdef __cplusplus
typedef int (*PyExternal_CodeHandler)(_Py_CODEUNIT **next_instr, PyObject **stack_pointer);
#else
typedef int (*PyExternal_CodeHandler)(void *restrict external_cache_pointer, PyObject *restrict **stack_pointer);
#endif

/* Hook type stored in PyExternalSpecializer.FunctionEnd; receives an
 * instruction and an opaque external cache pointer. */
typedef void (*FunctionEndHook)(_Py_CODEUNIT *instr, void* external_cache_pointer);
/* Function-pointer type for PyExternalSpecializer.SpecializeInstruction.
 * NOTE(review): parameters are unnamed here; document their meaning next
 * to the implementation. */
typedef int (*SpecializeInstructionPtr)(_Py_CODEUNIT*, int, PyExternal_CodeHandler, void *);
/* Function-pointer type for PyExternalSpecializer.SpecializeChain. */
typedef int (*SpecializeChainPtr)(_Py_CODEUNIT *, PyObject **, int , PyExternal_CodeHandler, unsigned char, void *);
/* Function-pointer type for PyExternalSpecializer.IsOperandConstant. */
typedef int (*IsOperandConstantPtr)(_Py_CODEUNIT *, PyObject **, int );

/*
 * Table of callbacks describing an external specializer; passed to
 * PyExternal_SetSpecializer().
 */
typedef struct _PyExternalSpecializer {
// Hooks supplied through this table (types declared above).
ExternalSpecializationHook TrySpecialization;
FunctionEndHook FunctionEnd;

// TODO: workaround until the unexplained linking performance issue is resolved.
// A few benchmarks suffer a major performance regression when the numpy module
// resolves these functions dynamically through the linker. Carrying them as
// function pointers in this struct avoids that resolution and seems to help.
SpecializeInstructionPtr SpecializeInstruction;
SpecializeChainPtr SpecializeChain;
IsOperandConstantPtr IsOperandConstant;
} PyExternalSpecializer;


/*
 * Extra code-object members used by the CMLQ extension. They are only
 * present when Py_OPT_CMLQ_ENV or Py_OPT_CMLQ_ALWAYS is defined; otherwise
 * the macro expands to nothing.
 *
 * NOTE(review): the expansion already ends in ';' and _PyCode_DEF writes
 * `CMLQ_Def;`, which leaves an empty declaration in the struct (also when
 * the macro is empty). Compilers accept this as an extension but diagnose
 * it under -Wpedantic.
 */
#if defined(Py_OPT_CMLQ_ENV) || defined(Py_OPT_CMLQ_ALWAYS)
#define CMLQ_Def \
PyObject *co_size_table; \
PyExternalDeoptInfo *co_deopt_info_head;
#else
#define CMLQ_Def
#endif

/*
 * Extra code-object member holding per-code statistics. It is only present
 * when INSTR_STATS is defined; otherwise the macro expands to nothing.
 *
 * NOTE(review): as with CMLQ_Def, the use site appends its own ';', so an
 * empty declaration is left behind in the struct.
 */
#ifdef INSTR_STATS
#define CMLQ_Stats_Def \
PyObject *co_stats_table;
#else
#define CMLQ_Stats_Def
#endif

// To avoid repeating ourselves in deepfreeze.py, all PyCodeObject members are
// defined in this macro:
#define _PyCode_DEF(SIZE) { \
Expand Down Expand Up @@ -159,6 +211,8 @@ typedef struct {
PyObject *co_name; /* unicode (name, for reference) */ \
PyObject *co_qualname; /* unicode (qualname, for reference) */ \
PyObject *co_linetable; /* bytes object that holds location info */ \
CMLQ_Def; \
CMLQ_Stats_Def; \
PyObject *co_weakreflist; /* to support weakrefs to code objects */ \
_PyCoCached *_co_cached; /* cached co_* attributes */ \
uint64_t _co_instrumentation_version; /* current instrumentation version */ \
Expand Down Expand Up @@ -226,6 +280,24 @@ static inline int PyCode_GetFirstFree(PyCodeObject *op) {
/* The code object's `co_code_adaptive` storage viewed as a _Py_CODEUNIT
 * pointer; wrapped in _Py_RVALUE. */
#define _PyCode_CODE(CO) _Py_RVALUE((_Py_CODEUNIT *)(CO)->co_code_adaptive)
/* Py_SIZE(CO) multiplied by sizeof(_Py_CODEUNIT), as a Py_ssize_t. */
#define _PyCode_NBYTES(CO) (Py_SIZE(CO) * (Py_ssize_t)sizeof(_Py_CODEUNIT))


/*
 * Return the distance, in code units, from the start of `co`'s bytecode
 * (as given by _PyCode_CODE) to `instr`.
 *
 * The `used` attribute keeps the function emitted even when a translation
 * unit never calls it.
 */
__attribute__((__used__))
static int instruction_offset(PyCodeObject* co, _Py_CODEUNIT* instr) {
    _Py_CODEUNIT *first_unit = _PyCode_CODE(co);
    return (int)(instr - first_unit);
}


/* Per-instruction statistics; only available when INSTR_STATS is defined. */
#ifdef INSTR_STATS
typedef struct _CMLQStatsElem {
// Counter fields; units and update points are not visible in this header.
uint64_t exec_count;
uint64_t specialization_attempts;
// NOTE(review): presumably accumulated execution time in milliseconds -- confirm.
uint64_t exec_ms;
} CMLQStatsElem;

// Look up the statistics element for `instr_ptr` within `code`.
// NOTE(review): declared without PyAPI_FUNC; confirm the intended visibility.
CMLQStatsElem *get_stats_elem(PyCodeObject* code, _Py_CODEUNIT* instr_ptr);
#endif


/* Unstable public interface */
PyAPI_FUNC(PyCodeObject *) PyUnstable_Code_New(
int, int, int, int, int, PyObject *, PyObject *,
Expand Down
4 changes: 4 additions & 0 deletions Include/cpython/pyframe.h
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@

#ifndef Py_CPYTHON_PYFRAME_H
# error "this header file must not be included directly"
#endif
Expand Down Expand Up @@ -33,3 +34,6 @@ PyAPI_FUNC(int) PyUnstable_InterpreterFrame_GetLasti(struct _PyInterpreterFrame
/* Returns the currently executing line number, or -1 if there is no line number.
* Does not raise an exception. */
PyAPI_FUNC(int) PyUnstable_InterpreterFrame_GetLine(struct _PyInterpreterFrame *frame);

/* Takes a PyExternalSpecializer callback table.
 * NOTE(review): presumably installs it as the active external specializer;
 * the definition, and the required lifetime of `specializer`, are not
 * visible in this header -- confirm. */
PyAPI_FUNC(void) PyExternal_SetSpecializer(PyExternalSpecializer *specializer);
//PyAPI_FUNC(int) PyExternal_SpecializeInstruction(_Py_CODEUNIT *instr, int slot, PyExternal_CodeHandler new_handler);
4 changes: 4 additions & 0 deletions Include/internal/pycore_code.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ typedef struct {

/* Inline cache layout for BINARY_OP. */
typedef struct {
uint16_t counter;
/* Four 16-bit units (64 bits in total).
 * NOTE(review): presumably stores an external cache pointer split across
 * code units; this assumes pointers fit in 64 bits, and the field is only
 * 2-byte aligned -- confirm how it is read and written. */
uint16_t external_cache_pointer[4];
} _PyBinaryOpCache;

/* Entry count derived from the struct via CACHE_ENTRIES. */
#define INLINE_CACHE_ENTRIES_BINARY_OP CACHE_ENTRIES(_PyBinaryOpCache)
Expand All @@ -47,6 +48,7 @@ typedef struct {

/* Inline cache layout for BINARY_SUBSCR. */
typedef struct {
uint16_t counter;
/* Four 16-bit units (64 bits in total).
 * NOTE(review): presumably stores an external cache pointer; the field is
 * only 2-byte aligned -- confirm how it is read and written. */
uint16_t external_cache_pointer[4];
} _PyBinarySubscrCache;

/* Entry count derived from the struct via CACHE_ENTRIES. */
#define INLINE_CACHE_ENTRIES_BINARY_SUBSCR CACHE_ENTRIES(_PyBinarySubscrCache)
Expand Down Expand Up @@ -79,6 +81,7 @@ typedef struct {
/* Inline cache layout for CALL. */
typedef struct {
uint16_t counter;
uint16_t func_version[2];
/* Four 16-bit units (64 bits in total), placed after three other 16-bit
 * units.
 * NOTE(review): presumably stores an external cache pointer; the field is
 * only 2-byte aligned -- confirm how it is read and written. */
uint16_t external_cache_pointer[4];
} _PyCallCache;

/* Entry count derived from the struct via CACHE_ENTRIES. */
#define INLINE_CACHE_ENTRIES_CALL CACHE_ENTRIES(_PyCallCache)
Expand Down Expand Up @@ -163,6 +166,7 @@ struct _PyCodeConstructor {

/* the code */
PyObject *code;
PyObject *size_table;
int firstlineno;
PyObject *linetable;

Expand Down
49 changes: 49 additions & 0 deletions Include/internal/pycore_frame.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,22 @@ typedef struct _PyInterpreterFrame {
/* Offset, in code units and converted to int, of the frame's `prev_instr`
 * from the start of its code object's bytecode (_PyCode_CODE). */
#define _PyInterpreterFrame_LASTI(IF) \
((int)((IF)->prev_instr - _PyCode_CODE((IF)->f_code)))


/* Minimal cache view containing only a 16-bit counter; this is the cache
 * type accepted by _PyExternal_TrySpecialize().
 * NOTE(review): presumably a common prefix of the per-opcode cache structs,
 * which all begin with `counter` -- confirm. */
typedef struct _PyCache {
uint16_t counter;
} _PyCache;

/*
 * Access the `void *` stored at the address `array` (usable as an lvalue,
 * so it can be both read and assigned).
 *
 * The argument and the whole expansion are parenthesized so that an
 * expression argument such as `cache + 4` is cast as a whole rather than
 * having the cast bind to `cache` alone, and so the result composes safely
 * with surrounding operators.
 *
 * NOTE(review): this dereferences `array` directly as `void **`. When the
 * storage is a uint16_t cache field it is only guaranteed 2-byte alignment;
 * confirm the target platforms tolerate that, or copy via memcpy instead.
 */
#define POINTER_FROM_ARRAY(array) (*((void **)(array)))
/* Entry point for attempting external specialization of `instr`; receives
 * the address of the stack pointer and the instruction's cache.
 * NOTE(review): the meaning of the int result is not visible here. */
int
_PyExternal_TrySpecialize(_Py_CODEUNIT *instr, PyObject ***stack_pointer, _PyCache *cache);
/* Notification for `frame`.
 * NOTE(review): presumably called when the frame's function finishes --
 * confirm at the call site. */
void
_PyExternal_FunctionEnd(_PyInterpreterFrame *frame);



/* Takes an instruction and its frame and returns an instruction pointer.
 * NOTE(review): presumably restores the original instruction and returns
 * where execution should continue -- confirm against the definition. */
_Py_CODEUNIT*
_PyExternal_Deoptimize(const _Py_CODEUNIT *instr, _PyInterpreterFrame* frame);

/* Return the address of the slot in `f->localsplus` that sits
 * `co_nlocalsplus` entries past its start. */
static inline PyObject **_PyFrame_Stackbase(_PyInterpreterFrame *f) {
    return &f->localsplus[f->f_code->co_nlocalsplus];
}
Expand All @@ -96,6 +112,39 @@ static inline void _PyFrame_StackPush(_PyInterpreterFrame *f, PyObject *value) {
f->stacktop++;
}

/*
 * Debugging helper: name of the code object executing in frame `f`.
 *
 * Returns "NULL" when `f` is NULL, "No Code" when the frame has no code
 * object or the code object has no name, and otherwise whatever
 * _PyUnicode_AsString() yields for `co_name`.
 *
 * The `used` attribute keeps the function emitted even when a translation
 * unit never calls it.
 */
__attribute__((__used__))
static const char* function_name(_PyInterpreterFrame* f) {
    if (f == NULL) {
        return "NULL";
    }
    if (f->f_code == NULL || f->f_code->co_name == NULL) {
        return "No Code";
    }
    return _PyUnicode_AsString(f->f_code->co_name);
}

/*
 * Debugging helper: name of code object `c`.
 *
 * Returns "NULL" when `c` is NULL, "No Code" when `co_name` is unset, and
 * otherwise whatever _PyUnicode_AsString() yields for `co_name`.
 *
 * The `used` attribute keeps the function emitted even when a translation
 * unit never calls it.
 */
__attribute__((__used__))
static const char* code_name(PyCodeObject* c) {
    if (c == NULL) {
        return "NULL";
    }
    return c->co_name ? _PyUnicode_AsString(c->co_name) : "No Code";
}

/*
 * Debugging helper: report whether frame `f` is executing a function whose
 * name, as reported by function_name(), equals `name`.
 *
 * Returns 1 on an exact match and 0 otherwise. `name` is taken as
 * `const char *` so string literals can be passed without discarding
 * qualifiers. A NULL `name`, or a NULL result from function_name() (the
 * UTF-8 conversion of the code name can fail), is treated as "no match"
 * rather than being handed to strcmp().
 *
 * The `used` attribute keeps the function emitted even when a translation
 * unit never calls it.
 */
__attribute__((__used__))
static int in_function(_PyInterpreterFrame* f, const char* name) {
    const char *current = function_name(f);
    if (current == NULL || name == NULL) {
        return 0;
    }
    return strcmp(current, name) == 0;
}


/* (sizeof(_PyInterpreterFrame) - 1) divided by sizeof(PyObject *), as an
 * int: the number of whole pointer-sized slots in the frame struct minus
 * one byte. */
#define FRAME_SPECIALS_SIZE ((int)((sizeof(_PyInterpreterFrame)-1)/sizeof(PyObject *)))

static inline int
Expand Down
1 change: 1 addition & 0 deletions Include/internal/pycore_global_objects_fini_generated.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Include/internal/pycore_global_strings.h
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,7 @@ struct _Py_global_strings {
STRUCT_FOR_ID(__release_buffer__)
STRUCT_FOR_ID(__repr__)
STRUCT_FOR_ID(__reversed__)
STRUCT_FOR_ID(__rewrite__)
STRUCT_FOR_ID(__rfloordiv__)
STRUCT_FOR_ID(__rlshift__)
STRUCT_FOR_ID(__rmatmul__)
Expand Down
1 change: 1 addition & 0 deletions Include/internal/pycore_long.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ static inline PyObject* _PyLong_FromUnsignedChar(unsigned char i)
/* Internal arithmetic helpers taking two PyLongObject operands and
 * returning a PyObject pointer.
 * NOTE(review): the ownership of the result and the error convention are
 * not visible in this header -- confirm against the definitions. */
PyObject *_PyLong_Add(PyLongObject *left, PyLongObject *right);
PyObject *_PyLong_Multiply(PyLongObject *left, PyLongObject *right);
PyObject *_PyLong_Subtract(PyLongObject *left, PyLongObject *right);
/* NOTE(review): presumably implements true division (`/`) for ints. */
PyObject *_PyLong_True_Divide(PyLongObject *left, PyLongObject *right);

/* Used by Python/mystrtoul.c, _PyBytes_FromHex(),
_PyBytes_DecodeEscape(), etc. */
Expand Down
Loading

0 comments on commit c408bcb

Please sign in to comment.