diff --git a/Tools/jit/_targets.py b/Tools/jit/_targets.py index 5604c429bcf8ad..f67fde3ea17285 100644 --- a/Tools/jit/_targets.py +++ b/Tools/jit/_targets.py @@ -182,15 +182,27 @@ async def _compile( async def _build_stencils(self) -> dict[str, _stencils.StencilGroup]: generated_cases = PYTHON_EXECUTOR_CASES_C_H.read_text() - opnames = sorted(re.findall(r"\n {8}case (\w+): \{\n", generated_cases)) + cases_and_opnames = sorted( + re.findall( + r"\n {8}(case (\w+): \{\n.*?\n {8}\})", generated_cases, flags=re.DOTALL + ) + ) tasks = [] with tempfile.TemporaryDirectory() as tempdir: work = pathlib.Path(tempdir).resolve() async with asyncio.TaskGroup() as group: coro = self._compile("trampoline", TOOLS_JIT / "trampoline.c", work) tasks.append(group.create_task(coro, name="trampoline")) - for opname in opnames: - coro = self._compile(opname, TOOLS_JIT_TEMPLATE_C, work) + template = TOOLS_JIT_TEMPLATE_C.read_text() + for case, opname in cases_and_opnames: + # Write out a copy of the template with *only* this case + # inserted. This is about twice as fast as #include'ing all + # of executor_cases.c.h each time we compile (since the C + # compiler wastes a bunch of time parsing the dead code for + # all of the other cases): + c = work / f"{opname}.c" + c.write_text(template.replace("CASE", case)) + coro = self._compile(opname, c, work) tasks.append(group.create_task(coro, name=opname)) return {task.get_name(): task.result() for task in tasks} diff --git a/Tools/jit/template.c b/Tools/jit/template.c index ec7d033e89deff..6cf15085f79933 100644 --- a/Tools/jit/template.c +++ b/Tools/jit/template.c @@ -84,6 +84,8 @@ do { \ #undef WITHIN_STACK_BOUNDS #define WITHIN_STACK_BOUNDS() 1 +#define TIER_TWO 2 + _Py_CODEUNIT * _JIT_ENTRY(_PyInterpreterFrame *frame, _PyStackRef *stack_pointer, PyThreadState *tstate) { @@ -107,9 +109,9 @@ _JIT_ENTRY(_PyInterpreterFrame *frame, _PyStackRef *stack_pointer, PyThreadState OPT_STAT_INC(uops_executed); UOP_STAT_INC(uopcode, execution_count); - // The actual instruction definitions (only one will be used): switch (uopcode) { -#include "executor_cases.c.h" + // The actual instruction definition gets inserted here: + CASE default: Py_UNREACHABLE(); }