From f9de7632b320cb9809548c27c68110849635017c Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 8 Dec 2023 11:48:30 +0000 Subject: [PATCH] GH-111485: Factor out opcode ID generator from the main cases generator. (GH-112831) --- Include/opcode_ids.h | 9 +- Makefile.pre.in | 3 +- Tools/cases_generator/cwriter.py | 7 +- Tools/cases_generator/generate_cases.py | 47 +----- Tools/cases_generator/generators_common.py | 19 +++ Tools/cases_generator/opcode_id_generator.py | 153 +++++++++++++++++++ Tools/cases_generator/tier1_generator.py | 37 ++--- 7 files changed, 203 insertions(+), 72 deletions(-) create mode 100644 Tools/cases_generator/generators_common.py create mode 100644 Tools/cases_generator/opcode_id_generator.py diff --git a/Include/opcode_ids.h b/Include/opcode_ids.h index ba25bd459c1bcde..47f809e345f61c3 100644 --- a/Include/opcode_ids.h +++ b/Include/opcode_ids.h @@ -1,6 +1,6 @@ -// This file is generated by Tools/cases_generator/generate_cases.py +// This file is generated by Tools/cases_generator/opcode_id_generator.py // from: -// Python/bytecodes.c +// ['./Python/bytecodes.c'] // Do not edit! 
#ifndef Py_OPCODE_IDS_H @@ -55,7 +55,6 @@ extern "C" { #define UNARY_NEGATIVE 42 #define UNARY_NOT 43 #define WITH_EXCEPT_START 44 -#define HAVE_ARGUMENT 45 #define BINARY_OP 45 #define BUILD_CONST_KEY_MAP 46 #define BUILD_LIST 47 @@ -200,7 +199,6 @@ extern "C" { #define UNPACK_SEQUENCE_LIST 216 #define UNPACK_SEQUENCE_TUPLE 217 #define UNPACK_SEQUENCE_TWO_TUPLE 218 -#define MIN_INSTRUMENTED_OPCODE 236 #define INSTRUMENTED_RESUME 236 #define INSTRUMENTED_END_FOR 237 #define INSTRUMENTED_END_SEND 238 @@ -233,6 +231,9 @@ extern "C" { #define SETUP_WITH 266 #define STORE_FAST_MAYBE_NULL 267 +#define HAVE_ARGUMENT 45 +#define MIN_INSTRUMENTED_OPCODE 236 + #ifdef __cplusplus } #endif diff --git a/Makefile.pre.in b/Makefile.pre.in index f57894a2118e74a..6ca11f080dcc3f1 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -1587,13 +1587,14 @@ regen-cases: $(PYTHON_FOR_REGEN) \ $(srcdir)/Tools/cases_generator/generate_cases.py \ $(CASESFLAG) \ - -n $(srcdir)/Include/opcode_ids.h.new \ -t $(srcdir)/Python/opcode_targets.h.new \ -m $(srcdir)/Include/internal/pycore_opcode_metadata.h.new \ -e $(srcdir)/Python/executor_cases.c.h.new \ -p $(srcdir)/Lib/_opcode_metadata.py.new \ -a $(srcdir)/Python/abstract_interp_cases.c.h.new \ $(srcdir)/Python/bytecodes.c + $(PYTHON_FOR_REGEN) \ + $(srcdir)/Tools/cases_generator/opcode_id_generator.py -o $(srcdir)/Include/opcode_ids.h.new $(srcdir)/Python/bytecodes.c $(PYTHON_FOR_REGEN) \ $(srcdir)/Tools/cases_generator/tier1_generator.py -o $(srcdir)/Python/generated_cases.c.h.new $(srcdir)/Python/bytecodes.c $(UPDATE_FILE) $(srcdir)/Python/generated_cases.c.h $(srcdir)/Python/generated_cases.c.h.new diff --git a/Tools/cases_generator/cwriter.py b/Tools/cases_generator/cwriter.py index 0b7edd03fd9e47c..34e39855a9b40a6 100644 --- a/Tools/cases_generator/cwriter.py +++ b/Tools/cases_generator/cwriter.py @@ -48,8 +48,13 @@ def maybe_indent(self, txt: str) -> None: if offset <= self.indents[-1] or offset > 40: offset = self.indents[-1] + 4 
self.indents.append(offset) - elif "{" in txt or is_label(txt): + if is_label(txt): self.indents.append(self.indents[-1] + 4) + elif "{" in txt: + if 'extern "C"' in txt: + self.indents.append(self.indents[-1]) + else: + self.indents.append(self.indents[-1] + 4) def emit_text(self, txt: str) -> None: self.out.write(txt) diff --git a/Tools/cases_generator/generate_cases.py b/Tools/cases_generator/generate_cases.py index 4b7f028970bd0c9..d0fdc4a0aeb7b0e 100644 --- a/Tools/cases_generator/generate_cases.py +++ b/Tools/cases_generator/generate_cases.py @@ -101,13 +101,6 @@ arg_parser.add_argument( "-o", "--output", type=str, help="Generated code", default=DEFAULT_OUTPUT ) -arg_parser.add_argument( - "-n", - "--opcode_ids_h", - type=str, - help="Header file with opcode number definitions", - default=DEFAULT_OPCODE_IDS_H_OUTPUT, -) arg_parser.add_argument( "-t", "--opcode_targets_h", @@ -334,42 +327,8 @@ def map_op(op: int, name: str) -> None: self.opmap = opmap self.markers = markers - def write_opcode_ids( - self, opcode_ids_h_filename: str, opcode_targets_filename: str - ) -> None: - """Write header file that defined the opcode IDs""" - - with open(opcode_ids_h_filename, "w") as f: - # Create formatter - self.out = Formatter(f, 0) - - self.write_provenance_header() - - self.out.emit("") - self.out.emit("#ifndef Py_OPCODE_IDS_H") - self.out.emit("#define Py_OPCODE_IDS_H") - self.out.emit("#ifdef __cplusplus") - self.out.emit('extern "C" {') - self.out.emit("#endif") - self.out.emit("") - self.out.emit("/* Instruction opcodes for compiled code */") - - def define(name: str, opcode: int) -> None: - self.out.emit(f"#define {name:<38} {opcode:>3}") - - all_pairs: list[tuple[int, int, str]] = [] - # the second item in the tuple sorts the markers before the ops - all_pairs.extend((i, 1, name) for (name, i) in self.markers.items()) - all_pairs.extend((i, 2, name) for (name, i) in self.opmap.items()) - for i, _, name in sorted(all_pairs): - assert name is not None - 
define(name, i) - - self.out.emit("") - self.out.emit("#ifdef __cplusplus") - self.out.emit("}") - self.out.emit("#endif") - self.out.emit("#endif /* !Py_OPCODE_IDS_H */") + def write_opcode_targets(self, opcode_targets_filename: str) -> None: + """Write header file that defines the jump target table""" with open(opcode_targets_filename, "w") as f: # Create formatter @@ -885,7 +844,7 @@ def main() -> None: # These raise OSError if output can't be written a.assign_opcode_ids() - a.write_opcode_ids(args.opcode_ids_h, args.opcode_targets_h) + a.write_opcode_targets(args.opcode_targets_h) a.write_metadata(args.metadata, args.pymetadata) a.write_executor_instructions(args.executor_cases, args.emit_line_directives) a.write_abstract_interpreter_instructions( diff --git a/Tools/cases_generator/generators_common.py b/Tools/cases_generator/generators_common.py new file mode 100644 index 000000000000000..76900d1efffd5d3 --- /dev/null +++ b/Tools/cases_generator/generators_common.py @@ -0,0 +1,19 @@ +from pathlib import Path +from typing import TextIO + +ROOT = Path(__file__).parent.parent.parent +DEFAULT_INPUT = (ROOT / "Python/bytecodes.c").absolute() + + +def root_relative_path(filename: str) -> str: + return Path(filename).relative_to(ROOT).as_posix() + + +def write_header(generator: str, source: str, outfile: TextIO) -> None: + outfile.write( + f"""// This file is generated by {root_relative_path(generator)} +// from: +// {source} +// Do not edit! +""" + ) diff --git a/Tools/cases_generator/opcode_id_generator.py b/Tools/cases_generator/opcode_id_generator.py new file mode 100644 index 000000000000000..a1f6f62156ebd3f --- /dev/null +++ b/Tools/cases_generator/opcode_id_generator.py @@ -0,0 +1,153 @@ +"""Generate the list of opcode IDs. +Reads the instruction definitions from bytecodes.c. +Writes the IDs to opcode_ids.h by default. 
+""" + +import argparse +import os.path +import sys + +from analyzer import ( + Analysis, + Instruction, + analyze_files, +) +from generators_common import ( + DEFAULT_INPUT, + ROOT, + write_header, +) +from cwriter import CWriter +from typing import TextIO + + +DEFAULT_OUTPUT = ROOT / "Include/opcode_ids.h" + + +def generate_opcode_header(filenames: str, analysis: Analysis, outfile: TextIO) -> None: + write_header(__file__, filenames, outfile) + out = CWriter(outfile, 0, False) + out.emit("\n") + instmap: dict[str, int] = {} + + # 0 is reserved for cache entries. This helps debugging. + instmap["CACHE"] = 0 + + # 17 is reserved as it is the initial value for the specializing counter. + # This helps catch cases where we attempt to execute a cache. + instmap["RESERVED"] = 17 + + # 149 is RESUME - it is hard coded as such in Tools/build/deepfreeze.py + instmap["RESUME"] = 149 + instmap["INSTRUMENTED_LINE"] = 254 + + instrumented = [ + name for name in analysis.instructions if name.startswith("INSTRUMENTED") + ] + + # Special case: this instruction is implemented in ceval.c + # rather than bytecodes.c, so we need to add it explicitly + # here (at least until we add something to bytecodes.c to + # declare external instructions). 
+ instrumented.append("INSTRUMENTED_LINE") + + specialized: set[str] = set() + no_arg: list[str] = [] + has_arg: list[str] = [] + + for family in analysis.families.values(): + specialized.update(inst.name for inst in family.members) + + for inst in analysis.instructions.values(): + name = inst.name + if name in specialized: + continue + if name in instrumented: + continue + if inst.properties.oparg: + has_arg.append(name) + else: + no_arg.append(name) + + # Specialized ops appear in their own section + # Instrumented opcodes are at the end of the valid range + min_internal = 150 + min_instrumented = 254 - (len(instrumented) - 1) + assert min_internal + len(specialized) < min_instrumented + + next_opcode = 1 + + def add_instruction(name: str) -> None: + nonlocal next_opcode + if name in instmap: + return # Pre-defined name + while next_opcode in instmap.values(): + next_opcode += 1 + instmap[name] = next_opcode + next_opcode += 1 + + for name in sorted(no_arg): + add_instruction(name) + for name in sorted(has_arg): + add_instruction(name) + # For compatibility + next_opcode = min_internal + for name in sorted(specialized): + add_instruction(name) + next_opcode = min_instrumented + for name in instrumented: + add_instruction(name) + + for op, name in enumerate(sorted(analysis.pseudos), 256): + instmap[name] = op + + assert 255 not in instmap.values() + + out.emit( + """#ifndef Py_OPCODE_IDS_H +#define Py_OPCODE_IDS_H +#ifdef __cplusplus +extern "C" { +#endif + +/* Instruction opcodes for compiled code */ +""" + ) + + def write_define(name: str, op: int) -> None: + out.emit(f"#define {name:<38} {op:>3}\n") + + for op, name in sorted([(op, name) for (name, op) in instmap.items()]): + write_define(name, op) + + out.emit("\n") + write_define("HAVE_ARGUMENT", len(no_arg)) + write_define("MIN_INSTRUMENTED_OPCODE", min_instrumented) + + out.emit("\n") + out.emit("#ifdef __cplusplus\n") + out.emit("}\n") + out.emit("#endif\n") + out.emit("#endif /* !Py_OPCODE_IDS_H */\n") + 
+ +arg_parser = argparse.ArgumentParser( + description="Generate the header file with all opcode IDs.", + formatter_class=argparse.ArgumentDefaultsHelpFormatter, +) + +arg_parser.add_argument( + "-o", "--output", type=str, help="Generated code", default=DEFAULT_OUTPUT +) + +arg_parser.add_argument( + "input", nargs=argparse.REMAINDER, help="Instruction definition file(s)" +) + +if __name__ == "__main__": + args = arg_parser.parse_args() + if len(args.input) == 0: + args.input.append(DEFAULT_INPUT) + data = analyze_files(args.input) + with open(args.output, "w") as outfile: + generate_opcode_header(args.input, data, outfile) diff --git a/Tools/cases_generator/tier1_generator.py b/Tools/cases_generator/tier1_generator.py index eba926435d24151..9787403b3bbc473 100644 --- a/Tools/cases_generator/tier1_generator.py +++ b/Tools/cases_generator/tier1_generator.py @@ -17,33 +17,18 @@ StackItem, analysis_error, ) +from generators_common import ( + DEFAULT_INPUT, + ROOT, + write_header, +) from cwriter import CWriter from typing import TextIO, Iterator from lexer import Token from stack import StackOffset -HERE = os.path.dirname(__file__) -ROOT = os.path.join(HERE, "../..") -THIS = os.path.relpath(__file__, ROOT).replace(os.path.sep, "/") - -DEFAULT_INPUT = os.path.relpath(os.path.join(ROOT, "Python/bytecodes.c")) -DEFAULT_OUTPUT = os.path.relpath(os.path.join(ROOT, "Python/generated_cases.c.h")) - - -def write_header(filename: str, outfile: TextIO) -> None: - outfile.write( - f"""// This file is generated by {THIS} -// from: -// {filename} -// Do not edit! 
- -#ifdef TIER_TWO - #error "This file is for Tier 1 only" -#endif -#define TIER_ONE 1 -""" - ) +DEFAULT_OUTPUT = ROOT / "Python/generated_cases.c.h" FOOTER = "#undef TIER_ONE\n" @@ -351,7 +336,15 @@ def uses_this(inst: Instruction) -> bool: def generate_tier1( filenames: str, analysis: Analysis, outfile: TextIO, lines: bool ) -> None: - write_header(filenames, outfile) + write_header(__file__, filenames, outfile) + outfile.write( + """ +#ifdef TIER_TWO + #error "This file is for Tier 1 only" +#endif +#define TIER_ONE 1 +""" + ) out = CWriter(outfile, 2, lines) out.emit("\n") for name, inst in sorted(analysis.instructions.items()):