2424# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
2525# THE SOFTWARE.
2626
27+ import io
2728import struct
2829import sys
2930from binascii import hexlify
@@ -302,6 +303,25 @@ class Opcode:
302303 MP_BC_POP_JUMP_IF_TRUE ,
303304 MP_BC_POP_JUMP_IF_FALSE ,
304305 )
# Opcodes whose argument is a relative jump offset. The disassembler
# annotates these with a link to the instruction at `ip + arg + sz`.
ALL_OFFSET = (
    MP_BC_UNWIND_JUMP,
    MP_BC_JUMP,
    MP_BC_POP_JUMP_IF_TRUE,
    MP_BC_POP_JUMP_IF_FALSE,
    MP_BC_JUMP_IF_TRUE_OR_POP,
    MP_BC_JUMP_IF_FALSE_OR_POP,
    MP_BC_SETUP_WITH,
    MP_BC_SETUP_EXCEPT,
    MP_BC_SETUP_FINALLY,
    MP_BC_POP_EXCEPT_JUMP,
    MP_BC_FOR_ITER,
)
# Opcodes whose argument is used as an index into the enclosing raw code's
# `children` list when the disassembler builds its annotations.
ALL_WITH_CHILD = (
    MP_BC_MAKE_FUNCTION,
    MP_BC_MAKE_FUNCTION_DEFARGS,
    MP_BC_MAKE_CLOSURE,
    MP_BC_MAKE_CLOSURE_DEFARGS,
)
305325
306326 # Create a dict mapping opcode value to opcode name.
307327 mapping = ["unknown" for _ in range (256 )]
@@ -896,7 +916,7 @@ def __init__(self, parent_name, qstr_table, fun_data, prelude_offset, code_kind)
896916 self .escaped_name = unique_escaped_name
897917
def disassemble_children(self):
    """Print the annotated list of child names, then disassemble every child in order."""
    self.print_children_annotated()
    for child in self.children:
        child.disassemble()
902922
@@ -985,6 +1005,75 @@ def freeze_raw_code(self, prelude_ptr=None, type_sig=0):
9851005 raw_code_count += 1
9861006 raw_code_content += 4 * 4
9871007
@staticmethod
def decode_lineinfo(line_info: memoryview) -> "tuple[int, int, memoryview]":
    """
    Decode the first entry of a line-info byte stream.

    Returns a 3-tuple of (bytecode increment, line increment, remaining view),
    where the remaining view is `line_info` with the decoded entry removed.
    """
    head = line_info[0]
    if head & 0x80:
        # 0b1LLLBBBB 0bLLLLLLLL encoding (l's LSB in second byte)
        bc_increment = head & 0xF
        line_increment = ((head << 4) & 0x700) | line_info[1]
        consumed = 2
    else:
        # 0b0LLBBBBB encoding
        bc_increment = head & 0x1F
        line_increment = head >> 5
        consumed = 1
    return bc_increment, line_increment, line_info[consumed:]
1017+
def get_source_annotation(self, ip: int, file=None) -> dict:
    """
    Map a bytecode position to a source-line annotation.

    `ip` is an index into `self.fun_data`; it is converted to an offset
    relative to `self.offset_opcodes` and looked up in the line-info table
    stored between `self.offset_line_info` and `self.offset_opcodes`.

    Returns `{"file": file, "line": N}`, where N is the 1-based source line.
    If this object lacks any of the attributes needed for the lookup, N is
    None instead.
    """
    try:
        # Both attribute reads live inside the try: an object without
        # prelude offsets must yield the "no line" annotation rather than
        # raise while computing the relative offset.
        bc_offset = ip - self.offset_opcodes
        line_info = memoryview(self.fun_data)[self.offset_line_info : self.offset_opcodes]
    except AttributeError:
        return {"file": file, "line": None}

    source_line = 1
    while line_info:
        bc_increment, line_increment, line_info = self.decode_lineinfo(line_info)
        if bc_offset < bc_increment:
            # This entry covers the requested offset; the line is found.
            break
        bc_offset -= bc_increment
        source_line += line_increment

    return {"file": file, "line": source_line}
1035+
def get_label(self, ip: "int | None" = None, child_num: "int | None" = None) -> str:
    """
    Build the label name for this raw code, or for a location inside it.

    With no arguments the label is just `self.escaped_name`. Passing `ip`
    appends ".<ip>", and passing `child_num` appends ".child<child_num>".
    Passing both is a programming error and trips an assertion.
    """
    base = self.escaped_name
    if ip is None and child_num is None:
        return "%s" % base
    if ip is None:
        return "%s.child%d" % (base, child_num)
    assert child_num is None
    return "%s.%d" % (base, ip)
1044+
def print_children_annotated(self) -> None:
    """
    Print the one-line summary of this raw code's children with annotations.

    The line has the form ` children: [a, b, ...]`, with each child's
    `simple_name.str` written as-is (unquoted). The line itself is labelled
    "<escaped_name>.children" plus one "<escaped_name>.childN" label per
    child. The `annotations` keyword carries, for every child, the 1-based
    start column and the end column (one past the last character) of its
    name, along with the label of the child it refers to.
    """
    prefix = " children: ["
    separator = ", "

    # 1-based column of the next character that will be emitted.
    column = len(prefix) + 1
    names = []
    child_labels = []
    annotation_labels = []
    for index, child in enumerate(self.children):
        if index:
            column += len(separator)
        name = child.simple_name.str
        label = self.get_label(child_num=index)
        child_labels.append(label)
        annotation_labels.append(
            {
                "name": label,
                "target": child.get_label(),
                "range": {
                    "startCol": column,
                    "endCol": column + len(name),
                },
            },
        )
        column += len(name)
        names.append(name)

    text = prefix + separator.join(names) + "]"
    line_labels = ["%s.children" % self.escaped_name] + child_labels

    # `print` is expected to be a shim that accepts the extra keyword arguments.
    print(text, annotations={"labels": annotation_labels}, labels=line_labels)
1076+
9881077
9891078class RawCodeBytecode (RawCode ):
9901079 def __init__ (self , parent_name , qstr_table , obj_table , fun_data ):
@@ -993,9 +1082,58 @@ def __init__(self, parent_name, qstr_table, obj_table, fun_data):
9931082 parent_name , qstr_table , fun_data , 0 , MP_CODE_BYTECODE
9941083 )
9951084
def get_opcode_annotations_labels(
    self, opcode: int, ip: int, arg: int, sz: int, arg_pos: int, arg_len: int
) -> "tuple[dict, list[str]]":
    """
    Build the annotation dict and label list for one disassembled instruction.

    `opcode`, `ip`, `arg` and `sz` describe the instruction: its opcode value,
    its position, its decoded argument and its encoded size. `arg_pos` and
    `arg_len` give the 0-based offset and the length of the argument text
    within the printed line.

    Every instruction gets "source" and "disassembly" annotations and is
    labelled with `self.get_label(ip)`. Opcodes in `Opcode.ALL_OFFSET` also
    get a "link" to `ip + arg + sz` and a label reference to that location.
    Opcodes in `Opcode.ALL_WITH_CHILD` get a label reference to the child at
    index `arg`, or to the children line if that index is out of range.
    """
    own_label = self.get_label(ip)
    annotations = {
        "source": self.get_source_annotation(ip),
        "disassembly": Opcode.mapping[opcode],
    }

    if opcode in Opcode.ALL_OFFSET:
        destination = ip + arg + sz
        annotations["link"] = {
            "offset": arg_pos,
            "length": arg_len,
            "to": destination,
        }
        targets = [self.get_label(destination)]
    elif opcode in Opcode.ALL_WITH_CHILD:
        try:
            # link resolvable child to the actual child
            targets = [self.children[arg].get_label()]
        except IndexError:
            # link out-of-range child to the child array itself
            targets = ["%s.children" % self.escaped_name]
    else:
        targets = []

    if targets:
        # Columns are 1-based, and the end column is one past the argument.
        annotations["labels"] = [
            {
                "name": own_label,
                "target": target,
                "range": {
                    "startCol": arg_pos + 1,
                    "endCol": arg_pos + arg_len + 1,
                },
            }
            for target in targets
        ]

    return annotations, [own_label]
1133+
9961134 def disassemble (self ):
9971135 bc = self .fun_data
998- print ("simple_name:" , self .simple_name .str )
1136+ print ("simple_name:" , self .simple_name .str , labels = [ self . get_label ()] )
9991137 print (" raw bytecode:" , len (bc ), hexlify_to_str (bc ))
10001138 print (" prelude:" , self .prelude_signature )
10011139 print (" args:" , [self .qstr_table [i ].str for i in self .names [1 :]])
@@ -1011,9 +1149,22 @@ def disassemble(self):
10111149 pass
10121150 else :
10131151 arg = ""
1014- print (
1015- " %-11s %s %s" % (hexlify_to_str (bc [ip : ip + sz ]), Opcode .mapping [bc [ip ]], arg )
1152+
1153+ pre_arg_part = " %-11s %s" % (
1154+ hexlify_to_str (bc [ip : ip + sz ]),
1155+ Opcode .mapping [bc [ip ]],
1156+ )
1157+ arg_part = "%s" % arg
1158+ annotations , labels = self .get_opcode_annotations_labels (
1159+ opcode = bc [ip ],
1160+ ip = ip ,
1161+ arg = arg ,
1162+ sz = sz ,
1163+ arg_pos = len (pre_arg_part ) + 1 ,
1164+ arg_len = len (arg_part ),
10161165 )
1166+
1167+ print (pre_arg_part , arg_part , annotations = annotations , labels = labels )
10171168 ip += sz
10181169 self .disassemble_children ()
10191170
@@ -1114,7 +1265,7 @@ def __init__(
11141265
11151266 def disassemble (self ):
11161267 fun_data = self .fun_data
1117- print ("simple_name:" , self .simple_name .str )
1268+ print ("simple_name:" , self .simple_name .str , labels = [ self . get_label ()] )
11181269 print (
11191270 " raw data:" ,
11201271 len (fun_data ),
@@ -1833,6 +1984,100 @@ def extract_segments(compiled_modules, basename, kinds_arg):
18331984 output .write (source .read (segment .end - segment .start ))
18341985
18351986
class PrintShim:
    """
    Base class for interposing extra functionality onto the global `print` method.

    Used as a context manager: on entry the module-level name `print` is
    rebound to this object, and on exit the previous value is restored.
    Subclasses override `__call__` to change what a print does and `on_exit`
    to run code once the block has finished.
    """

    def __init__(self):
        # The `print` that was in effect before entering; None while inactive.
        self.wrapped_print = None

    def __enter__(self):
        global print

        if self.wrapped_print is not None:
            # A shim instance can only be active once at a time.
            raise RecursionError("print shim is already active")

        self.wrapped_print = print
        print = self

        return self

    def __exit__(self, exc_type, exc_value, traceback):
        global print

        if self.wrapped_print is None:
            return

        print = self.wrapped_print
        try:
            # Keep `wrapped_print` bound while the hook runs so that a
            # subclass can still emit output through `self(...)` from
            # `on_exit`. Clearing it first made such calls fail with
            # "'NoneType' object is not callable".
            self.on_exit()
        finally:
            self.wrapped_print = None

    def on_exit(self):
        """Hook called once when the context exits; does nothing by default."""
        pass

    def __call__(self, *a, **k):
        """Forward the call unchanged to the wrapped `print`."""
        return self.wrapped_print(*a, **k)
2020+
2021+
class PrintIgnoreExtraArgs(PrintShim):
    """Just strip the `annotations` and `labels` kwargs and pass down to the underlying print."""

    # Defaults are immutable (None / empty tuple) rather than a shared `{}`;
    # both values are discarded here, so callers see no difference.
    def __call__(self, *a, annotations: "dict | None" = None, labels: "list[str]" = (), **k):
        return super().__call__(*a, **k)
2027+
2028+
class PrintJson(PrintShim):
    """
    Output lines as godbolt-compatible JSON with extra annotation info from
    `annotations` and `labels`, rather than plain text.

    Every completed printed line becomes one entry of `asm["asm"]`, holding
    its text under "text" merged with the `annotations` passed to that print.
    Each name in `labels` is recorded in `asm["labelDefinitions"]` against
    the 1-based number of the line it was printed with. The collected
    document is written to `fp` as JSON when the context exits.
    """

    def __init__(self, fp=sys.stdout, language_id: str = "mpy"):
        super().__init__()
        self.fp = fp
        self.asm = {
            "asm": [],
            "labelDefinitions": {},
            "languageId": language_id,
        }
        # Number of completed lines so far (also the number of the last line).
        self.line_number: int = 0
        # Text of the line currently being assembled; None between lines.
        self.buf: "io.StringIO | None" = None

    def on_exit(self):
        import json

        if self.buf is not None:
            # flush last partial line
            # Finalise it directly instead of calling self(): the flush must
            # not depend on `wrapped_print` still being bound at this point.
            self._finish_line()

        json.dump(self.asm, self.fp)

    def _finish_line(self, annotations=None, labels=()):
        """Turn the buffered text into one finished line entry and register its labels."""
        output = self.buf.getvalue()
        self.buf = None

        asm_line = {"text": output}
        if annotations:
            asm_line.update(annotations)
        self.asm["asm"].append(asm_line)

        self.line_number += 1
        for label in labels:
            self.asm["labelDefinitions"][label] = self.line_number

    def __call__(self, *a, annotations: "dict | None" = None, labels: "list[str]" = (), **k):
        # ignore prints directed to an explicit output
        if "file" in k:
            return super().__call__(*a, **k)

        if self.buf is None:
            self.buf = io.StringIO()

        # Mirror the plain text on stderr; stdout is reserved for the JSON document.
        super().__call__(*a, file=sys.stderr, **k)

        if "end" in k:
            # buffer partial-line prints to collect into a single AsmResultLine
            return super().__call__(*a, file=self.buf, **k)

        retval = super().__call__(*a, file=self.buf, end="", **k)
        self._finish_line(annotations, labels)
        return retval
2079+
2080+
18362081def main (args = None ):
18372082 global global_qstrs
18382083
@@ -1846,6 +2091,12 @@ def main(args=None):
18462091 "-d" , "--disassemble" , action = "store_true" , help = "output disassembled contents of files"
18472092 )
18482093 cmd_parser .add_argument ("-f" , "--freeze" , action = "store_true" , help = "freeze files" )
2094+ cmd_parser .add_argument (
2095+ "-j" ,
2096+ "--json" ,
2097+ action = "store_true" ,
2098+ help = "output hexdump, disassembly, and frozen code as JSON with extra metadata" ,
2099+ )
18492100 cmd_parser .add_argument (
18502101 "--merge" , action = "store_true" , help = "merge multiple .mpy files into one"
18512102 )
@@ -1913,20 +2164,33 @@ def main(args=None):
19132164 print (er , file = sys .stderr )
19142165 sys .exit (1 )
19152166
1916- if args .hexdump :
1917- hexdump_mpy (compiled_modules )
2167+ if args .json :
2168+ if args .freeze :
2169+ print_shim = PrintJson (sys .stdout , language_id = "c" )
2170+ elif args .hexdump :
2171+ print_shim = PrintJson (sys .stdout , language_id = "stderr" )
2172+ elif args .disassemble :
2173+ print_shim = PrintJson (sys .stdout , language_id = "mpy" )
2174+ else :
2175+ print_shim = PrintJson (sys .stdout )
2176+ else :
2177+ print_shim = PrintIgnoreExtraArgs ()
19182178
1919- if args . disassemble :
2179+ with print_shim :
19202180 if args .hexdump :
1921- print ()
1922- disassemble_mpy (compiled_modules )
2181+ hexdump_mpy (compiled_modules )
19232182
1924- if args .freeze :
1925- try :
1926- freeze_mpy (firmware_qstr_idents , compiled_modules )
1927- except FreezeError as er :
1928- print (er , file = sys .stderr )
1929- sys .exit (1 )
2183+ if args .disassemble :
2184+ if args .hexdump :
2185+ print ()
2186+ disassemble_mpy (compiled_modules )
2187+
2188+ if args .freeze :
2189+ try :
2190+ freeze_mpy (firmware_qstr_idents , compiled_modules )
2191+ except FreezeError as er :
2192+ print (er , file = sys .stderr )
2193+ sys .exit (1 )
19302194
19312195 if args .merge :
19322196 merge_mpy (compiled_modules , args .output )
0 commit comments