From 9ac7d838853e596b25432a257f81ce212b267e59 Mon Sep 17 00:00:00 2001 From: xor Date: Wed, 25 Sep 2024 23:32:29 +0200 Subject: [PATCH] argparse and eip ranges --- json2idc.py | 131 ++++++++++++++++++++++++++++++---------------------- 1 file changed, 76 insertions(+), 55 deletions(-) diff --git a/json2idc.py b/json2idc.py index 27fd19156f6..48404d8c865 100755 --- a/json2idc.py +++ b/json2idc.py @@ -1,10 +1,10 @@ #!/usr/bin/env python3 import re - import jsonpickle import sys +import argparse -image_size = 39616 - 0x800 # 0x22b90 - 0x200 # .exe size - exe header +image_size = 939072 - 0xa40 # 0x22b90 - 0x200 # .exe size - exe header dosbox_load_seg = 0x1a2 # para ida_load_seg = 0x1000 @@ -36,80 +36,99 @@ def read_segments_map(file_name): m = re.match( r'^\s*MakeName\s*\(\s*(?P
[0-9A-Fa-fXx]+)\s*,\s*"(?P\S+)"\s*\)\s*;', line) - if m: - name = m["name"] - if all(not name.startswith(x) for x in {"sub_", "loc_", "locret_", "byte_", "word_", "dword_"}): - symbols[m["address"]] = name + if not m: + continue + name = m["name"] + if all(not name.startswith(x) for x in {"sub_", "loc_", "locret_", "byte_", "word_", "dword_"}): + symbols[m["address"]] = name return symbols -json_fname = sys.argv[1] -map_fname = sys.argv[2] -if not json_fname.endswith('.json'): - print('Provide json with Run-Time data') - exit(1) -idc_fname = json_fname.replace('.json', '.idc') -symbols = read_segments_map(map_fname) -cs = set() +def main(): + parser = argparse.ArgumentParser(description="Process a .json file and a .map file to generate IDC script.") + parser.add_argument('json_file', help='Path to the .json file with run-time data') + parser.add_argument('map_file', help='Path to the .map file with segment information') -with open(idc_fname, 'w') as outfile: - outfile.write('''#include -static main(){ -set_inf_attr(INF_PROCNAME, "80386r"); -set_target_assembler("Generic for intel 80x86"); -''') - with open(json_fname) as infile: - j = jsonpickle.decode(infile.read()) - for daddr, instr in j['Code'].items(): - addr = addr_dbx2ida(int(daddr, 16)) - for seg in ['cs', 'ds', 'es', 'gs', 'fs', 'ss']: - all_segs |= set(instr[seg]) - all_segs = list(sorted(all_segs)) + args = parser.parse_args() - for daddr, instr in j['Code'].items(): - addr = addr_dbx2ida(int(daddr, 16)) + global all_segs + json_fname = args.json_file + map_fname = args.map_file - outfile.write(f'MakeCode(0x{addr:x}); // {daddr}\n') + if not json_fname.endswith('.json'): + print('Error: Provide a .json file with run-time data') + sys.exit(1) - if instr['Video']: # Identify instructions accessing video memory - cseg = seg_dbx2ida(instr["cs"][0]) - print(f'Video acc instr: {cseg:x}:{addr - cseg * 0x10:x}') + idc_fname = json_fname.replace('.json', '.idc') + symbols = read_segments_map(map_fname) + code_segs = dict() - for seg in ['ds', 'es', 'gs', 'fs', 'ss']: # set default reg values used for instruction - if seg in instr and len(instr[seg]) == 1: - outfile.write( - f'split_sreg_range(0x{addr:x},"{seg}",0x{seg_dbx2ida(instr[seg][0]):x},2); // 0x{daddr} 0x{instr[seg][0]:x}\n') - if instr["cs"]: - cs.add(instr["cs"][0]) + with open(idc_fname, 'w') as outfile: + outfile.write('''#include +static main(){ +set_inf_attr(INF_PROCNAME, "80386r"); +set_target_assembler("Generic for intel 80x86"); +''') + with open(json_fname) as infile: + j = jsonpickle.decode(infile.read()) + for daddr, instr in j['Code'].items(): + addr = addr_dbx2ida(int(daddr, 16)) + for seg in ['cs', 'ds', 'es', 'gs', 'fs', 'ss']: + all_segs |= set(instr[seg]) + all_segs = list(sorted(all_segs)) - if 'Data' in j: - for daddr, data in j['Data'].items(): # Set variables sizes + for daddr, instr in j['Code'].items(): + if len(instr["cs"]) == 0: + continue addr = addr_dbx2ida(int(daddr, 16)) - if not data['Array'] and len(data['Sizes']) == 1: # Don't know yet how to handle arrays + outfile.write(f'MakeCode(0x{addr:x}); // {daddr}\n') + cs = instr["cs"][0] + cseg = seg_dbx2ida(cs) + eip = addr - cseg * 0x10 + if instr['Video']: # Identify instructions accessing video memory + print(f'Video acc instr: {cseg:x}:{eip:x}') + + for seg in ['ds', 'es', 'gs', 'fs', 'ss']: # set default reg values used for instruction + if seg in instr and len(instr[seg]) == 1: + outfile.write( + f'split_sreg_range(0x{addr:x},"{seg}",0x{seg_dbx2ida(instr[seg][0]):x},2); // 0x{daddr} 0x{instr[seg][0]:x}\n') + if instr["cs"]: + if cs not in code_segs: + code_segs[cs] = [eip, eip] + else: + code_segs[cs][0] = min(code_segs[cs][0], eip) + code_segs[cs][1] = max(code_segs[cs][1], eip) + + if 'Data' in j: + for daddr, data in j['Data'].items(): # Set variables sizes + addr = addr_dbx2ida(int(daddr, 16)) + + if data['Array'] or len(data['Sizes']) != 1: # Don't know yet how to handle arrays + continue size = data['Sizes'][0] # Only set if it was single size text = {1: 'Byte', 2: 'Word', 4: 'Dword'}[size] outfile.write( f'Make{text}(0x{addr:x}); // 0x{daddr}\n') - for daddr in sorted(j['Jumps'], reverse=True): - addr = addr_dbx2ida(daddr) - outfile.write(f'add_func(0x{addr:x}); // 0x{daddr:x}\n') + for daddr in sorted(j['Jumps'], reverse=True): + addr = addr_dbx2ida(daddr) + outfile.write(f'add_func(0x{addr:x}); // 0x{daddr:x}\n') - print('Used segs: ') - print(','.join([f'{seg_dbx2ida(seg):x}' for seg in sorted(all_segs) if - dosbox_load_seg <= seg < dosbox_load_seg + image_size // 0x10])) + print('Used segs: ') + print(','.join([f'{seg_dbx2ida(seg):x}' for seg in sorted(all_segs) if + dosbox_load_seg <= seg < dosbox_load_seg + image_size // 0x10])) - for symbol, addr in symbols.items(): - outfile.write(f'set_name(0x{addr:x},"_{symbol}",SN_FORCE);\n') - outfile.write(""" + for symbol, addr in symbols.items(): + outfile.write(f'set_name(0x{addr:x},"_{symbol}",SN_FORCE);\n') + outfile.write(""" print("Applied addresses and types"); // unhide all functions //auto ea = get_func_attr(INF_MIN_EA, FUNCATTR_START); //while (ea != BADADDR) { //set_visible_func(ea, true); - //ea = get_func_attr(ea, FUNCATTR_START); + //ea = get_func_attr(ea, FUNCATTR_START); //} // produce a listing file @@ -124,8 +143,10 @@ def read_segments_map(file_name): //fclose(fpm); //print("Generated map"); }""") - print("Code segments:") - for seg in sorted(cs): - print(f"{seg_dbx2ida(seg):x}") + print("Used code segments and ip range:") + for seg, minmax in code_segs.items(): + print(f"{seg_dbx2ida(seg):x} {minmax[0]:x}:{minmax[1]:x}") +if __name__ == '__main__': + main()