-
Notifications
You must be signed in to change notification settings - Fork 25
/
Copy pathtest_gen.py
executable file
·132 lines (103 loc) · 3.2 KB
/
test_gen.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
#!/usr/bin/env python
# utility to generate tests
import re, sys, codecs
N_SAMPLES = 8 # number of samples for each encoding
from arm64test import instr_to_il, il2str
# A sub-command is mandatory; exit with a failure status when none is given.
if len(sys.argv) < 2:
    sys.exit(-1)

# Architecture handle cached by disassemble() on its first call.
arch = None
def disassemble(addr, data):
    """Return the normalized disassembly text of the instruction in `data`.

    The 'aarch64' architecture object is looked up on first use and cached
    in the module-level `arch` global. `data` and `addr` are handed to its
    `get_instruction_text`; the text of every returned token is concatenated
    and passed through `disasm_test.normalize`.

    Returns None when no tokens are produced or the reported length is 0.

    NOTE(review): `binaryninja` and `disasm_test` are not imported in the
    visible part of this file -- confirm they are in scope before relying
    on this helper.
    """
    global arch
    if not arch:
        arch = binaryninja.Architecture['aarch64']

    decoded = arch.get_instruction_text(data, addr)
    tokens, length = decoded
    if tokens and length != 0:
        text = ''.join(token.text for token in tokens)
        return disasm_test.normalize(text)
    return None
def print_case(data, comment=''):
    """Print one test-case tuple entry for the instruction bytes `data`.

    The entry has the form ``(b'\\xNN...', '<il>'),``. The IL string
    returned by instr_to_il(data) is split on ';'; every piece after the
    first goes on its own continuation line, joined to the previous one
    with ``;' + \\``. When `comment` is non-empty it is appended after the
    closing ``'),`` as `` # <comment>``.

    Args:
        data: iterable of byte values (e.g. a bytes object).
        comment: optional trailing comment text; '' or None means none.
    """
    il_pieces = instr_to_il(data).split(';')
    last_index = len(il_pieces) - 1

    # Opening of the tuple: the raw bytes rendered as \xNN escapes.
    byte_escapes = ''.join('\\x%02X' % value for value in data)
    print("\t(b'%s', " % byte_escapes, end='')

    for index, piece in enumerate(il_pieces):
        if index != 0:
            # Continuation lines are indented to line up under the first one.
            print('\t\t\t\t\t\t ', end='')
        print('\'%s' % piece, end='')
        if index != last_index:
            print(';\' + \\')

    trailer = ' # ' + comment if comment else ''
    print('\'),%s' % trailer)
def gather_samples(mnems, encodings):
    """Print lifting test cases for samples matching a mnemonic or encoding.

    Scans ./disassembler/test_cases.txt line by line. Apart from
    '// NOTE:' and '// SYNTAX:' lines (skipped), two kinds of line are
    recognized:

      * encoding headers, e.g.
            // BFCVT_Z_P_Z_S2BF 01100101|opc=10|0010|opc2=10|101|Pg=xxx|...
        which set the current encoding name and reset the sample counter;
      * instruction samples, e.g.
            658AB9BB bfcvt z27.h, p6/m, z13.s

    A sample is selected when its instruction text starts with any entry of
    `mnems`, or when the current encoding name (compared case-insensitively)
    is in `encodings`. For each selected sample a '# <text> <encoding>'
    comment line is printed followed by print_case() output. At most
    N_SAMPLES samples are printed per encoding header.

    Args:
        mnems: mnemonic prefixes matched against the instruction text.
        encodings: encoding names matched against the current header.

    Any other line is reported with 'unable to parse line' and the process
    exits with status -1.
    """
    wanted_encodings = {name.upper() for name in encodings}
    mnem_prefixes = tuple(mnems)

    header_re = re.compile(r'^// (.*?) .*')
    sample_re = re.compile(r'^(..)(..)(..)(..) (.*)$')

    samples = 0
    current_encoding = None
    with open('./disassembler/test_cases.txt') as fp:
        for line in fp:
            if line.startswith(('// NOTE:', '// SYNTAX:')):
                continue

            m = header_re.match(line)
            if m:
                current_encoding = m.group(1)
                samples = 0
                continue

            m = sample_re.match(line)
            if not m:
                print('unable to parse line: %s' % line)
                sys.exit(-1)

            if samples >= N_SAMPLES:
                continue

            b0, b1, b2, b3, instxt = m.groups()
            # The four hex byte pairs are reversed before decoding
            # (presumably the file prints the 32-bit word most-significant
            # byte first while the lifter wants memory order -- confirm).
            data = codecs.decode(b3 + b2 + b1 + b0, 'hex_codec')

            mnemonic_match = instxt.startswith(mnem_prefixes)
            # A sample that precedes every header has no encoding name; it
            # can still match by mnemonic instead of crashing on None.upper().
            encoding_match = (current_encoding is not None
                              and current_encoding.upper() in wanted_encodings)
            if not (mnemonic_match or encoding_match):
                continue

            print('\t# %s %s' % (instxt.ljust(64), current_encoding or ''))
            print_case(data)
            samples += 1
# Command-line dispatch.
#
#   ./test_gen.py mnemonic <mnem>       lifting tests for a mnemonic, e.g. ld1
#   ./test_gen.py encoding <name>       lifting tests for a named encoding
#   ./test_gen.py mte                   lifting tests for the 'mte' mnemonic list
#   ./test_gen.py recompute_arm64test   re-print arm64test.py with regenerated IL
command = sys.argv[1]

if command in ('mnemonic', 'encoding') and len(sys.argv) < 3:
    # Without this check the missing operand surfaced as an IndexError
    # traceback on sys.argv[2].
    print('%s requires an argument' % command, file=sys.stderr)
    sys.exit(-1)

if command == 'mnemonic':
    mnem = sys.argv[2]
    print('searching for mnemonic -%s-' % mnem)
    gather_samples([mnem], [])

elif command == 'encoding':
    encname = sys.argv[2]
    print('searching for encoding -%s-' % encname)
    gather_samples([], [encname])

elif command == 'mte':
    # 'ldgv' was misspelled 'dgv'. gather_samples() matches by prefix, so
    # 'ldg' already covered it and the generated output is unchanged.
    mnems = ['addg', 'cmpp', 'gmi', 'irg', 'ldg', 'ldgv', 'ldgm', 'st2g', 'stg',
             'stgm', 'stgp', 'stgv', 'stz2g', 'stzg', 'stzgm', 'subg', 'subp',
             'subps']
    gather_samples(mnems, [])

elif command == 'recompute_arm64test':
    with open('arm64test.py') as fp:
        lines = [x.rstrip() for x in fp]

    # First line of a test case: a tab, then a 4-byte b'\x..' literal.
    case_re = re.compile(r'^\t\(b\'\\x(..)\\x(..)\\x(..)\\x(..)\'.*$')

    i = 0
    while i < len(lines):
        m = case_re.match(lines[i])
        if not m:
            # Not a test case: pass the line through unchanged.
            print(lines[i])
            i += 1
            continue

        data = codecs.decode(''.join(m.groups()), 'hex_codec')

        # NOTE(review): only the first line of the case is searched for a
        # trailing comment, while print_case() writes the comment after the
        # last IL line, so comments on multi-line cases are not carried over.
        m = re.search(r'# (.*)$', lines[i])
        comment = m.group(1) if m else None

        print_case(data, comment)

        # Skip the old continuation lines of this case. The bounds check
        # prevents an IndexError when the case is the last thing in the file.
        i += 1
        while i < len(lines) and lines[i].startswith('\t\t\t\t\t\t'):
            i += 1