-
Notifications
You must be signed in to change notification settings - Fork 4
/
znver2.log
151 lines (151 loc) · 13 KB
/
znver2.log
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
AMD Ryzen 7 3700X 8-Core Processor
== latency/throughput ==
reg64: add: latency: CPI= 1.00, IPC= 1.00
reg64: add:throughput: CPI= 0.26, IPC= 3.86
reg64: lea: latency: CPI= 1.00, IPC= 1.00
reg64: lea:throughput: CPI= 0.26, IPC= 3.86
reg64: xor dst,dst: latency: CPI= 0.26, IPC= 3.88
reg64: xor dst,dst:throughput: CPI= 0.26, IPC= 3.88
reg64: xor: latency: CPI= 0.26, IPC= 3.88
reg64: xor:throughput: CPI= 0.26, IPC= 3.88
reg64: load: latency: CPI= 4.00, IPC= 0.25
reg64: load:throughput: CPI= 0.63, IPC= 1.60
reg64: crc32: latency: CPI= 3.00, IPC= 0.33
reg64: crc32:throughput: CPI= 1.00, IPC= 1.00
reg64: store [mem+0]->load[mem+0]: latency: CPI= 37.56, IPC= 0.03
reg64: store [mem+0]->load[mem+0]:throughput: CPI= 3.91, IPC= 0.26
reg64: store [mem+0]->load[mem+1]: latency: CPI= 37.38, IPC= 0.03
reg64: store [mem+0]->load[mem+1]:throughput: CPI= 14.00, IPC= 0.07
m128: pxor: latency: CPI= 0.25, IPC= 4.00
m128: pxor:throughput: CPI= 0.25, IPC= 4.00
m128: padd: latency: CPI= 1.00, IPC= 1.00
m128: padd:throughput: CPI= 0.33, IPC= 3.00
m128: pmuldq: latency: CPI= 3.00, IPC= 0.33
m128: pmuldq:throughput: CPI= 1.00, IPC= 1.00
m128: loadps:throughput: CPI= 0.50, IPC= 2.00
m128: loadps->movq: latency: CPI= 9.00, IPC= 0.11
m128: movq->movq: latency: CPI= 6.00, IPC= 0.17
m128: movq->movq:throughput: CPI= 1.00, IPC= 1.00
m128: xorps: latency: CPI= 0.25, IPC= 4.00
m128: xorps:throughput: CPI= 0.25, IPC= 4.00
m128: addps: latency: CPI= 3.00, IPC= 0.33
m128: addps:throughput: CPI= 0.50, IPC= 2.00
m128: mulps: latency: CPI= 3.00, IPC= 0.33
m128: mulps:throughput: CPI= 0.50, IPC= 2.00
m128: divps: latency: CPI= 10.00, IPC= 0.10
m128: divps:throughput: CPI= 3.50, IPC= 0.29
m128: divpd: latency: CPI= 13.00, IPC= 0.08
m128: divpd:throughput: CPI= 5.00, IPC= 0.20
m128: rsqrtps: latency: CPI= 5.00, IPC= 0.20
m128: rsqrtps:throughput: CPI= 1.00, IPC= 1.00
m128: rcpps: latency: CPI= 5.00, IPC= 0.20
m128: rcpps:throughput: CPI= 1.00, IPC= 1.00
m128: blendps: latency: CPI= 1.00, IPC= 1.00
m128: blendps:throughput: CPI= 0.33, IPC= 3.00
m128: blendvps: latency: CPI= 1.00, IPC= 1.00
m128: blendvps:throughput: CPI= 0.50, IPC= 2.00
m128: pshufb: latency: CPI= 1.00, IPC= 1.00
m128: pshufb:throughput: CPI= 0.50, IPC= 2.00
m128: shufps: latency: CPI= 1.00, IPC= 1.00
m128: shufps:throughput: CPI= 0.50, IPC= 2.00
m128: pmullw: latency: CPI= 3.00, IPC= 0.33
m128: pmullw:throughput: CPI= 1.00, IPC= 1.00
m128: phaddd: latency: CPI= 2.00, IPC= 0.50
m128: phaddd:throughput: CPI= 2.00, IPC= 0.50
m128: haddps: latency: CPI= 2.00, IPC= 0.50
m128: haddps:throughput: CPI= 2.00, IPC= 0.50
m128: pinsrd: latency: CPI= 1.79, IPC= 0.56
m128: pinsrd:throughput: CPI= 1.28, IPC= 0.78
m128: pinsrd->pextr: latency: CPI= 8.00, IPC= 0.12
m128: dpps: latency: CPI= 15.00, IPC= 0.07
m128: dpps:throughput: CPI= 4.00, IPC= 0.25
m128: cvtps2dq: latency: CPI= 3.00, IPC= 0.33
m128: cvtps2dq:throughput: CPI= 1.00, IPC= 1.00
m128: pmovmskb:throughput: CPI= 1.00, IPC= 1.00
m128: pmovmskb->movq: latency: CPI= 6.00, IPC= 0.17
m128: movq->movq: latency: CPI= 6.00, IPC= 0.17
m128: movaps [mem]: latency: CPI= 9.00, IPC= 0.11
m128: movaps [mem]:throughput: CPI= 0.50, IPC= 2.00
m128: movdqu [mem+1]: latency: CPI= 10.00, IPC= 0.10
m128: movdqu [mem+1]:throughput: CPI= 0.50, IPC= 2.00
m128: movdqu [mem+63] (cross cache): latency: CPI= 11.00, IPC= 0.09
m128: movdqu [mem+63] (cross cache):throughput: CPI= 1.00, IPC= 1.00
m128: movdqu [mem+2MB-1] (cross page): latency: CPI= 11.00, IPC= 0.09
m128: movdqu [mem+2MB-1] (cross page):throughput: CPI= 1.00, IPC= 1.00
m128: pcmpistri:throughput: CPI= 2.00, IPC= 0.50
m128: pcmpistri->movq: latency: CPI= 11.00, IPC= 0.09
m128: pcmpistrm:throughput: CPI= 2.00, IPC= 0.50
m128: pcmpistrm: latency: CPI= 7.00, IPC= 0.14
m128: pcmpestri:throughput: CPI= 3.00, IPC= 0.33
m128: pcmpestri->movq: latency: CPI= 11.00, IPC= 0.09
m128: pcmpestrm:throughput: CPI= 3.00, IPC= 0.33
m128: pcmpestrm: latency: CPI= 7.33, IPC= 0.14
m256: movaps [mem]: latency: CPI= 1.00, IPC= 1.00
m256: movaps [mem]:throughput: CPI= 0.50, IPC= 2.00
m256: vmovdqu [mem+1]: latency: CPI= 1.00, IPC= 1.00
m256: vmovdqu [mem+1]:throughput: CPI= 0.50, IPC= 2.00
m256: vmovdqu [mem+63] (cross cache): latency: CPI= 1.00, IPC= 1.00
m256: vmovdqu [mem+63] (cross cache):throughput: CPI= 1.00, IPC= 1.00
m256: vmovdqu [mem+2MB-1] (cross page): latency: CPI= 1.00, IPC= 1.00
m256: vmovdqu [mem+2MB-1] (cross page):throughput: CPI= 1.00, IPC= 1.00
m256: vxorps: latency: CPI= 0.25, IPC= 4.00
m256: vxorps:throughput: CPI= 0.25, IPC= 4.00
m256: vmulps: latency: CPI= 3.00, IPC= 0.33
m256: vmulps:throughput: CPI= 0.50, IPC= 2.00
m256: vaddps: latency: CPI= 3.00, IPC= 0.33
m256: vaddps:throughput: CPI= 0.50, IPC= 2.00
m256: vdivps: latency: CPI= 10.00, IPC= 0.10
m256: vdivps:throughput: CPI= 3.50, IPC= 0.29
m256: vdivpd: latency: CPI= 13.00, IPC= 0.08
m256: vdivpd:throughput: CPI= 5.00, IPC= 0.20
m256: vrsqrtps: latency: CPI= 5.00, IPC= 0.20
m256: vrsqrtps:throughput: CPI= 1.00, IPC= 1.00
m256: vrcpps: latency: CPI= 5.00, IPC= 0.20
m256: vrcpps:throughput: CPI= 1.00, IPC= 1.00
m256: vsqrtps: latency: CPI= 14.00, IPC= 0.07
m256: vsqrtps:throughput: CPI= 5.50, IPC= 0.18
m256: vperm2f128: latency: CPI= 3.00, IPC= 0.33
m256: vperm2f128:throughput: CPI= 1.00, IPC= 1.00
m256: vpxor: latency: CPI= 0.25, IPC= 4.00
m256: vpxor:throughput: CPI= 0.25, IPC= 4.00
m256: vpaddd: latency: CPI= 1.00, IPC= 1.00
m256: vpaddd:throughput: CPI= 0.33, IPC= 3.00
m256: vpermps: latency: CPI= 8.00, IPC= 0.12
m256: vpermps:throughput: CPI= 2.00, IPC= 0.50
m256: vpermpd: latency: CPI= 6.00, IPC= 0.17
m256: vpermpd:throughput: CPI= 1.28, IPC= 0.78
m256: vpblendvb: latency: CPI= 1.00, IPC= 1.00
m256: vpblendvb:throughput: CPI= 1.00, IPC= 1.00
m256: vpmovmskb:throughput: CPI= 1.00, IPC= 1.00
m256: vpmovsxwd: latency: CPI= 4.00, IPC= 0.25
m256: vpmovsxwd:throughput: CPI= 1.14, IPC= 0.88
m256: vpgatherdd: latency: CPI= 24.00, IPC= 0.04
m256: vpgatherdd:throughput: CPI= 16.00, IPC= 0.06
m256: gather32(<ld+ins>x8 + perm): latency: CPI= 18.75, IPC= 0.05
m256: gather32(<ld+ins>x8 + perm):throughput: CPI= 4.00, IPC= 0.25
m256: vgatherdpd: latency: CPI= 19.00, IPC= 0.05
m256: vgatherdpd:throughput: CPI= 9.00, IPC= 0.11
m256: gather64(<ld+ins>x4 + perm): latency: CPI= 14.86, IPC= 0.07
m256: gather64(<ld+ins>x4 + perm):throughput: CPI= 2.00, IPC= 0.50
m256: vpshufb: latency: CPI= 1.00, IPC= 1.00
m256: vpshufb:throughput: CPI= 0.50, IPC= 2.00
m256: vfmaps: latency: CPI= 5.00, IPC= 0.20
m256: vfmaps:throughput: CPI= 0.50, IPC= 2.00
m256: vfmapd: latency: CPI= 5.00, IPC= 0.20
m256: vfmapd:throughput: CPI= 0.50, IPC= 2.00
m128: vfmaps: latency: CPI= 5.00, IPC= 0.20
m128: vfmaps:throughput: CPI= 0.50, IPC= 2.00
m128: vfmapd: latency: CPI= 5.00, IPC= 0.20
m128: vfmapd:throughput: CPI= 0.50, IPC= 2.00
reg64: popcnt: latency: CPI= 1.00, IPC= 1.00
reg64: popcnt:throughput: CPI= 0.26, IPC= 3.86
m128: aesenc: latency: CPI= 4.00, IPC= 0.25
m128: aesenc:throughput: CPI= 0.50, IPC= 2.00
m128: aesenclast: latency: CPI= 4.00, IPC= 0.25
m128: aesenclast:throughput: CPI= 0.50, IPC= 2.00
m128: aesdec: latency: CPI= 4.00, IPC= 0.25
m128: aesdec:throughput: CPI= 0.50, IPC= 2.00
m128: aesdeclast: latency: CPI= 4.00, IPC= 0.25
m128: aesdeclast:throughput: CPI= 0.50, IPC= 2.00
m128: pclmulqdq: latency: CPI= 4.58, IPC= 0.22
m128: pclmulqdq:throughput: CPI= 2.00, IPC= 0.50