14
14
15
15
include AsmMacros. inc
16
16
17
- ;char *memset(dst, value, count) - sets "count" bytes at "dst" to "value"
17
+ extern memset:proc
18
+ extern memmove:proc
19
+
20
+ ; JIT_MemSet/JIT_MemCpy
18
21
;
19
- ;Purpose:
20
- ; Sets the first "count" bytes of the memory starting
21
- ; at "dst" to the character value "value" .
22
+ ; It is IMPORTANT that the exception handling code is able to find these
23
+ ; functions on the stack, but on Windows platforms we can just defer to the
24
+ ; platform implementation.
22
25
;
23
- ;Algorithm:
24
- ;Set dst based on count as follow
25
- ; count [0, 16]: use 1/2/4/8 bytes width registers
26
- ; count [16, 128]: use 16 bytes width registers (XMM) without loop
27
- ; count [128, 512]: use 16 bytes width registers (XMM) with loops, unrolled 8 times
28
- ; count [512, upper]: use rep stosb
29
- ;Entry:
30
- ; char *dst - pointer to memory to fill with value
31
- ; char value - value to put in dst bytes
32
- ; int count - number of bytes of dst to fill
26
+
27
+ ; void JIT_MemSet(void* dest, int c, size_t count)
33
28
;
34
- ;Exit:
35
- ; returns dst, with filled bytes
29
+ ; Purpose:
30
+ ; Sets the first "count" bytes of the block of memory pointed to by
31
+ ; "dest" to the specified value (interpreted as an unsigned char).
36
32
;
37
- ;Uses:
33
+ ; Entry:
34
+ ; RCX: void* dest - Pointer to the block of memory to fill.
35
+ ; RDX: int c - Value to be set.
36
+ ; R8: size_t count - Number of bytes to be set to the value.
38
37
;
39
- ;Exceptions:
38
+ ; Exit:
39
+ ;
40
+ ; Uses:
41
+ ;
42
+ ; Exceptions:
40
43
;
41
- ;*******************************************************************************
42
-
43
44
LEAF_ENTRY JIT_MemSet , _TEXT
45
+ test r8 , r8 ; check if count is zero
46
+ jz Exit_MemSet ; if zero, no bytes to set
44
47
45
- movzx edx , dl ; set fill pattern
46
- mov r9 , 0101010101010101h
47
- imul rdx , r9 ; rdx is 8 bytes filler
48
+ cmp byte ptr [ rcx ], 0 ; check dest for null
48
49
49
- cmp r8 , 16
50
- jbe mset04
50
+ jmp memset ; forward to the CRT implementation
51
51
52
- cmp r8 , 512
53
- jbe mset00
54
-
55
- ; count > 512
56
- mov r10 , rcx ; save dst address
57
- mov r11 , rdi ; save rdi
58
- mov eax , edx ; eax is value
59
- mov rdi , rcx ; rdi is dst
60
- mov rcx , r8 ; rcx is count
61
- rep stosb
62
- mov rdi , r11 ; restore rdi
63
- mov rax , r10
52
+ Exit_MemSet:
64
53
ret
65
54
66
- align 16
67
- mset00: mov rax , rcx ; save dst address
68
- movd xmm0 , rdx
69
- punpcklbw xmm0 , xmm0 ; xmm0 is 16 bytes filler
70
-
71
- cmp r8 , 128
72
- jbe mset02
73
-
74
- ; count > 128 && count <= 512
75
- mov r9 , r8
76
- shr r9 , 7 ; count/128
77
-
78
- align 16
79
- mset01: movdqu [ rcx ], xmm0
80
- movdqu 16 [ rcx ], xmm0
81
- movdqu 32 [ rcx ], xmm0
82
- movdqu 48 [ rcx ], xmm0
83
- movdqu 64 [ rcx ], xmm0
84
- movdqu 80 [ rcx ], xmm0
85
- movdqu 96 [ rcx ], xmm0
86
- movdqu 112 [ rcx ], xmm0
87
- add rcx , 128
88
- dec r9
89
- jnz mset01
90
- and r8 , 7fh ; and r8 with 0111 1111
91
-
92
- ; the remainder is from 0 to 127
93
- cmp r8 , 16
94
- jnbe mset02
95
-
96
- ; the remainder <= 16
97
- movdqu - 16 [ rcx + r8 ], xmm0
98
- ret
99
-
100
- ; count > 16 && count <= 128 for mset02
101
- align 16
102
- mset02: movdqu [ rcx ], xmm0
103
- movdqu - 16 [ rcx + r8 ], xmm0
104
- cmp r8 , 32
105
- jbe mset03
106
-
107
- ; count > 32 && count <= 64
108
- movdqu 16 [ rcx ], xmm0
109
- movdqu - 32 [ rcx + r8 ], xmm0
110
- cmp r8 , 64
111
- jbe mset03
112
-
113
- ; count > 64 && count <= 128
114
- movdqu 32 [ rcx ], xmm0
115
- movdqu 48 [ rcx ], xmm0
116
- movdqu - 48 [ rcx + r8 ], xmm0
117
- movdqu - 64 [ rcx + r8 ], xmm0
118
- mset03: ret
119
-
120
- align 16
121
- mset04: mov rax , rcx ; save dst address
122
- test r8b , 24 ; and r8b with 0001 1000
123
- jz mset05
124
-
125
- ; count >= 8 && count <= 16
126
- mov [ rcx ], rdx
127
- mov - 8 [ rcx + r8 ], rdx
128
- ret
129
-
130
- align 16
131
- mset05: test r8b , 4 ; and r8b with 0100
132
- jz mset06
133
-
134
- ; count >= 4 && count < 8
135
- mov [ rcx ], edx
136
- mov - 4 [ rcx + r8 ], edx
137
- ret
138
-
139
- ; count >= 0 && count < 4
140
- align 16
141
- mset06: test r8b , 1 ; and r8b with 0001
142
- jz mset07
143
- mov [ rcx ], dl
144
- mset07: test r8b , 2 ; and r8b with 0010
145
- jz mset08
146
- mov - 2 [ rcx + r8 ], dx
147
- mset08: ret
148
-
149
55
LEAF_END_MARKED JIT_MemSet , _TEXT
150
56
151
- ;JIT_MemCpy - Copy source buffer to destination buffer
57
+ ; void JIT_MemCpy(void* dest, const void* src, size_t count)
152
58
;
153
- ;Purpose:
154
- ; JIT_MemCpy() copies a source memory buffer to a destination memory
155
- ; buffer. This routine recognize overlapping buffers to avoid propogation.
156
- ; For cases where propogation is not a problem, memcpy() can be used.
59
+ ; Purpose:
60
+ ; Copies the values of "count" bytes from the location pointed to
61
+ ; by "src" to the memory block pointed to by "dest".
157
62
;
158
- ;Algorithm:
159
- ;Copy to destination based on count as follow
160
- ; count [0, 64]: overlap check not needed
161
- ; count [0, 16]: use 1/2/4/8 bytes width registers
162
- ; count [16, 64]: use 16 bytes width registers (XMM) without loop
163
- ; count [64, upper]: check overlap
164
- ; non-overlap:
165
- ; count [64, 512]: use 16 bytes width registers (XMM) with loops, unrolled 4 times
166
- ; count [512, upper]: use rep movsb
167
- ; overlap::
168
- ; use 16 bytes width registers (XMM) with loops to copy from end to beginnig
63
+ ; Entry:
64
+ ; RCX: void* dest - Pointer to the destination array where content is to be copied.
65
+ ; RDX: const void* src - Pointer to the source of the data to be copied.
66
+ ; R8: size_t count - Number of bytes to copy.
169
67
;
170
- ;Entry:
171
- ; void *dst = pointer to destination buffer
172
- ; const void *src = pointer to source buffer
173
- ; size_t count = number of bytes to copy
68
+ ; Exit:
174
69
;
175
- ;Exit:
176
- ; Returns a pointer to the destination buffer
70
+ ; Uses:
177
71
;
178
- ;Uses :
72
+ ; Exceptions :
179
73
;
180
- ;Exceptions:
181
- ;*******************************************************************************
182
-
183
74
LEAF_ENTRY JIT_MemCpy , _TEXT
75
+ test r8 , r8 ; check if count is zero
76
+ jz Exit_MemCpy ; if zero, no bytes to copy
184
77
185
- mov rax , rcx ; save dst address
186
- cmp r8 , 16
187
- jbe mcpy02
188
-
189
- cmp r8 , 64
190
- jnbe mcpy07
78
+ cmp byte ptr [ rcx ], 0 ; check dest for null
79
+ cmp byte ptr [ rdx ], 0 ; check src for null
191
80
192
- ; count > 16 && count <= 64
193
- align 16
194
- mcpy00: movdqu xmm0 , [ rdx ]
195
- movdqu xmm1 , - 16 [ rdx + r8 ] ; save 16 to 32 bytes src
196
- cmp r8 , 32
197
- jbe mcpy01
198
-
199
- movdqu xmm2 , 16 [ rdx ]
200
- movdqu xmm3 , - 32 [ rdx + r8 ] ; save 32 to 64 bytes src
201
-
202
- ;count > 32 && count <= 64
203
- movdqu 16 [ rcx ], xmm2
204
- movdqu - 32 [ rcx + r8 ], xmm3
205
-
206
- ;count > 16 && count <= 32
207
- mcpy01: movdqu [ rcx ], xmm0
208
- movdqu - 16 [ rcx + r8 ], xmm1
209
- ret
210
-
211
- ; count <= 16
212
- align 16
213
- mcpy02: test r8b , 24 ; test count with 0001 1000
214
- jz mcpy03
215
- ; count >= 8 && count <= 16
216
- mov r9 , [ rdx ]
217
- mov r10 , - 8 [ rdx + r8 ]
218
- mov [ rcx ], r9
219
- mov - 8 [ rcx + r8 ], r10
220
- ret
221
-
222
- align 16
223
- mcpy03: test r8b , 4 ; test count with 0100
224
- jz mcpy04
225
- ; count >= 4 && count < 8
226
- mov r9d , [ rdx ]
227
- mov r10d , - 4 [ rdx + r8 ]
228
- mov [ rcx ], r9d
229
- mov - 4 [ rcx + r8 ], r10d
230
- ret
231
-
232
- ; count >= 0 && count < 4
233
- align 16
234
- mcpy04: test r8 , r8
235
- jz mcpy06 ; count == 1/2/3
236
- mov r9b , [ rdx ] ; save the first byte
237
-
238
- test r8b , 2 ; test count with 0010
239
- jz mcpy05
240
- mov r10w , - 2 [ rdx + r8 ]
241
- mov - 2 [ rcx + r8 ], r10w
242
- mcpy05: mov [ rcx ], r9b
243
- mcpy06: ret
244
-
245
- align 16
246
- ; count > 64, we need to check overlap
247
- mcpy07: mov r9 , rdx ; r9 is src address
248
- sub r9 , rcx ; if src - dst < 0 jump to mcpy11
249
- jb mcpy11 ; if b, destination may overlap
250
-
251
- mcpy08: cmp r8 , 512
252
- jnbe mcpy10
253
-
254
- ; count > 64 && count <= 512
255
- mov r9 , r8
256
- shr r9 , 6 ; count/64
257
-
258
- align 16
259
- mcpy09: movdqu xmm0 , [ rdx ]
260
- movdqu xmm1 , 16 [ rdx ]
261
- movdqu xmm2 , 32 [ rdx ]
262
- movdqu xmm3 , 48 [ rdx ]
263
- movdqu [ rcx ], xmm0
264
- movdqu 16 [ rcx ], xmm1
265
- movdqu 32 [ rcx ], xmm2
266
- movdqu 48 [ rcx ], xmm3
267
- add rdx , 64
268
- add rcx , 64
269
- dec r9
270
- jnz mcpy09
271
-
272
- ; the remainder is from 0 to 63
273
- and r8 , 3fh ; and with 0011 1111
274
- cmp r8 , 16
275
- jnbe mcpy00
81
+ ; Use memmove to handle overlapping buffers for better
82
+ ; compatibility with .NET Framework. The spec leaves the behavior
83
+ ; of cpblk undefined when the buffers overlap.
84
+ jmp memmove ; forward to the CRT implementation
276
85
277
- ; the remainder <= 16
278
- jmp mcpy02
279
- ret
280
-
281
- ; count > 512
282
- align 16
283
- mcpy10: mov r10 , rdi ; save rdi
284
- mov r11 , rsi ; save rsi
285
- mov rdi , rcx ; rdi is dst
286
- mov rsi , rdx ; rsi is src
287
- mov rcx , r8 ; rcx is count
288
- rep movsb ; mov from rsi to rdi
289
- mov rsi , r11 ; restore rsi
290
- mov rdi , r10 ; restore rdi
86
+ Exit_MemCpy:
291
87
ret
292
88
293
- ; The source address is less than the destination address.
294
-
295
- align 16
296
- mcpy11: add r9 , r8 ; src - dst + count
297
- cmp r9 , 0 ; src + count < = dst jump to mcpy08
298
- jle mcpy08
299
-
300
- lea r9 , [ rdx + r8 ] ; r9 is the src + count
301
- lea r10 , [ rcx + r8 ] ; r10 is the dst + count
302
-
303
- mov r11 , r8
304
- shr r11 , 6 ; count/64
305
-
306
- ; count > 64
307
- align 16
308
- mcpy12: movdqu xmm0 , - 16 [ r9 ]
309
- movdqu xmm1 , - 32 [ r9 ]
310
- movdqu xmm2 , - 48 [ r9 ]
311
- movdqu xmm3 , - 64 [ r9 ]
312
- movdqu - 16 [ r10 ], xmm0
313
- movdqu - 32 [ r10 ], xmm1
314
- movdqu - 48 [ r10 ], xmm2
315
- movdqu - 64 [ r10 ], xmm3
316
- sub r9 , 64
317
- sub r10 , 64
318
- dec r11
319
- jnz mcpy12
320
-
321
- ; the remainder is from 0 to 63
322
- and r8 , 3fh ; and with 0011 1111
323
- cmp r8 , 16
324
- jnbe mcpy00
325
-
326
- ; the remainder <= 16
327
- jmp mcpy02
328
-
329
89
LEAF_END_MARKED JIT_MemCpy , _TEXT
330
- end
90
+ end
0 commit comments