Skip to content

Commit 20f3337

Browse files
committed
x86: don't use REP_GOOD or ERMS for small memory clearing
The modern target to use is FSRS (Fast Short REP STOS), and the other cases should only be used for bigger areas (i.e. mainly things like page clearing).

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 parent 68674f9 commit 20f3337

File tree

1 file changed

+11
-36
lines changed

1 file changed

+11
-36
lines changed

arch/x86/lib/memset_64.S

Lines changed: 11 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -18,27 +18,22 @@
1818
* rdx count (bytes)
1919
*
2020
* rax original destination
21+
*
22+
* The FSRS alternative should be done inline (avoiding the call and
23+
* the disgusting return handling), but that would require some help
24+
* from the compiler for better calling conventions.
25+
*
26+
* The 'rep stosb' itself is small enough to replace the call, but all
27+
* the register moves blow up the code. And two of them are "needed"
28+
* only for the return value that is the same as the destination input,
29+
* which the compiler could/should do much better anyway.
2130
*/
2231
SYM_FUNC_START(__memset)
23-
/*
24-
* Some CPUs support enhanced REP MOVSB/STOSB feature. It is recommended
25-
* to use it when possible. If not available, use fast string instructions.
26-
*
27-
* Otherwise, use original memset function.
28-
*/
29-
ALTERNATIVE_2 "jmp memset_orig", "", X86_FEATURE_REP_GOOD, \
30-
"jmp memset_erms", X86_FEATURE_ERMS
32+
ALTERNATIVE "jmp memset_orig", "", X86_FEATURE_FSRS
3133

3234
movq %rdi,%r9
35+
movb %sil,%al
3336
movq %rdx,%rcx
34-
andl $7,%edx
35-
shrq $3,%rcx
36-
/* expand byte value */
37-
movzbl %sil,%esi
38-
movabs $0x0101010101010101,%rax
39-
imulq %rsi,%rax
40-
rep stosq
41-
movl %edx,%ecx
4237
rep stosb
4338
movq %r9,%rax
4439
RET
@@ -48,26 +43,6 @@ EXPORT_SYMBOL(__memset)
4843
SYM_FUNC_ALIAS(memset, __memset)
4944
EXPORT_SYMBOL(memset)
5045

51-
/*
52-
* ISO C memset - set a memory block to a byte value. This function uses
53-
* enhanced rep stosb to override the fast string function.
54-
* The code is simpler and shorter than the fast string function as well.
55-
*
56-
* rdi destination
57-
* rsi value (char)
58-
* rdx count (bytes)
59-
*
60-
* rax original destination
61-
*/
62-
SYM_FUNC_START_LOCAL(memset_erms)
63-
movq %rdi,%r9
64-
movb %sil,%al
65-
movq %rdx,%rcx
66-
rep stosb
67-
movq %r9,%rax
68-
RET
69-
SYM_FUNC_END(memset_erms)
70-
7146
SYM_FUNC_START_LOCAL(memset_orig)
7247
movq %rdi,%r10
7348

0 commit comments

Comments
 (0)