Skip to content

Commit

Permalink
riscv: optimized memset
Browse files Browse the repository at this point in the history
The generic memset is implemented as a byte-at-a-time write. This is always
safe, but it's slower than 4-byte or even 8-byte writes.

Write a generic memset which fills the data one byte at a time until the
destination is aligned, then fills using the largest size allowed,
and finally fills the remaining data one byte at a time.

Signed-off-by: Matteo Croce <mcroce@microsoft.com>
  • Loading branch information
teknoraver committed Jun 23, 2021
1 parent 3f6444b commit dfe980d
Show file tree
Hide file tree
Showing 6 changed files with 42 additions and 135 deletions.
10 changes: 3 additions & 7 deletions arch/riscv/include/asm/string.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,10 @@
#ifndef _ASM_RISCV_STRING_H
#define _ASM_RISCV_STRING_H

#include <linux/types.h>
#include <linux/linkage.h>

#define __HAVE_ARCH_MEMSET
extern asmlinkage void *memset(void *, int, size_t);
extern asmlinkage void *__memset(void *, int, size_t);

#ifdef CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE
#define __HAVE_ARCH_MEMSET
void *memset(void *s, int c, size_t count);
void *__memset(void *s, int c, size_t count);
#define __HAVE_ARCH_MEMCPY
void *memcpy(void *dest, const void *src, size_t count);
void *__memcpy(void *dest, const void *src, size_t count);
Expand Down
1 change: 0 additions & 1 deletion arch/riscv/kernel/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ obj-y += syscall_table.o
obj-y += sys_riscv.o
obj-y += time.o
obj-y += traps.o
obj-y += riscv_ksyms.o
obj-y += stacktrace.o
obj-y += cacheinfo.o
obj-y += patch.o
Expand Down
13 changes: 0 additions & 13 deletions arch/riscv/kernel/riscv_ksyms.c

This file was deleted.

1 change: 0 additions & 1 deletion arch/riscv/lib/Makefile
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
lib-y += delay.o
lib-y += memset.o
lib-$(CONFIG_MMU) += uaccess.o
lib-$(CONFIG_64BIT) += tishift.o
lib-$(CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE) += string.o
Expand Down
113 changes: 0 additions & 113 deletions arch/riscv/lib/memset.S

This file was deleted.

39 changes: 39 additions & 0 deletions arch/riscv/lib/string.c
Original file line number Diff line number Diff line change
Expand Up @@ -116,3 +116,42 @@ EXPORT_SYMBOL(__memmove);

/*
 * memmove is declared as a weak alias of __memmove and exported, so both
 * names refer to the same implementation.
 */
void *memmove(void *dest, const void *src, size_t count) __weak __alias(__memmove);
EXPORT_SYMBOL(memmove);

/**
 * __memset - fill a memory region with a byte value
 * @s: start of the region to fill
 * @c: fill value; only its low 8 bits are used
 * @count: number of bytes to fill
 *
 * Regions of at least MIN_THRESHOLD bytes are filled one unsigned long at a
 * time; unless CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS is enabled, the
 * destination is first byte-filled up to an aligned address. Whatever is
 * left (or the whole region, when it is below the threshold) is filled one
 * byte at a time.
 *
 * Return: @s
 */
void *__memset(void *s, int c, size_t count)
{
	union types dest = { .as_u8 = s };

	if (count >= MIN_THRESHOLD) {
		/*
		 * Truncate 'c' to a single byte before replicating it.
		 * Without this, a negative 'c' (sign-extended) or one with
		 * bits set above bit 7 would leak those bits into the
		 * neighbouring bytes of the pattern, so the word stores
		 * would disagree with the byte stores below.
		 */
		unsigned long cu = (unsigned char)c;

		/* Compose an ulong with the fill byte repeated 4/8 times */
		cu |= cu << 8;
		cu |= cu << 16;
#if BITS_PER_LONG == 64
		cu |= cu << 32;
#endif

		if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) {
			/*
			 * Fill the buffer one byte at a time until
			 * the destination is word aligned.
			 */
			for (; count && (dest.as_uptr & mask); count--)
				*dest.as_u8++ = c;
		}

		/* Fill using the largest size allowed */
		for (; count >= bytes_long; count -= bytes_long)
			*dest.as_ulong++ = cu;
	}

	/* Fill the remainder one byte at a time */
	while (count--)
		*dest.as_u8++ = c;

	return s;
}
EXPORT_SYMBOL(__memset);

/*
 * memset is declared as a weak alias of __memset and exported, so both
 * names refer to the same implementation.
 */
void *memset(void *s, int c, size_t count) __weak __alias(__memset);
EXPORT_SYMBOL(memset);

0 comments on commit dfe980d

Please sign in to comment.