|
1 | 1 | #include <stddef.h>
|
2 | 2 |
|
| 3 | +#if __ARM_FP == 0 |
| 4 | +// WARNING: When building the scalar versions of these functions you need to |
| 5 | +// use the compiler flag "-mllvm -disable-loop-idiom-all" to prevent clang |
| 6 | +// from recognising a loop idiom and planting calls to memcpy! |
| 7 | + |
// Byte-wise copy of n bytes from src to dest, walking from the lowest address
// to the highest. Returns dest. Because bytes are transferred strictly in
// ascending order, this is also safe for overlapping regions as long as dest
// starts below src.
static void *__arm_sc_memcpy_fwd(void *dest, const void *src,
                                 size_t n) __arm_streaming_compatible {
  unsigned char *out = (unsigned char *)dest;
  const unsigned char *in = (const unsigned char *)src;

  // Count the remaining bytes down so that no pointer arithmetic happens at
  // all when n is zero.
  for (size_t remaining = n; remaining != 0; --remaining)
    *out++ = *in++;

  return dest;
}
| 17 | + |
// Streaming-compatible memcpy: copies n bytes from src to dest and returns
// dest. Overlapping dest and src is undefined behaviour, which is why both
// pointers may carry the restrict qualifier; this also mirrors the libc memcpy
// prototype given in the man page.
void *__arm_sc_memcpy(void *__restrict__ dest, const void *__restrict__ src,
                      size_t n) __arm_streaming_compatible {
  // With no overlap to worry about, the plain ascending byte copy suffices.
  void *const copied_to = __arm_sc_memcpy_fwd(dest, src, n);
  return copied_to;
}
| 25 | + |
// Streaming-compatible memset: stores the value c, converted to unsigned char,
// into each of the first n bytes at dest. Returns dest.
void *__arm_sc_memset(void *dest, int c, size_t n) __arm_streaming_compatible {
  // Only the low byte of c is ever written.
  const unsigned char fill = (unsigned char)c;
  unsigned char *cursor = (unsigned char *)dest;

  for (size_t remaining = n; remaining != 0; --remaining)
    *cursor++ = fill;

  return dest;
}
| 34 | + |
// Byte-wise copy of n bytes from src to dest, walking from the highest address
// down to the lowest. Returns dest. Because bytes are transferred strictly in
// descending order, this is safe for overlapping regions where dest starts
// above src.
static void *__arm_sc_memcpy_rev(void *dest, const void *src,
                                 size_t n) __arm_streaming_compatible {
  unsigned char *out = (unsigned char *)dest;
  const unsigned char *in = (const unsigned char *)src;

  // TODO: Improve performance by copying larger chunks in reverse, or by
  // using SVE.
  // The index is tested before being decremented, so the body sees
  // n-1, n-2, ..., 0 and never runs when n is zero.
  for (size_t idx = n; idx-- > 0;)
    out[idx] = in[idx];

  return dest;
}
| 47 | + |
// Streaming-compatible memmove: copies n bytes from src to dest and returns
// dest, giving the correct result even when the two regions overlap.
//
// Semantically a memmove is equivalent to the following:
//   1. Copy the entire contents of src to a temporary array that does not
//      overlap with src or dest.
//   2. Copy the contents of the temporary array into dest.
// No temporary is actually used; instead the copy direction is chosen so that
// no source byte is overwritten before it has been read.
void *__arm_sc_memmove(void *dest, const void *src,
                       size_t n) __arm_streaming_compatible {
  unsigned char *destp = (unsigned char *)dest;
  const unsigned char *srcp = (const unsigned char *)src;

  // Moving a region onto itself leaves memory unchanged, so skip the copy.
  if (destp == srcp)
    return dest;

  // A forward (ascending) copy is safe in two situations:
  //
  //  * dest starts below src, whether or not the regions overlap:
  //       src: Low     |   ->   |     High
  //      dest: Low  |   ->   |        High
  //    Here src is always ahead of dest at a higher address. If we first read
  //    a chunk of data from src we can safely write the same chunk to dest
  //    without corrupting future reads of src.
  //
  //  * dest starts at or beyond the end of src, i.e. the regions are disjoint
  //    or exactly adjacent, so no write can ever land inside src.
  if ((destp < srcp) || (destp >= (srcp + n)))
    return __arm_sc_memcpy_fwd(dest, src, n);

  // Remaining case: dest starts strictly inside the src region.
  //       src: Low  |   ->   |        High
  //      dest: Low     |   ->   |     High
  // A forward copy would overwrite bytes of src that have not been read yet.
  // An efficient way to avoid that is to copy the data in reverse, starting
  // at the highest address.
  return __arm_sc_memcpy_rev(dest, src, n);
}
| 78 | +#endif |
| 79 | + |
3 | 80 | const void *__arm_sc_memchr(const void *src, int c,
|
4 | 81 | size_t n) __arm_streaming_compatible {
|
5 | 82 | const unsigned char *srcp = (const unsigned char *)src;
|
|
0 commit comments