Skip to content

Commit 43aa8d2

Browse files
committed
Support aarch64 targets without FPU
Fall back to the old C implementations of various routines when the target doesn't have an FPU. Signed-off-by: Keith Packard <keithp@keithp.com>
1 parent f0546a5 commit 43aa8d2

File tree

2 files changed

+78
-1
lines changed

2 files changed

+78
-1
lines changed

Diff for: compiler-rt/lib/builtins/aarch64/sme-libc-mem-routines.S

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
#include "../assembly.h"
88

9-
#ifdef __aarch64__
9+
#if defined(__aarch64__) && __ARM_FP != 0
1010

1111
#define L(l) .L ## l
1212

Diff for: compiler-rt/lib/builtins/aarch64/sme-libc-routines.c

+77
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,82 @@
11
#include <stddef.h>
22

3+
#if __ARM_FP == 0
4+
// WARNING: When building the scalar versions of these functions you need to
5+
// use the compiler flag "-mllvm -disable-loop-idiom-all" to prevent clang
6+
// from recognising a loop idiom and planting calls to memcpy!
7+
8+
static void *__arm_sc_memcpy_fwd(void *dest, const void *src,
9+
size_t n) __arm_streaming_compatible {
10+
unsigned char *destp = (unsigned char *)dest;
11+
const unsigned char *srcp = (const unsigned char *)src;
12+
for (size_t i = 0; i < n; ++i)
13+
destp[i] = srcp[i];
14+
15+
return dest;
16+
}
17+
18+
// If dest and src overlap then behaviour is undefined, hence we can add the
19+
// restrict keywords here. This also matches the definition of the libc memcpy
20+
// according to the man page.
21+
void *__arm_sc_memcpy(void *__restrict__ dest, const void *__restrict__ src,
22+
size_t n) __arm_streaming_compatible {
23+
return __arm_sc_memcpy_fwd(dest, src, n);
24+
}
25+
26+
// Stores the low 8 bits of c into each of the first n bytes at dest and
// returns dest.
void *__arm_sc_memset(void *dest, int c, size_t n) __arm_streaming_compatible {
  const unsigned char fill = (unsigned char)c;
  unsigned char *cursor = (unsigned char *)dest;

  // Count the remaining bytes down so that no pointer arithmetic happens when
  // n is zero.
  for (size_t remaining = n; remaining != 0; --remaining)
    *cursor++ = fill;

  return dest;
}
34+
35+
static void *__arm_sc_memcpy_rev(void *dest, const void *src,
36+
size_t n) __arm_streaming_compatible {
37+
unsigned char *destp = (unsigned char *)dest;
38+
const unsigned char *srcp = (const unsigned char *)src;
39+
// TODO: Improve performance by copying larger chunks in reverse, or by
40+
// using SVE.
41+
while (n > 0) {
42+
--n;
43+
destp[n] = srcp[n];
44+
}
45+
return dest;
46+
}
47+
48+
// Semantically a memmove is equivalent to the following:
49+
// 1. Copy the entire contents of src to a temporary array that does not
50+
// overlap with src or dest.
51+
// 2. Copy the contents of the temporary array into dest.
52+
void *__arm_sc_memmove(void *dest, const void *src,
53+
size_t n) __arm_streaming_compatible {
54+
unsigned char *destp = (unsigned char *)dest;
55+
const unsigned char *srcp = (const unsigned char *)src;
56+
57+
// If src and dest don't overlap then just invoke memcpy
58+
if ((srcp > (destp + n)) || (destp > (srcp + n)))
59+
return __arm_sc_memcpy_fwd(dest, src, n);
60+
61+
// Overlap case 1:
62+
// src: Low | -> | High
63+
// dest: Low | -> | High
64+
// Here src is always ahead of dest at a higher addres. If we first read a
65+
// chunk of data from src we can safely write the same chunk to dest without
66+
// corrupting future reads of src.
67+
if (srcp > destp)
68+
return __arm_sc_memcpy_fwd(dest, src, n);
69+
70+
// Overlap case 2:
71+
// src: Low | -> | High
72+
// dest: Low | -> | High
73+
// While we're in the overlap region we're always corrupting future reads of
74+
// src when writing to dest. An efficient way to do this is to copy the data
75+
// in reverse by starting at the highest address.
76+
return __arm_sc_memcpy_rev(dest, src, n);
77+
}
78+
#endif
79+
380
const void *__arm_sc_memchr(const void *src, int c,
481
size_t n) __arm_streaming_compatible {
582
const unsigned char *srcp = (const unsigned char *)src;

0 commit comments

Comments
 (0)