Commit 135b070
Nicolas Pitre
lib/os/prf.c: alternate implementation for _ldiv5()
The _ldiv5() function is an optimized divide-by-5 routine that is smaller
and faster than the generic libgcc implementation.
Yet it can be made even smaller and faster with this replacement
implementation based on a reciprocal multiplication plus some tricks.
For example, here's the assembly from the original code on ARM:
_ldiv5:
ldr r3, [r0]
movw ip, #52429
ldr r1, [r0, #4]
movt ip, 52428
adds r3, r3, #2
push {r4, r5, r6, r7, lr}
mov lr, #0
adc r1, r1, lr
adds r2, lr, lr
umull r7, r6, ip, r1
lsr r6, r6, #2
adc r7, r6, r6
adds r2, r2, r2
adc r7, r7, r7
adds r2, r2, lr
adc r7, r7, r6
subs r3, r3, r2
sbc r7, r1, r7
lsr r2, r3, #3
orr r2, r2, r7, lsl #29
umull r2, r1, ip, r2
lsr r2, r1, #2
lsr r7, r1, #31
lsl r1, r2, #3
adds r4, lr, r1
adc r5, r6, r7
adds r2, r1, r1
adds r2, r2, r2
adds r2, r2, r1
subs r2, r3, r2
umull r3, r2, ip, r2
lsr r2, r2, #2
adds r4, r4, r2
adc r5, r5, #0
strd r4, [r0]
pop {r4, r5, r6, r7, pc}
And here's the resulting assembly with this commit applied:
_ldiv5:
push {r4, r5, r6, r7}
movw r4, #13107
ldr r6, [r0]
movt r4, 13107
ldr r1, [r0, #4]
mov r3, #0
umull r6, r7, r6, r4
add r2, r4, r4, lsl #1
umull r4, r5, r1, r4
adds r1, r6, r2
adc r2, r7, r2
adds ip, r6, r4
adc r1, r7, r5
adds r2, ip, r2
adc r2, r1, r3
adds r2, r4, r2
adc r3, r5, r3
strd r2, [r0]
pop {r4, r5, r6, r7}
bx lr
So we're down to 20 instructions from 36 initially, with only 2 umull
instructions instead of 3, and a slightly smaller stack footprint.
Signed-off-by: Nicolas Pitre <npitre@baylibre.com>

1 parent f4a4583 commit 135b070
1 file changed
+41
-22
lines changed

| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
130 | 130 | | |
131 | 131 | | |
132 | 132 | | |
133 | | - | |
134 | | - | |
135 | | - | |
136 | | - | |
137 | | - | |
138 | | - | |
139 | | - | |
140 | | - | |
| 133 | + | |
| 134 | + | |
| 135 | + | |
| 136 | + | |
| 137 | + | |
| 138 | + | |
| 139 | + | |
| 140 | + | |
| 141 | + | |
| 142 | + | |
| 143 | + | |
| 144 | + | |
| 145 | + | |
| 146 | + | |
| 147 | + | |
| 148 | + | |
| 149 | + | |
| 150 | + | |
141 | 151 | | |
142 | 152 | | |
143 | 153 | | |
144 | | - | |
145 | | - | |
146 | | - | |
| 154 | + | |
| 155 | + | |
| 156 | + | |
| 157 | + | |
147 | 158 | | |
148 | | - | |
| 159 | + | |
| 160 | + | |
| 161 | + | |
| 162 | + | |
| 163 | + | |
| 164 | + | |
149 | 165 | | |
150 | 166 | | |
151 | | - | |
152 | | - | |
| 167 | + | |
| 168 | + | |
| 169 | + | |
| 170 | + | |
153 | 171 | | |
154 | | - | |
| 172 | + | |
155 | 173 | | |
156 | | - | |
157 | | - | |
158 | | - | |
159 | | - | |
160 | | - | |
161 | | - | |
| 174 | + | |
| 175 | + | |
| 176 | + | |
| 177 | + | |
| 178 | + | |
| 179 | + | |
| 180 | + | |
162 | 181 | | |
163 | | - | |
| 182 | + | |
164 | 183 | | |
165 | 184 | | |
166 | 185 | | |
| |||
0 commit comments