Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 68 additions & 0 deletions src/rp2_common/hardware_divider/include/hardware/divider_helper.S
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
/*
* Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
*
* SPDX-License-Identifier: BSD-3-Clause
*/

#include "hardware/regs/addressmap.h"
#include "hardware/regs/sio.h"

#if SIO_DIV_CSR_READY_LSB == 0
.equ SIO_DIV_CSR_READY_SHIFT_FOR_CARRY, 1
#else
need to change SHIFT above
#endif
#if SIO_DIV_CSR_DIRTY_LSB == 1
.equ SIO_DIV_CSR_DIRTY_SHIFT_FOR_CARRY, 2
#else
need to change SHIFT above
#endif

// SIO_BASE ptr in r2; pushes r4-r7, lr to stack
// requires that division started at least 2 cycles prior to the start of the macro
.macro save_div_state_and_lr
// originally we did this, however a) it uses r3, and b) the push takes 6 cycles, b)
// any IRQ which uses the divider will necessarily put the data back, which will
// immediately make it ready
//
// // ldr r3, [r2, #SIO_DIV_CSR_OFFSET]
// // // wait for results as we can't save signed-ness of operation
// // 1:
// // lsrs r3, #SIO_DIV_CSR_READY_SHIFT_FOR_CARRY
// // bcc 1b

// 6 cycles
push {r4, r5, r6, r7, lr}
// note we must read quotient last, and since it isn't the last reg, we'll not use ldmia!
ldr r4, [r2, #SIO_DIV_UDIVIDEND_OFFSET]
ldr r5, [r2, #SIO_DIV_UDIVISOR_OFFSET]
ldr r7, [r2, #SIO_DIV_REMAINDER_OFFSET]
ldr r6, [r2, #SIO_DIV_QUOTIENT_OFFSET]
.endm

// restores divider state from r4-r7, then pops them and pc
.macro restore_div_state_and_return
// writing sdividend (r4), sdivisor (r5), quotient (r6), remainder (r7) in that order
//
// it is worth considering what happens if we are interrupted
//
// after writing r4: we are DIRTY and !READY
// ... interruptor using div will complete based on incorrect inputs, but dividend at least will be
// saved/restored correctly and we'll restore the rest ourselves
// after writing r4, r5: we are DIRTY and !READY
// ... interruptor using div will complete based on possibly wrongly signed inputs, but dividend, divisor
// at least will be saved/restored correctly and and we'll restore the rest ourselves
// after writing r4, r5, r6: we are DIRTY and READY
// ... interruptor using div will dividend, divisor, quotient registers as is (what we just restored ourselves),
// and we'll restore the remainder after the fact

// note we are not use STM not because it can be restarted due to interrupt which is harmless, more because this is 1 cycle IO space
// and so 4 reads is cheaper (and we don't have to adjust r2)
// note also, that we must restore via UDIVI* rather than SDIVI* to prevent the quotient/remainder being negated on read based
// on the signs of the inputs
str r4, [r2, #SIO_DIV_UDIVIDEND_OFFSET]
str r5, [r2, #SIO_DIV_UDIVISOR_OFFSET]
str r7, [r2, #SIO_DIV_REMAINDER_OFFSET]
str r6, [r2, #SIO_DIV_QUOTIENT_OFFSET]
pop {r4, r5, r6, r7, pc}
.endm
82 changes: 18 additions & 64 deletions src/rp2_common/pico_divider/divider.S
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
* SPDX-License-Identifier: BSD-3-Clause
*/

#include "hardware/regs/sio.h"
#include "hardware/regs/addressmap.h"
#include "hardware/divider_helper.S"

.syntax unified
.cpu cortex-m0plus
Expand Down Expand Up @@ -34,17 +34,6 @@
#endif
.endm

#if SIO_DIV_CSR_READY_LSB == 0
.equ SIO_DIV_CSR_READY_SHIFT_FOR_CARRY, 1
#else
need to change SHIFT above
#endif
#if SIO_DIV_CSR_DIRTY_LSB == 1
.equ SIO_DIV_CSR_DIRTY_SHIFT_FOR_CARRY, 2
#else
need to change SHIFT above
#endif

@ wait 8-n cycles for the hardware divider
.macro wait_div n
.rept (8-\n) / 2
Expand All @@ -56,58 +45,17 @@ need to change SHIFT above
.endif
.endm


#if (SIO_DIV_SDIVISOR_OFFSET != SIO_DIV_SDIVIDEND_OFFSET + 4) || (SIO_DIV_QUOTIENT_OFFSET != SIO_DIV_SDIVISOR_OFFSET + 4) || (SIO_DIV_REMAINDER_OFFSET != SIO_DIV_QUOTIENT_OFFSET + 4)
#error register layout has changed - we rely on this order to make sure we save/restore in the right order
#endif

#if !PICO_DIVIDER_DISABLE_INTERRUPTS

# SIO_BASE ptr in r2
.macro save_div_state_and_lr
ldr r3, [r2, #SIO_DIV_CSR_OFFSET]
# wait for results as we can't save signed-ness of operation
1:
lsrs r3, #SIO_DIV_CSR_READY_SHIFT_FOR_CARRY
bcc 1b
push {r4, r5, r6, r7, lr}
// note we must read quotient last, and since it isn't the last reg, we'll not use ldmia!
ldr r4, [r2, #SIO_DIV_SDIVIDEND_OFFSET]
ldr r5, [r2, #SIO_DIV_SDIVISOR_OFFSET]
ldr r7, [r2, #SIO_DIV_REMAINDER_OFFSET]
ldr r6, [r2, #SIO_DIV_QUOTIENT_OFFSET]
.endm

.macro restore_div_state_and_return
// writing sdividend (r4), sdivisor (r5), quotient (r6), remainder (r7) in that order
//
// it is worth considering what happens if we are interrupted
//
// after writing r4: we are DIRTY and !READY
// ... interruptor using div will complete based on incorrect inputs, but dividend at least will be
// saved/restored correctly and we'll restore the rest ourselves
// after writing r4, r5: we are DIRTY and !READY
// ... interruptor using div will complete based on possibly wrongly signed inputs, but dividend, divisor
// at least will be saved/restored correctly and and we'll restore the rest ourselves
// after writing r4, r5, r6: we are DIRTY and READY
// ... interruptor using div will dividend, divisor, quotient registers as is (what we just restored ourselves),
// and we'll restore the remainder after the fact

// note we are not use STM not because it can be restarted due to interrupt which is harmless, more because this is 1 cycle IO space
// and so 4 reads is cheaper (and we don't have to adjust r2)
str r4, [r2, #SIO_DIV_SDIVIDEND_OFFSET]
str r5, [r2, #SIO_DIV_SDIVISOR_OFFSET]
str r7, [r2, #SIO_DIV_REMAINDER_OFFSET]
str r6, [r2, #SIO_DIV_QUOTIENT_OFFSET]
pop {r4, r5, r6, r7, pc}
.endm

.macro save_div_state_and_lr_64
push {r4, r5, r6, r7, lr}
ldr r6, =SIO_BASE
1:
ldr r5, [r6, #SIO_DIV_CSR_OFFSET]
# wait for results as we can't save signed-ness of operation
// wait for results as we can't save signed-ness of operation
lsrs r5, #SIO_DIV_CSR_READY_SHIFT_FOR_CARRY
bcc 1b
// note we must read quotient last, and since it isn't the last reg, we'll not use ldmia!
Expand Down Expand Up @@ -154,17 +102,18 @@ wrapper_func __aeabi_idivmod
regular_func div_s32s32
regular_func divmod_s32s32
#if !PICO_DIVIDER_DISABLE_INTERRUPTS
// to support IRQ usage (or context switch) we must save/restore divider state around call if state is dirty
ldr r2, =(SIO_BASE)
# to support IRQ usage we must save/restore
ldr r3, [r2, #SIO_DIV_CSR_OFFSET]
lsrs r3, #SIO_DIV_CSR_DIRTY_SHIFT_FOR_CARRY
bcs divmod_s32s32_savestate
regular_func divmod_s32s32_unsafe
#else
# to avoid too much source code spaghetti with restoring interrupts, we make this the same as the other funcs
# in the PICO_DIVIDER_DISABLE_INTERRUPTS case; i.e. it is not a faster function; this seems reasonable as there
# are the hardware_divider functions that can be used instead anyway
// to avoid too much source code spaghetti with restoring interrupts, we make this the same as the other funcs
// in the PICO_DIVIDER_DISABLE_INTERRUPTS case; i.e. it is not a faster function; this seems reasonable as there
// are the hardware_divider functions that can be used instead anyway
regular_func divmod_s32s32_unsafe
// to avoid worrying about IRQs (or context switches), simply disable interrupts around call
ldr r2, =(SIO_BASE)
mrs r3, PRIMASK
cpsid i
Expand Down Expand Up @@ -203,6 +152,8 @@ regular_func divmod_s32s32_unsafe
#if !PICO_DIVIDER_DISABLE_INTERRUPTS
.align 2
regular_func divmod_s32s32_savestate
// note that we must be at least 2 cycles into division at this point,
// which we are because of the firty check before getting here (and of course the function call before that)
save_div_state_and_lr
bl divmod_s32s32_unsafe
restore_div_state_and_return
Expand All @@ -215,17 +166,18 @@ regular_func divmod_u32u32
wrapper_func __aeabi_uidiv
wrapper_func __aeabi_uidivmod
#if !PICO_DIVIDER_DISABLE_INTERRUPTS
// to support IRQ usage (or context switch) we must save/restore divider state around call if state is dirty
ldr r2, =(SIO_BASE)
# to support IRQ usage we must save/restore
ldr r3, [r2, #SIO_DIV_CSR_OFFSET]
lsrs r3, #SIO_DIV_CSR_DIRTY_SHIFT_FOR_CARRY
bcs divmod_u32u32_savestate
regular_func divmod_u32u32_unsafe
#else
# to avoid too much source code spaghetti with restoring interrupts, we make this the same as the other funcs
# in the PICO_DIVIDER_DISABLE_INTERRUPTS case; i.e. it is not a faster function; this seems reasonable as there
# are the hardware_divider functions that can be used instead anyway
// to avoid too much source code spaghetti with restoring interrupts, we make this the same as the other funcs
// in the PICO_DIVIDER_DISABLE_INTERRUPTS case; i.e. it is not a faster function; this seems reasonable as there
// are the hardware_divider functions that can be used instead anyway
regular_func divmod_u32u32_unsafe
// to avoid worrying about IRQs (or context switches), simply disable interrupts around call
ldr r2, =(SIO_BASE)
mrs r3, PRIMASK
cpsid i
Expand Down Expand Up @@ -273,9 +225,9 @@ wrapper_func __aeabi_ldivmod
regular_func div_s64s64
regular_func divmod_s64s64
#if !PICO_DIVIDER_DISABLE_INTERRUPTS
// to support IRQ usage (or context switch) we must save/restore divider state around call if state is dirty
mov ip, r2
ldr r2, =(SIO_BASE)
# to support IRQ usage we must save/restore
ldr r2, [r2, #SIO_DIV_CSR_OFFSET]
lsrs r2, #SIO_DIV_CSR_DIRTY_SHIFT_FOR_CARRY
mov r2, ip
Expand All @@ -287,6 +239,7 @@ divmod_s64s64_savestate:
bl divmod_s64s64_unsafe
restore_div_state_and_return_64
#else
// to avoid worrying about IRQs (or context switches), simply disable interrupts around call
push {r4, lr}
mrs r4, PRIMASK
cpsid i
Expand All @@ -300,9 +253,9 @@ wrapper_func __aeabi_uldivmod
regular_func div_u64u64
regular_func divmod_u64u64
#if !PICO_DIVIDER_DISABLE_INTERRUPTS
// to support IRQ usage (or context switch) we must save/restore divider state around call if state is dirty
mov ip, r2
ldr r2, =(SIO_BASE)
# to support IRQ usage we must save/restore
ldr r2, [r2, #SIO_DIV_CSR_OFFSET]
lsrs r2, #SIO_DIV_CSR_DIRTY_SHIFT_FOR_CARRY
mov r2, ip
Expand All @@ -314,6 +267,7 @@ regular_func divmod_u64u64_savestate
bl divmod_u64u64_unsafe
restore_div_state_and_return_64
#else
// to avoid worrying about IRQs (or context switches), simply disable interrupts around call
push {r4, lr}
mrs r4, PRIMASK
cpsid i
Expand Down
78 changes: 68 additions & 10 deletions src/rp2_common/pico_double/double_aeabi.S
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

#include "pico/asm_helper.S"
#include "pico/bootrom/sf_table.h"
#include "hardware/divider_helper.S"

__pre_init __aeabi_double_init, 00020

Expand Down Expand Up @@ -131,16 +132,16 @@ regular_func pop_r8_r11
mov r11,r7
bx r14

# note generally each function is in a separate section unless there is fall thru or branching between them
# note fadd, fsub, fmul, fdiv are so tiny and just defer to rom so are lumped together so they can share constant pool
// note generally each function is in a separate section unless there is fall thru or branching between them
// note fadd, fsub, fmul, fdiv are so tiny and just defer to rom so are lumped together so they can share constant pool

# note functions are word aligned except where they are an odd number of linear instructions
// note functions are word aligned except where they are an odd number of linear instructions

// double FUNC_NAME(__aeabi_dadd)(double, double) double-precision addition
double_wrapper_section __aeabi_darithmetic
// double FUNC_NAME(__aeabi_drsub)(double x, double y) double-precision reverse subtraction, y - x

# frsub first because it is the only one that needs alignment
// frsub first because it is the only one that needs alignment
.align 2
wrapper_func __aeabi_drsub
eors r0, r1
Expand Down Expand Up @@ -177,7 +178,35 @@ wrapper_func_d2 __aeabi_ddiv
b ddiv_dsub_nan_helper
1:
#endif
shimmable_table_tail_call SF_TABLE_FDIV ddiv_shim
#if !PICO_DIVIDER_DISABLE_INTERRUPTS
// to support IRQ usage (or context switch) we must save/restore divider state around call if state is dirty
mov ip, r2
ldr r2, =(SIO_BASE)
ldr r2, [r2, #SIO_DIV_CSR_OFFSET]
lsrs r2, #SIO_DIV_CSR_DIRTY_SHIFT_FOR_CARRY
bcs ddiv_save_state
mov r2, ip
#else
// to avoid worrying about IRQs (or context switches), simply disable interrupts around call
push {r4, lr}
mrs r4, PRIMASK
cpsid i
bl ddiv_shim_call
msr PRIMASK, r4
pop {r4, pc}
#endif
ddiv_shim_call:
shimmable_table_tail_call SF_TABLE_FDIV ddiv_shim

#if !PICO_DIVIDER_DISABLE_INTERRUPTS
ddiv_save_state:
ldr r2, =(SIO_BASE)
save_div_state_and_lr
mov r2, ip
bl ddiv_shim_call
ldr r2, =(SIO_BASE)
restore_div_state_and_return
#endif

ddiv_dsub_nan_helper:
#if PICO_DOUBLE_PROPAGATE_NANS
Expand Down Expand Up @@ -592,6 +621,8 @@ regular_func sincostan_remainder
ldr r2, =0x54442D18 // 2 * M_PI
ldr r3, =0x401921FB
push {lr}
// note remainder only uses the divider thru integer divider functions
// which save and restore themselves
bl remainder
pop {pc}

Expand Down Expand Up @@ -752,13 +783,40 @@ double_wrapper_section tan
wrapper_func tan
// rom version only works for -1024 < angle < 1024
lsls r2, r1, #2
bcc 1f
bcc dtan_in_range
lsrs r2, #22
cmp r2, #9
bge 2f
1:
bge dtan_angle_out_of_range
dtan_in_range:
#if !PICO_DIVIDER_DISABLE_INTERRUPTS
// to support IRQ usage (or context switch) we must save/restore divider state around call if state is dirty
mov ip, r2
ldr r2, =(SIO_BASE)
ldr r2, [r2, #SIO_DIV_CSR_OFFSET]
lsrs r2, #SIO_DIV_CSR_DIRTY_SHIFT_FOR_CARRY
bcs dtan_save_state
mov r2, ip
#else
// to avoid worrying about IRQs (or context switches), simply disable interrupts around call
push {r4, lr}
mrs r4, PRIMASK
cpsid i
bl dtan_shim_call
msr PRIMASK, r4
pop {r4, pc}
#endif
dtan_shim_call:
shimmable_table_tail_call SF_TABLE_FTAN dtan_shim
2:
#if !PICO_DIVIDER_DISABLE_INTERRUPTS
dtan_save_state:
ldr r2, =(SIO_BASE)
save_div_state_and_lr
mov r2, ip
bl dtan_shim_call
ldr r2, =(SIO_BASE)
restore_div_state_and_return
#endif
dtan_angle_out_of_range:
#if PICO_DOUBLE_PROPAGATE_NANS
lsls r2, r1, #1
asrs r2, #21
Expand All @@ -775,7 +833,7 @@ wrapper_func tan
bl sincostan_remainder
pop {r2}
mov lr, r2
b 1b
b dtan_in_range

double_wrapper_section atan2
wrapper_func_d2 atan2
Expand Down
Loading