Skip to content

Commit 268acb7

Browse files
agatti authored and dpgeorge committed
py/emitinlinerv32: Add inline assembler support for RV32.
This commit adds support for writing inline assembler functions when targeting an RV32IMC processor. Given that this takes up a bit of rodata space due to its large instruction decoding table and its extensive error messages, it is enabled by default only on offline targets such as mpy-cross and the qemu port.

Signed-off-by: Alessandro Gatti <a.gatti@frob.it>
1 parent 3044233 commit 268acb7

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

47 files changed

+2649
-45
lines changed

mpy-cross/mpconfigport.h

+1
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
#define MICROPY_EMIT_INLINE_XTENSA (1)
4848
#define MICROPY_EMIT_XTENSAWIN (1)
4949
#define MICROPY_EMIT_RV32 (1)
50+
#define MICROPY_EMIT_INLINE_RV32 (1)
5051
#define MICROPY_EMIT_NATIVE_DEBUG (1)
5152
#define MICROPY_EMIT_NATIVE_DEBUG_PRINTER (&mp_stdout_print)
5253

ports/qemu/Makefile

+2-2
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,10 @@ QSTR_DEFS = qstrdefsport.h
1919
MICROPY_ROM_TEXT_COMPRESSION ?= 1
2020

2121
ifeq ($(QEMU_ARCH),arm)
22-
FROZEN_MANIFEST ?= "require('unittest'); freeze('test-frzmpy')"
22+
FROZEN_MANIFEST ?= "require('unittest'); freeze('test-frzmpy', ('frozen_asm_thumb.py', 'frozen_const.py', 'frozen_viper.py', 'native_frozen_align.py'))"
2323
endif
2424
ifeq ($(QEMU_ARCH),riscv32)
25-
FROZEN_MANIFEST ?= "require('unittest'); freeze('test-frzmpy', ('frozen_const.py', 'frozen_viper.py', 'native_frozen_align.py'))"
25+
FROZEN_MANIFEST ?= "require('unittest'); freeze('test-frzmpy', ('frozen_asm_rv32.py', 'frozen_const.py', 'frozen_viper.py', 'native_frozen_align.py'))"
2626
endif
2727

2828
# include py core make definitions

ports/qemu/boards/MICROBIT.mk

+3
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,6 @@ LDSCRIPT = mcu/arm/nrf51.ld
1111
SRC_BOARD_O = shared/runtime/gchelper_native.o shared/runtime/gchelper_thumb1.o
1212

1313
MPY_CROSS_FLAGS += -march=armv7m
14+
15+
# These RV32 tests don't run on Thumb, so exclude them.
16+
RUN_TESTS_ARGS = --exclude 'inlineasm/rv32'

ports/qemu/boards/MPS2_AN385.mk

+3
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,6 @@ LDSCRIPT = mcu/arm/mps2.ld
1010
SRC_BOARD_O = shared/runtime/gchelper_native.o shared/runtime/gchelper_thumb2.o
1111

1212
MPY_CROSS_FLAGS += -march=armv7m
13+
14+
# These RV32 tests don't run on Thumb, so exclude them.
15+
RUN_TESTS_ARGS = --exclude 'inlineasm/rv32'

ports/qemu/boards/NETDUINO2.mk

+3
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,6 @@ LDSCRIPT = mcu/arm/stm32.ld
1010
SRC_BOARD_O = shared/runtime/gchelper_native.o shared/runtime/gchelper_thumb2.o
1111

1212
MPY_CROSS_FLAGS += -march=armv7m
13+
14+
# These RV32 tests don't run on Thumb, so exclude them.
15+
RUN_TESTS_ARGS = --exclude 'inlineasm/rv32'

ports/qemu/boards/SABRELITE.mk

+1-1
Original file line numberDiff line numberDiff line change
@@ -16,4 +16,4 @@ SRC_BOARD_O = shared/runtime/gchelper_generic.o
1616
MPY_CROSS_FLAGS += -march=armv6
1717

1818
# These tests don't work on Cortex-A9, so exclude them.
19-
RUN_TESTS_ARGS = --exclude 'inlineasm/thumb/(asmdiv|asmspecialregs).py'
19+
RUN_TESTS_ARGS = --exclude 'inlineasm/rv32|inlineasm/thumb/(asmdiv|asmspecialregs).py'

ports/qemu/boards/VIRT_RV32.mk

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,6 @@ SRC_BOARD_O += shared/runtime/gchelper_native.o shared/runtime/gchelper_rv32i.o
1111
MPY_CROSS_FLAGS += -march=rv32imc
1212

1313
# These Thumb tests don't run on RV32, so exclude them.
14-
RUN_TESTS_ARGS = --exclude 'inlineasm/thumb|qemu/asm_test'
14+
RUN_TESTS_ARGS = --exclude 'inlineasm/thumb'
1515

1616
RUN_NATMODTESTS_ARGS = --arch rv32imc

ports/qemu/mpconfigport.h

+1
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
#define MICROPY_MAKE_POINTER_CALLABLE(p) ((void *)((mp_uint_t)(p) | 1))
4040
#elif defined(__riscv)
4141
#define MICROPY_EMIT_RV32 (1)
42+
#define MICROPY_EMIT_INLINE_RV32 (1)
4243
#endif
4344

4445
#define MICROPY_MALLOC_USES_ALLOCATED_SIZE (1)
ports/qemu/test-frzmpy/frozen_asm_rv32.py

+31
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# Test freezing inline-asm code.
2+
3+
# ruff: noqa: F821 - @asm_rv32 decorator adds names to function scope
4+
5+
import micropython
6+
7+
8+
@micropython.asm_rv32
9+
def asm_add(a0, a1):
10+
add(a0, a0, a1)
11+
12+
13+
@micropython.asm_rv32
14+
def asm_add1(a0) -> object:
15+
slli(a0, a0, 1)
16+
addi(a0, a0, 3)
17+
18+
19+
@micropython.asm_rv32
20+
def asm_cast_bool(a0) -> bool:
21+
pass
22+
23+
24+
@micropython.asm_rv32
25+
def asm_shift_int(a0) -> int:
26+
slli(a0, a0, 29)
27+
28+
29+
@micropython.asm_rv32
30+
def asm_shift_uint(a0) -> uint:
31+
slli(a0, a0, 29)

py/asmrv32.c

+14-20
Original file line numberDiff line numberDiff line change
@@ -45,12 +45,6 @@
4545
#endif
4646

4747
#define INTERNAL_TEMPORARY ASM_RV32_REG_S0
48-
#define AVAILABLE_REGISTERS_COUNT 32
49-
50-
#define IS_IN_C_REGISTER_WINDOW(register_number) \
51-
(((register_number) >= ASM_RV32_REG_X8) && ((register_number) <= ASM_RV32_REG_X15))
52-
#define MAP_IN_C_REGISTER_WINDOW(register_number) \
53-
((register_number) - ASM_RV32_REG_X8)
5448

5549
#define FIT_UNSIGNED(value, bits) (((value) & ~((1U << (bits)) - 1)) == 0)
5650
#define FIT_SIGNED(value, bits) \
@@ -106,7 +100,6 @@ static void split_immediate(mp_int_t immediate, mp_uint_t *upper, mp_uint_t *low
106100
// Turn the lower half from unsigned to signed.
107101
if ((*lower & 0x800) != 0) {
108102
*upper += 0x1000;
109-
*lower -= 0x1000;
110103
}
111104
}
112105

@@ -180,7 +173,7 @@ void asm_rv32_emit_optimised_load_immediate(asm_rv32_t *state, mp_uint_t rd, mp_
180173

181174
static void emit_registers_store(asm_rv32_t *state, mp_uint_t registers_mask) {
182175
mp_uint_t offset = 0;
183-
for (mp_uint_t register_index = 0; register_index < AVAILABLE_REGISTERS_COUNT; register_index++) {
176+
for (mp_uint_t register_index = 0; register_index < RV32_AVAILABLE_REGISTERS_COUNT; register_index++) {
184177
if (registers_mask & (1U << register_index)) {
185178
assert(FIT_UNSIGNED(offset >> 2, 6) && "Registers save stack offset out of range.");
186179
// c.swsp register, offset
@@ -192,7 +185,7 @@ static void emit_registers_store(asm_rv32_t *state, mp_uint_t registers_mask) {
192185

193186
static void emit_registers_load(asm_rv32_t *state, mp_uint_t registers_mask) {
194187
mp_uint_t offset = 0;
195-
for (mp_uint_t register_index = 0; register_index < AVAILABLE_REGISTERS_COUNT; register_index++) {
188+
for (mp_uint_t register_index = 0; register_index < RV32_AVAILABLE_REGISTERS_COUNT; register_index++) {
196189
if (registers_mask & (1U << register_index)) {
197190
assert(FIT_UNSIGNED(offset >> 2, 6) && "Registers load stack offset out of range.");
198191
// c.lwsp register, offset
@@ -262,7 +255,7 @@ static bool calculate_displacement_for_label(asm_rv32_t *state, mp_uint_t label,
262255

263256
void asm_rv32_entry(asm_rv32_t *state, mp_uint_t locals) {
264257
state->saved_registers_mask |= (1U << REG_FUN_TABLE) | (1U << REG_LOCAL_1) | \
265-
(1U << REG_LOCAL_2) | (1U << REG_LOCAL_3) | (1U << INTERNAL_TEMPORARY);
258+
(1U << REG_LOCAL_2) | (1U << REG_LOCAL_3);
266259
state->locals_count = locals;
267260
emit_function_prologue(state, state->saved_registers_mask);
268261
}
@@ -281,10 +274,11 @@ void asm_rv32_emit_call_ind(asm_rv32_t *state, mp_uint_t index) {
281274
mp_uint_t offset = index * ASM_WORD_SIZE;
282275
state->saved_registers_mask |= (1U << ASM_RV32_REG_RA);
283276

284-
if (IS_IN_C_REGISTER_WINDOW(REG_FUN_TABLE) && IS_IN_C_REGISTER_WINDOW(INTERNAL_TEMPORARY) && FIT_UNSIGNED(offset, 6)) {
277+
if (RV32_IS_IN_C_REGISTER_WINDOW(REG_FUN_TABLE) && RV32_IS_IN_C_REGISTER_WINDOW(INTERNAL_TEMPORARY) && FIT_UNSIGNED(offset, 6)) {
278+
state->saved_registers_mask |= (1U << INTERNAL_TEMPORARY);
285279
// c.lw temporary, offset(fun_table)
286280
// c.jalr temporary
287-
asm_rv32_opcode_clw(state, MAP_IN_C_REGISTER_WINDOW(INTERNAL_TEMPORARY), MAP_IN_C_REGISTER_WINDOW(REG_FUN_TABLE), offset);
281+
asm_rv32_opcode_clw(state, RV32_MAP_IN_C_REGISTER_WINDOW(INTERNAL_TEMPORARY), RV32_MAP_IN_C_REGISTER_WINDOW(REG_FUN_TABLE), offset);
288282
asm_rv32_opcode_cjalr(state, INTERNAL_TEMPORARY);
289283
return;
290284
}
@@ -341,9 +335,9 @@ void asm_rv32_emit_jump_if_reg_nonzero(asm_rv32_t *state, mp_uint_t rs, mp_uint_
341335
ptrdiff_t displacement = 0;
342336
bool can_emit_short_jump = calculate_displacement_for_label(state, label, &displacement);
343337

344-
if (can_emit_short_jump && FIT_SIGNED(displacement, 8) && IS_IN_C_REGISTER_WINDOW(rs)) {
338+
if (can_emit_short_jump && FIT_SIGNED(displacement, 8) && RV32_IS_IN_C_REGISTER_WINDOW(rs)) {
345339
// c.bnez rs', displacement
346-
asm_rv32_opcode_cbnez(state, MAP_IN_C_REGISTER_WINDOW(rs), displacement);
340+
asm_rv32_opcode_cbnez(state, RV32_MAP_IN_C_REGISTER_WINDOW(rs), displacement);
347341
return;
348342
}
349343

@@ -364,8 +358,8 @@ void asm_rv32_emit_jump_if_reg_nonzero(asm_rv32_t *state, mp_uint_t rs, mp_uint_
364358
// jalr zero, temporary, LO(displacement) ; PC + 8
365359
// ... ; PC + 12
366360

367-
if (can_emit_short_jump && IS_IN_C_REGISTER_WINDOW(rs)) {
368-
asm_rv32_opcode_cbeqz(state, MAP_IN_C_REGISTER_WINDOW(rs), 10);
361+
if (can_emit_short_jump && RV32_IS_IN_C_REGISTER_WINDOW(rs)) {
362+
asm_rv32_opcode_cbeqz(state, RV32_MAP_IN_C_REGISTER_WINDOW(rs), 10);
369363
// Compensate for the C.BEQZ opcode.
370364
displacement -= ASM_HALFWORD_SIZE;
371365
} else {
@@ -438,9 +432,9 @@ void asm_rv32_emit_mov_reg_local(asm_rv32_t *state, mp_uint_t rd, mp_uint_t loca
438432
void asm_rv32_emit_mov_reg_local_addr(asm_rv32_t *state, mp_uint_t rd, mp_uint_t local) {
439433
mp_uint_t offset = state->locals_stack_offset + (local * ASM_WORD_SIZE);
440434

441-
if (FIT_UNSIGNED(offset, 10) && offset != 0 && IS_IN_C_REGISTER_WINDOW(rd)) {
435+
if (FIT_UNSIGNED(offset, 10) && offset != 0 && RV32_IS_IN_C_REGISTER_WINDOW(rd)) {
442436
// c.addi4spn rd', offset
443-
asm_rv32_opcode_caddi4spn(state, MAP_IN_C_REGISTER_WINDOW(rd), offset);
437+
asm_rv32_opcode_caddi4spn(state, RV32_MAP_IN_C_REGISTER_WINDOW(rd), offset);
444438
return;
445439
}
446440

@@ -459,9 +453,9 @@ void asm_rv32_emit_mov_reg_local_addr(asm_rv32_t *state, mp_uint_t rd, mp_uint_t
459453
void asm_rv32_emit_load_reg_reg_offset(asm_rv32_t *state, mp_uint_t rd, mp_uint_t rs, mp_int_t offset) {
460454
mp_int_t scaled_offset = offset * sizeof(ASM_WORD_SIZE);
461455

462-
if (scaled_offset >= 0 && IS_IN_C_REGISTER_WINDOW(rd) && IS_IN_C_REGISTER_WINDOW(rs) && FIT_UNSIGNED(scaled_offset, 6)) {
456+
if (scaled_offset >= 0 && RV32_IS_IN_C_REGISTER_WINDOW(rd) && RV32_IS_IN_C_REGISTER_WINDOW(rs) && FIT_UNSIGNED(scaled_offset, 6)) {
463457
// c.lw rd', offset(rs')
464-
asm_rv32_opcode_clw(state, MAP_IN_C_REGISTER_WINDOW(rd), MAP_IN_C_REGISTER_WINDOW(rs), scaled_offset);
458+
asm_rv32_opcode_clw(state, RV32_MAP_IN_C_REGISTER_WINDOW(rd), RV32_MAP_IN_C_REGISTER_WINDOW(rs), scaled_offset);
465459
return;
466460
}
467461

0 commit comments

Comments
 (0)