From 3c13ca0ed7798f4b78deb28fdb11e64d1a805a40 Mon Sep 17 00:00:00 2001 From: William Vinnicombe Date: Tue, 21 Jan 2025 12:18:44 +0000 Subject: [PATCH] Remove pico_rand and use boot random instead Rejig everything to fit into scratch, so full 512k of SRAM is available for the user --- enc_bootloader/CMakeLists.txt | 15 +- enc_bootloader/enc_bootloader.c | 18 +- enc_bootloader/memmap_enc_bootloader.ld | 259 ++++++++++++++++++++++++ main.cpp | 2 +- 4 files changed, 272 insertions(+), 22 deletions(-) create mode 100644 enc_bootloader/memmap_enc_bootloader.ld diff --git a/enc_bootloader/CMakeLists.txt b/enc_bootloader/CMakeLists.txt index c681401..baa1792 100644 --- a/enc_bootloader/CMakeLists.txt +++ b/enc_bootloader/CMakeLists.txt @@ -35,13 +35,13 @@ if (NOT USE_PRECOMPILED) target_link_libraries(enc_bootloader pico_stdlib - pico_rand ) # use stack guards, as AES variables are written near the stack target_compile_definitions(enc_bootloader PRIVATE PICO_USE_STACK_GUARDS=1 PICO_STACK_SIZE=0x200 + PICO_NO_PROGRAM_INFO=1 # No heap is used PICO_HEAP_SIZE=0 # These inits are not required @@ -51,17 +51,8 @@ if (NOT USE_PRECOMPILED) pico_minimize_runtime(enc_bootloader) pico_set_binary_type(enc_bootloader no_flash) - set(USE_USB_DPRAM FALSE) - # create linker script to run from 0x20070000 - file(READ ${PICO_LINKER_SCRIPT_PATH}/memmap_no_flash.ld LINKER_SCRIPT) - if (USE_USB_DPRAM) - string(REPLACE "RAM(rwx) : ORIGIN = 0x20000000, LENGTH = 512k" "RAM(rwx) : ORIGIN = 0x2007F000, LENGTH = 4k" LINKER_SCRIPT "${LINKER_SCRIPT}") - target_compile_definitions(enc_bootloader PRIVATE USE_USB_DPRAM=1) - else() - string(REPLACE "RAM(rwx) : ORIGIN = 0x20000000, LENGTH = 512k" "RAM(rwx) : ORIGIN = 0x2007F000, LENGTH = 4k" LINKER_SCRIPT "${LINKER_SCRIPT}") - endif() - file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/memmap_enc_bootloader.ld "${LINKER_SCRIPT}") - pico_set_linker_script(enc_bootloader ${CMAKE_CURRENT_BINARY_DIR}/memmap_enc_bootloader.ld) + set(USE_USB_DPRAM TRUE) + 
pico_set_linker_script(enc_bootloader ${CMAKE_CURRENT_LIST_DIR}/memmap_enc_bootloader.ld) pico_add_dis_output(enc_bootloader) else() project(enc_bootloader C CXX ASM) diff --git a/enc_bootloader/enc_bootloader.c b/enc_bootloader/enc_bootloader.c index 67be909..445b517 100644 --- a/enc_bootloader/enc_bootloader.c +++ b/enc_bootloader/enc_bootloader.c @@ -11,7 +11,6 @@ #include "pico/stdlib.h" #include "boot/picobin.h" #include "pico/bootrom.h" -#include "pico/rand.h" #include "hardware/structs/otp.h" #if USE_USB_DPRAM #include "hardware/structs/usb_dpram.h" @@ -34,10 +33,12 @@ extern uint32_t lut_a_map[1]; extern uint32_t lut_b_map[1]; extern uint32_t rstate_sha[4],rstate_lfsr[2]; -void __scratch_x("aes") resetrng() { +void resetrng() { uint32_t f0,f1; - do f0=get_rand_32(); while(f0==0); // make sure we don't initialise the LFSR to zero - f1=get_rand_32(); + uint32_t boot_random[4]; + rom_get_boot_random(boot_random); + f0=boot_random[0]; if(f0==0) f0=boot_random[2]|1; // make sure we don't initialise the LFSR to zero; boot_random[0] never changes, so retrying it would spin forever + f1=boot_random[1]; rstate_sha[0]=f0&0xffffff00; // bottom byte must be zero (or 4) for SHA, representing "out of data" rstate_sha[1]=f1; rstate_sha[2]=0x41414141; @@ -50,7 +51,7 @@ void __scratch_x("aes") resetrng() { #endif } -static void __scratch_x("aes") init_lut_map() { +static void init_lut_map() { int i; for(i=0;i<256;i++) lut_b[i]=gen_rand_sha()&0xff, lut_a[i]^=lut_b[i]; lut_a_map[0]=0; @@ -58,7 +59,7 @@ static void __scratch_x("aes") init_lut_map() { remap(); } -static void __scratch_x("aes") init_aes() { +static void init_aes() { resetrng(); gen_lut_sbox(); init_lut_map(); @@ -67,8 +68,7 @@ static void __scratch_x("aes") init_aes() { #if USE_USB_DPRAM uint8_t* workarea = (uint8_t*)USBCTRL_DPRAM_BASE; #else -// static __attribute__((aligned(4))) uint8_t workarea[4 * 1024]; -uint8_t* workarea = (uint8_t*)SRAM_SCRATCH_Y_BASE; +uint8_t* workarea = (uint8_t*)0x20080200; // AES Code & workspace from 0x20080180 -> 0x20081600 #endif int main() { @@ -127,7
+127,7 @@ int main() { int rc = rom_chain_image( workarea, - 4 * 1024 - PICO_STACK_SIZE, // Don't use stack in workarea + 4 * 1024, data_start_addr, data_size ); diff --git a/enc_bootloader/memmap_enc_bootloader.ld b/enc_bootloader/memmap_enc_bootloader.ld new file mode 100644 index 0000000..7becc19 --- /dev/null +++ b/enc_bootloader/memmap_enc_bootloader.ld @@ -0,0 +1,259 @@ +/* Based on GCC ARM embedded samples. + Defines the following symbols for use by code: + __exidx_start + __exidx_end + __etext + __data_start__ + __preinit_array_start + __preinit_array_end + __init_array_start + __init_array_end + __fini_array_start + __fini_array_end + __data_end__ + __bss_start__ + __bss_end__ + __end__ + end + __HeapLimit + __StackLimit + __StackTop + __stack (== StackTop) +*/ + +MEMORY +{ + RAM_START(rwx) : ORIGIN = 0x20080000, LENGTH = 0x180 + SCRATCH_X(rwx) : ORIGIN = 0x20080180, LENGTH = 0xE80 + SCRATCH_Y(rwx) : ORIGIN = 0x20081000, LENGTH = 0x800 + RAM(rwx) : ORIGIN = 0x20081800, LENGTH = 0x800 +} + +ENTRY(_entry_point) + +SECTIONS +{ + /* Note unlike RP2040, we start the image with a vector table even for + NO_FLASH builds. On Arm, the bootrom expects a VT at the start of the + image by default; on RISC-V, the default is to enter the image at its + lowest address, so an IMAGEDEF item is required to specify the + nondefault entry point. */ + + .start_text : { + __logical_binary_start = .; + /* Vectors require 512-byte alignment on v8-M when >48 IRQs are used, + so we would waste RAM if the vector table were not at the + start. */ + KEEP (*(.vectors)) + KEEP (*(.binary_info_header)) + __binary_info_header_end = .; + KEEP (*(.embedded_block)) + __embedded_block_end = .; + } > RAM_START + + .text : { + __reset_start = .; + KEEP (*(.reset)) + __reset_end = .; + *(.time_critical*) + *(.text*) + . 
= ALIGN(4); + *(.init) + *(.fini) + /* Pull all c'tors into .text */ + *crtbegin.o(.ctors) + *crtbegin?.o(.ctors) + *(EXCLUDE_FILE(*crtend?.o *crtend.o) .ctors) + *(SORT(.ctors.*)) + *(.ctors) + /* Followed by destructors */ + *crtbegin.o(.dtors) + *crtbegin?.o(.dtors) + *(EXCLUDE_FILE(*crtend?.o *crtend.o) .dtors) + *(SORT(.dtors.*)) + *(.dtors) + + *(.eh_frame*) + } > RAM + + .rodata : { + . = ALIGN(4); + *(.rodata*) + *(.srodata*) + . = ALIGN(4); + *(SORT_BY_ALIGNMENT(SORT_BY_NAME(.flashdata*))) + . = ALIGN(4); + } > RAM + + .ARM.extab : + { + *(.ARM.extab* .gnu.linkonce.armextab.*) + } > RAM + + __exidx_start = .; + .ARM.exidx : + { + *(.ARM.exidx* .gnu.linkonce.armexidx.*) + } > RAM + __exidx_end = .; + + /* Machine inspectable binary information */ + . = ALIGN(4); + __binary_info_start = .; + .binary_info : + { + KEEP(*(.binary_info.keep.*)) + *(.binary_info.*) + } > RAM + __binary_info_end = .; + . = ALIGN(4); + + .data : { + __data_start__ = .; + *(vtable) + *(.data*) + *(.sdata*) + + . = ALIGN(4); + *(.after_data.*) + . = ALIGN(4); + /* mutex array */ + PROVIDE_HIDDEN (__mutex_array_start = .); + KEEP(*(SORT(.mutex_array.*))) + KEEP(*(.mutex_array)) + PROVIDE_HIDDEN (__mutex_array_end = .); + + . = ALIGN(4); + /* preinit data */ + PROVIDE_HIDDEN (__preinit_array_start = .); + KEEP(*(SORT(.preinit_array.*))) + KEEP(*(.preinit_array)) + PROVIDE_HIDDEN (__preinit_array_end = .); + + . = ALIGN(4); + /* init data */ + PROVIDE_HIDDEN (__init_array_start = .); + KEEP(*(SORT(.init_array.*))) + KEEP(*(.init_array)) + PROVIDE_HIDDEN (__init_array_end = .); + + . = ALIGN(4); + /* fini data */ + PROVIDE_HIDDEN (__fini_array_start = .); + *(SORT(.fini_array.*)) + *(.fini_array) + PROVIDE_HIDDEN (__fini_array_end = .); + + *(.jcr) + . = ALIGN(4); + } > RAM + + .tdata : { + . = ALIGN(4); + *(.tdata .tdata.* .gnu.linkonce.td.*) + /* All data end */ + __tdata_end = .; + } > RAM + PROVIDE(__data_end__ = .); + + .uninitialized_data (NOLOAD): { + .
= ALIGN(4); + *(.uninitialized_data*) + } > RAM + /* __etext is (for backwards compatibility) the name of the .data init source pointer (...) */ + __etext = LOADADDR(.data); + + .tbss (NOLOAD) : { + . = ALIGN(4); + __bss_start__ = .; + __tls_base = .; + *(.tbss .tbss.* .gnu.linkonce.tb.*) + *(.tcommon) + + __tls_end = .; + } > RAM + + .bss (NOLOAD) : { + . = ALIGN(4); + __tbss_end = .; + + *(SORT_BY_ALIGNMENT(SORT_BY_NAME(.bss*))) + *(COMMON) + PROVIDE(__global_pointer$ = . + 2K); + *(.sbss*) + . = ALIGN(4); + __bss_end__ = .; + } > RAM + + .heap (NOLOAD): + { + __end__ = .; + end = __end__; + KEEP(*(.heap*)) + } > RAM + /* historically on GCC sbrk was growing past __HeapLimit to __StackLimit, however + to be more compatible, we now set __HeapLimit explicitly to where the end of the heap is */ + __HeapLimit = ORIGIN(RAM) + LENGTH(RAM); + + /* Start and end symbols must be word-aligned */ + .scratch_x : { + __scratch_x_start__ = .; + *(.scratch_x.*) + . = ALIGN(4); + __scratch_x_end__ = .; + } > SCRATCH_X + __scratch_x_source__ = LOADADDR(.scratch_x); + + .scratch_y : { + __scratch_y_start__ = .; + *(.scratch_y.*) + . = ALIGN(4); + __scratch_y_end__ = .; + } > SCRATCH_Y + __scratch_y_source__ = LOADADDR(.scratch_y); + + /* .stack*_dummy section doesn't contain any symbols. It is only + * used by the linker to calculate the size of the stack sections, and assign + * values to stack symbols later + * + * stack1 section may be empty/missing if platform_launch_core1 is not used */ + + /* by default we put core 0 stack at the end of scratch Y, so that if core 1 + * stack is not used then all of SCRATCH_X is free.
+ */ + .stack1_dummy (NOLOAD): + { + *(.stack1*) + } > SCRATCH_X + .stack_dummy (NOLOAD): + { + KEEP(*(.stack*)) + } > SCRATCH_Y + + /* stack limit is poorly named, but historically is maximum heap ptr */ + __StackLimit = ORIGIN(RAM) + LENGTH(RAM); + __StackOneTop = ORIGIN(SCRATCH_X) + LENGTH(SCRATCH_X); + __StackTop = ORIGIN(SCRATCH_Y) + LENGTH(SCRATCH_Y); + __StackOneBottom = __StackOneTop - SIZEOF(.stack1_dummy); + __StackBottom = __StackTop - SIZEOF(.stack_dummy); + PROVIDE(__stack = __StackTop); + + /* picolibc and LLVM */ + PROVIDE (__heap_start = __end__); + PROVIDE (__heap_end = __HeapLimit); + PROVIDE( __tls_align = MAX(ALIGNOF(.tdata), ALIGNOF(.tbss)) ); + PROVIDE( __tls_size_align = (__tls_size + __tls_align - 1) & ~(__tls_align - 1)); + PROVIDE( __arm32_tls_tcb_offset = MAX(8, __tls_align) ); + + /* llvm-libc */ + PROVIDE (_end = __end__); + PROVIDE (__llvm_libc_heap_limit = __HeapLimit); + + /* Check if data + heap + stack exceeds RAM limit */ + ASSERT(__StackLimit >= __HeapLimit, "region RAM overflowed") + + ASSERT( __binary_info_header_end - __logical_binary_start <= 1024, "Binary info must be in first 1024 bytes of the binary") + ASSERT( __embedded_block_end - __logical_binary_start <= 4096, "Embedded block must be in first 4096 bytes of the binary") + + /* todo assert on extra code */ +} diff --git a/main.cpp b/main.cpp index 58f8bce..90cf263 100644 --- a/main.cpp +++ b/main.cpp @@ -4938,7 +4938,7 @@ bool encrypt_command::execute(device_map &devices) { enc_elf->read_file(tmp); // Bootloader size - auto bootloader_txt = enc_elf->get_section(".text"); + auto bootloader_txt = enc_elf->get_section(".start_text"); uint32_t bootloader_size = 0x20082000 - bootloader_txt->virtual_address(); // Move bootloader down in physical space to start of SRAM (which will be start of flash once packaged)