Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update mmu_get... and mmu_set... #8290

Merged
merged 10 commits into from
Oct 13, 2021
126 changes: 90 additions & 36 deletions cores/esp8266/mmu_iram.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,24 @@
extern "C" {
#endif

//C This turns on range checking. Is this the value you want to trigger it?
// This turns on range checking.
#ifdef DEBUG_ESP_CORE
#define DEBUG_ESP_MMU
#endif

#if defined(CORE_MOCK)
#define ets_uart_printf(...) do {} while(false)
#define XCHAL_INSTRAM0_VADDR 0x40000000
#define XCHAL_INSTRAM1_VADDR 0x40100000
#define XCHAL_INSTROM0_VADDR 0x40200000
#else
#include <sys/config.h> // For config/core-isa.h
/*
Cautiously use XCHAL_..._VADDR values where possible.
While XCHAL_..._VADDR values in core-isa.h may define the Xtensa processor
CONFIG options, they are not always an indication of DRAM, IRAM, or ROM
size or position in the address space.
*/
Comment on lines +40 to +46
Copy link
Collaborator

@mcspr mcspr Sep 25, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess make sense with the available definitions, naming would also simplify migration to anything else.

Looking at the RTOS, there is esp8266/eagle_soc.h declaring various IRAM_..., DRAM_..., RTC_... preprocessor tokens and funcs related to ranges. This is out-of-scope here, but I wonder if something like this is warranted for the repo as well (and since even the 3.x NONOS does not have anything of sorts)

#endif

/*
Expand Down Expand Up @@ -71,32 +82,34 @@ DBG_MMU_FLUSH(0)

static inline __attribute__((always_inline))
bool mmu_is_iram(const void *addr) {
#define IRAM_START 0x40100000UL
const uintptr_t iram_start = (uintptr_t)XCHAL_INSTRAM1_VADDR;
#ifndef MMU_IRAM_SIZE
#if defined(__GNUC__) && !defined(CORE_MOCK)
#warning "MMU_IRAM_SIZE was undefined, setting to 0x8000UL!"
#endif
#define MMU_IRAM_SIZE 0x8000UL
#define MMU_IRAM_SIZE 0x8000ul
#endif
#define IRAM_END (IRAM_START + MMU_IRAM_SIZE)
const uintptr_t iram_end = iram_start + MMU_IRAM_SIZE;

return (IRAM_START <= (uintptr_t)addr && IRAM_END > (uintptr_t)addr);
return (iram_start <= (uintptr_t)addr && iram_end > (uintptr_t)addr);
}

static inline __attribute__((always_inline))
bool mmu_is_dram(const void *addr) {
#define DRAM_START 0x3FF80000UL
#define DRAM_END 0x40000000UL
const uintptr_t dram_start = 0x3FFE8000ul;
// The start of the Boot ROM sits at the end of DRAM. 0x40000000ul;
const uintptr_t dram_end = (uintptr_t)XCHAL_INSTRAM0_VADDR;

return (DRAM_START <= (uintptr_t)addr && DRAM_END > (uintptr_t)addr);
return (dram_start <= (uintptr_t)addr && dram_end > (uintptr_t)addr);
}

static inline __attribute__((always_inline))
bool mmu_is_icache(const void *addr) {
#define ICACHE_START 0x40200000UL
#define ICACHE_END (ICACHE_START + 0x100000UL)
extern void _irom0_text_end(void);
const uintptr_t icache_start = (uintptr_t)XCHAL_INSTROM0_VADDR;
const uintptr_t icache_end = (uintptr_t)_irom0_text_end;

return (ICACHE_START <= (uintptr_t)addr && ICACHE_END > (uintptr_t)addr);
return (icache_start <= (uintptr_t)addr && icache_end > (uintptr_t)addr);
}

#ifdef DEBUG_ESP_MMU
Expand Down Expand Up @@ -127,90 +140,131 @@ bool mmu_is_icache(const void *addr) {
static inline __attribute__((always_inline))
uint8_t mmu_get_uint8(const void *p8) {
ASSERT_RANGE_TEST_READ(p8);
uint32_t val = (*(uint32_t *)((uintptr_t)p8 & ~0x3));
uint32_t pos = ((uintptr_t)p8 & 0x3) * 8;
// https://gist.github.com/shafik/848ae25ee209f698763cffee272a58f8#how-do-we-type-pun-correctly
// Comply with strict-aliasing rules. Using memcpy is a Standards suggested
// method for type punning. The compiler optimizer will replace the memcpy
// with an `l32i` instruction. Using __builtin_memcpy to ensure we get the
// effects of the compiler optimization and not some #define version of
// memcpy.
void *v32 = (void *)((uintptr_t)p8 & ~(uintptr_t)3u);
uint32_t val;
__builtin_memcpy(&val, v32, sizeof(uint32_t));
// Use an empty ASM to reference the 32-bit value. This will block the
// compiler from immediately optimizing to an 8-bit or 16-bit load instruction
// against IRAM memory. (This approach was inspired by
// https://github.com/esp8266/Arduino/pull/7780#discussion_r548303374)
// This issue was seen when using a constant address with the GCC 10.3
// compiler.
// As a general practice, I think referencing by way of Extended ASM R/W
// output register will stop the the compiler from reloading the value later
// as 8-bit load from IRAM.
asm volatile ("" :"+r"(val)); // inject 32-bit dependency
uint32_t pos = ((uintptr_t)p8 & 3u) * 8u;
val >>= pos;
return (uint8_t)val;
}

static inline __attribute__((always_inline))
uint16_t mmu_get_uint16(const uint16_t *p16) {
ASSERT_RANGE_TEST_READ(p16);
uint32_t val = (*(uint32_t *)((uintptr_t)p16 & ~0x3));
uint32_t pos = ((uintptr_t)p16 & 0x3) * 8;
void *v32 = (void *)((uintptr_t)p16 & ~(uintptr_t)0x3u);
uint32_t val;
__builtin_memcpy(&val, v32, sizeof(uint32_t));
asm volatile ("" :"+r"(val));
uint32_t pos = ((uintptr_t)p16 & 3u) * 8u;
val >>= pos;
return (uint16_t)val;
}

static inline __attribute__((always_inline))
int16_t mmu_get_int16(const int16_t *p16) {
ASSERT_RANGE_TEST_READ(p16);
uint32_t val = (*(uint32_t *)((uintptr_t)p16 & ~0x3));
uint32_t pos = ((uintptr_t)p16 & 0x3) * 8;
void *v32 = (void *)((uintptr_t)p16 & ~(uintptr_t)3u);
uint32_t val;
__builtin_memcpy(&val, v32, sizeof(uint32_t));
asm volatile ("" :"+r"(val));
uint32_t pos = ((uintptr_t)p16 & 3u) * 8u;
val >>= pos;
return (int16_t)val;
}

static inline __attribute__((always_inline))
uint8_t mmu_set_uint8(void *p8, const uint8_t val) {
ASSERT_RANGE_TEST_WRITE(p8);
uint32_t pos = ((uintptr_t)p8 & 0x3) * 8;
uint32_t pos = ((uintptr_t)p8 & 3u) * 8u;
uint32_t sval = val << pos;
uint32_t valmask = 0x0FF << pos;
uint32_t valmask = 0x0FFu << pos;

void *v32 = (void *)((uintptr_t)p8 & ~(uintptr_t)3u);
uint32_t ival;
__builtin_memcpy(&ival, v32, sizeof(uint32_t));
asm volatile ("" :"+r"(ival));

uint32_t *p32 = (uint32_t *)((uintptr_t)p8 & ~0x3);
uint32_t ival = *p32;
ival &= (~valmask);
ival |= sval;
*p32 = ival;
/*
This 32-bit dependency injection does not appear to be needed with the
current GCC 10.3; however, that could change in the future versions. Or, I
may not have the right test for it to fail.
*/
asm volatile ("" :"+r"(ival));
__builtin_memcpy(v32, &ival, sizeof(uint32_t));
return val;
}

static inline __attribute__((always_inline))
uint16_t mmu_set_uint16(uint16_t *p16, const uint16_t val) {
ASSERT_RANGE_TEST_WRITE(p16);
uint32_t pos = ((uintptr_t)p16 & 0x3) * 8;
uint32_t pos = ((uintptr_t)p16 & 3u) * 8u;
uint32_t sval = val << pos;
uint32_t valmask = 0x0FFFF << pos;
uint32_t valmask = 0x0FFFFu << pos;

void *v32 = (void *)((uintptr_t)p16 & ~(uintptr_t)3u);
uint32_t ival;
__builtin_memcpy(&ival, v32, sizeof(uint32_t));
asm volatile ("" :"+r"(ival));

uint32_t *p32 = (uint32_t *)((uintptr_t)p16 & ~0x3);
uint32_t ival = *p32;
ival &= (~valmask);
ival |= sval;
*p32 = ival;
asm volatile ("" :"+r"(ival));
__builtin_memcpy(v32, &ival, sizeof(uint32_t));
return val;
}

static inline __attribute__((always_inline))
int16_t mmu_set_int16(int16_t *p16, const int16_t val) {
ASSERT_RANGE_TEST_WRITE(p16);
uint32_t sval = (uint16_t)val;
uint32_t pos = ((uintptr_t)p16 & 0x3) * 8;
uint32_t pos = ((uintptr_t)p16 & 3u) * 8u;
sval <<= pos;
uint32_t valmask = 0x0FFFF << pos;
uint32_t valmask = 0x0FFFFu << pos;

void *v32 = (void *)((uintptr_t)p16 & ~(uintptr_t)3u);
uint32_t ival;
__builtin_memcpy(&ival, v32, sizeof(uint32_t));
asm volatile ("" :"+r"(ival));

uint32_t *p32 = (uint32_t *)((uintptr_t)p16 & ~0x3);
uint32_t ival = *p32;
ival &= (~valmask);
ival |= sval;
*p32 = ival;
asm volatile ("" :"+r"(ival));
__builtin_memcpy(v32, &ival, sizeof(uint32_t));
return val;
}

#if (MMU_IRAM_SIZE > 32*1024) && !defined(MMU_SEC_HEAP)
extern void _text_end(void);
#define MMU_SEC_HEAP mmu_sec_heap()
#define MMU_SEC_HEAP_SIZE mmu_sec_heap_size()

static inline __attribute__((always_inline))
void *mmu_sec_heap(void) {
uint32_t sec_heap = (uint32_t)_text_end + 32;
return (void *)(sec_heap &= ~7);
extern void _text_end(void);
uintptr_t sec_heap = (uintptr_t)_text_end + (uintptr_t)32u;
return (void *)(sec_heap &= ~(uintptr_t)7u);
}

static inline __attribute__((always_inline))
size_t mmu_sec_heap_size(void) {
return (size_t)0xC000UL - ((size_t)mmu_sec_heap() - 0x40100000UL);
return (size_t)0xC000ul - ((uintptr_t)mmu_sec_heap() - (uintptr_t)XCHAL_INSTRAM1_VADDR);
}
#endif

Expand Down
12 changes: 9 additions & 3 deletions libraries/esp8266/examples/IramReserve/IramReserve.ino
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,12 @@
#include <umm_malloc/umm_malloc.h>
#if defined(UMM_HEAP_IRAM)

#if defined(CORE_MOCK)
#define XCHAL_INSTRAM1_VADDR 0x40100000
#else
#include <sys/config.h> // For config/core-isa.h
#endif

// durable - as in long life, persisting across reboots.
struct durable {
uint32_t bootCounter;
Expand All @@ -30,7 +36,7 @@ struct durable {
#define IRAM_RESERVE_SZ ((sizeof(struct durable) + 7UL) & ~7UL)

// Position its address just above the reduced 2nd Heap.
#define IRAM_RESERVE (0x40100000UL + 0xC000UL - IRAM_RESERVE_SZ)
#define IRAM_RESERVE ((uintptr_t)XCHAL_INSTRAM1_VADDR + 0xC000UL - IRAM_RESERVE_SZ)

// Define a reference with the right properties to make access easier.
#define DURABLE ((struct durable *)IRAM_RESERVE)
Expand Down Expand Up @@ -100,9 +106,9 @@ extern "C" void umm_init_iram(void) {
adjustments and checksums. These can affect the persistence of data across
reboots.
*/
uint32_t sec_heap = (uint32_t)_text_end + 32;
uintptr_t sec_heap = (uintptr_t)_text_end + 32;
sec_heap &= ~7;
size_t sec_heap_sz = 0xC000UL - (sec_heap - 0x40100000UL);
size_t sec_heap_sz = 0xC000UL - (sec_heap - (uintptr_t)XCHAL_INSTRAM1_VADDR);
sec_heap_sz -= IRAM_RESERVE_SZ; // Shrink IRAM heap
if (0xC000UL > sec_heap_sz) {

Expand Down
8 changes: 7 additions & 1 deletion libraries/esp8266/examples/MMU48K/MMU48K.ino
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,12 @@
#include <umm_malloc/umm_malloc.h>
#include <umm_malloc/umm_heap_select.h>

#if defined(CORE_MOCK)
#define XCHAL_INSTRAM1_VADDR 0x40100000
#else
#include <sys/config.h> // For config/core-isa.h
#endif

uint32_t timed_byte_read(char *pc, uint32_t * o);
uint32_t timed_byte_read2(char *pc, uint32_t * o);
int divideA_B(int a, int b);
Expand Down Expand Up @@ -102,7 +108,7 @@ void print_mmu_status(Print& oStream) {
#ifdef MMU_IRAM_SIZE
oStream.printf_P(PSTR(" IRAM Size: %u"), MMU_IRAM_SIZE);
oStream.println();
const uint32_t iram_free = MMU_IRAM_SIZE - (uint32_t)((uintptr_t)_text_end - 0x40100000UL);
const uint32_t iram_free = MMU_IRAM_SIZE - (uint32_t)((uintptr_t)_text_end - (uintptr_t)XCHAL_INSTRAM1_VADDR);
oStream.printf_P(PSTR(" IRAM free: %u"), iram_free);
oStream.println();
#endif
Expand Down
Loading