Skip to content

Commit 516aa62

Browse files
committed
Added new functions for allocation and heap checking
- added `allocate_buffer()` function that can be used to allocate large buffers: it takes parameters to set the preferred RAM location, including 32-bit accessible RAM on ESP32. If the heap runs low, it switches to PSRAM or returns null
- added `getFreeHeapSize()` and `getContiguousFreeHeap()` helper functions for all platforms to correctly report free usable heap
- updated some constants
- updated segment data allocation to free the data if it is large
1 parent b96a950 commit 516aa62

File tree

7 files changed

+140
-134
lines changed

7 files changed

+140
-134
lines changed

wled00/FX.h

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ extern byte realtimeMode; // used in getMappedPixelIndex()
9898
#define MAX_SEGMENT_DATA (MAX_NUM_SEGMENTS*1024) // 20k by default (S2 is short on free RAM)
9999
#else
100100
#define MAX_NUM_SEGMENTS 32 // warning: going beyond 32 may consume too much RAM for stable operation
101-
#define MAX_SEGMENT_DATA (MAX_NUM_SEGMENTS*2560) // 80k by default
101+
#define MAX_SEGMENT_DATA (MAX_NUM_SEGMENTS*1920) // 60k by default
102102
#endif
103103

104104
/* How much data bytes each segment should max allocate to leave enough space for other segments,
@@ -599,12 +599,8 @@ class Segment {
599599
, _t(nullptr)
600600
{
601601
DEBUGFX_PRINTF_P(PSTR("-- Creating segment: %p [%d,%d:%d,%d]\n"), this, (int)start, (int)stop, (int)startY, (int)stopY);
602-
// allocate render buffer (always entire segment), prefer PSRAM if DRAM is running low. Note: impact on FPS with PSRAM buffer is low (~2% with QSPI PSRAM)
603-
#ifdef CONFIG_IDF_TARGET_ESP32
604-
pixels = static_cast<uint32_t*>(pixelbuffer_malloc(sizeof(uint32_t) * length()));
605-
#else
606-
pixels = static_cast<uint32_t*>(p_calloc(length(), sizeof(uint32_t))); // prefer PSRAM. note: error handling is also done in isActive()
607-
#endif
602+
// allocate render buffer (always entire segment), prefer PSRAM if DRAM is running low. Note: impact on FPS with PSRAM buffer is low (<2% with QSPI PSRAM)
603+
pixels = static_cast<uint32_t*>(allocate_buffer(length() * sizeof(uint32_t), BFRALLOC_PREFER_PSRAM | BFRALLOC_NOBYTEACCESS | BFRALLOC_CLEAR));
608604
if (!pixels) {
609605
DEBUGFX_PRINTLN(F("!!! Not enough RAM for pixel buffer !!!"));
610606
extern byte errorFlag;

wled00/FX_fcn.cpp

Lines changed: 29 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -69,14 +69,9 @@ Segment::Segment(const Segment &orig) {
6969
if (orig.name) { name = static_cast<char*>(d_malloc(strlen(orig.name)+1)); if (name) strcpy(name, orig.name); }
7070
if (orig.data) { if (allocateData(orig._dataLen)) memcpy(data, orig.data, orig._dataLen); }
7171
if (orig.pixels) {
72-
pixels = static_cast<uint32_t*>(d_malloc(sizeof(uint32_t) * orig.length()));
73-
74-
// pixels = static_cast<uint32_t*>(heap_caps_malloc(orig.length()* sizeof(uint32_t), MALLOC_CAP_32BIT | MALLOC_CAP_INTERNAL)); // use this for ESP32
75-
//pixels = static_cast<uint32_t*>(heap_caps_malloc(sizeof(uint32_t) * orig.length(), MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT));
76-
//pixels = static_cast<uint32_t*>(heap_caps_malloc(sizeof(uint32_t) * orig.length(), MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT));
77-
78-
79-
if (pixels) memcpy(pixels, orig.pixels, sizeof(uint32_t) * orig.length());
72+
// allocate pixel buffer: prefer PSRAM if DRAM is running low
73+
pixels = static_cast<uint32_t*>(allocate_buffer(orig.length() * sizeof(uint32_t), BFRALLOC_PREFER_PSRAM | BFRALLOC_NOBYTEACCESS));
74+
if (pixels) memcpy(pixels, orig.pixels, orig.length() * sizeof(uint32_t));
8075
else {
8176
DEBUG_PRINTLN(F("!!! Not enough RAM for pixel buffer !!!"));
8277
errorFlag = ERR_NORAM_PX;
@@ -116,12 +111,9 @@ Segment& Segment::operator= (const Segment &orig) {
116111
if (orig.name) { name = static_cast<char*>(d_malloc(strlen(orig.name)+1)); if (name) strcpy(name, orig.name); }
117112
if (orig.data) { if (allocateData(orig._dataLen)) memcpy(data, orig.data, orig._dataLen); }
118113
if (orig.pixels) {
119-
pixels = static_cast<uint32_t*>(d_malloc(sizeof(uint32_t) * orig.length()));
120-
//TODO: also need to put this in 32bit memory on ESP32, maybe make that a function...
121-
//pixels = static_cast<uint32_t*>(heap_caps_malloc(sizeof(uint32_t) * orig.length(), MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT));
122-
//pixels = static_cast<uint32_t*>(heap_caps_malloc(sizeof(uint32_t) * orig.length(), MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT));
123-
124-
if (pixels) memcpy(pixels, orig.pixels, sizeof(uint32_t) * orig.length());
114+
// allocate pixel buffer: prefer PSRAM if DRAM is running low
115+
pixels = static_cast<uint32_t*>(allocate_buffer(orig.length() * sizeof(uint32_t), BFRALLOC_PREFER_PSRAM | BFRALLOC_NOBYTEACCESS));
116+
if (pixels) memcpy(pixels, orig.pixels, orig.length() * sizeof(uint32_t));
125117
else {
126118
DEBUG_PRINTLN(F("!!! Not enough RAM for pixel buffer !!!"));
127119
errorFlag = ERR_NORAM_PX;
@@ -156,50 +148,41 @@ bool Segment::allocateData(size_t len) {
156148
if (len == 0) return false; // nothing to do
157149
if (data && _dataLen >= len) { // already allocated enough (reduce fragmentation)
158150
if (call == 0) {
159-
if(checkHeapHealth()) {
151+
if(_dataLen < FAIR_DATA_PER_SEG) { // segment data is small
160152
//DEBUG_PRINTF_P(PSTR("-- Clearing data (%d): %p\n"), len, this);
161153
memset(data, 0, len); // erase buffer if called during effect initialisation
162154
return true; // no need to reallocate
163155
}
164-
else {
165-
d_free(data); // free data and try to allocate again
166-
data = nullptr;
167-
Segment::addUsedSegmentData(-_dataLen); // subtract buffer size
168-
}
169156
}
170157
else
171158
return true;
172159
}
173160
//DEBUG_PRINTF_P(PSTR("-- Allocating data (%d): %p\n"), len, this);
174-
if (Segment::getUsedSegmentData() + len - _dataLen > MAX_SEGMENT_DATA) {
175-
// not enough memory
176-
DEBUG_PRINTF_P(PSTR("!!! Not enough RAM: %d/%d !!!\n"), len, Segment::getUsedSegmentData());
177-
errorFlag = ERR_NORAM;
178-
return false;
161+
// limit to MAX_SEGMENT_DATA if there is no PSRAM, otherwise prefer functionality over speed
162+
#if defined(ARDUINO_ARCH_ESP32)
163+
if(!(psramFound() && psramSafe))
164+
#endif
165+
{
166+
if (Segment::getUsedSegmentData() + len - _dataLen > MAX_SEGMENT_DATA) {
167+
// not enough memory
168+
DEBUG_PRINTF_P(PSTR("!!! Not enough RAM: %d/%d !!!\n"), len, Segment::getUsedSegmentData());
169+
errorFlag = ERR_NORAM;
170+
return false;
171+
}
179172
}
180173
// prefer DRAM over PSRAM for speed
181174
if (data) {
182-
data = (byte*)d_realloc_malloc(data, len); // realloc with malloc fallback
183-
if (data == nullptr) { // allocation failed
184-
Segment::addUsedSegmentData(-_dataLen); // subtract original buffer size
185-
_dataLen = 0; // reset data length
186-
return false;
187-
}
175+
d_free(data); // free data and try to allocate again (segment buffer may be blocking contiguous heap)
176+
Segment::addUsedSegmentData(-_dataLen); // subtract buffer size
188177
}
189-
else data = (byte*)d_malloc(len);
178+
179+
data = static_cast<byte*>(allocate_buffer(len, BFRALLOC_PREFER_DRAM | BFRALLOC_CLEAR));
190180

191181
if (data) {
192-
if(!checkHeapHealth()) {
193-
d_free(data);
194-
data = nullptr;
195-
}
196-
else {
197-
memset(data, 0, len); // erase buffer
198-
Segment::addUsedSegmentData(len);
199-
_dataLen = len;
200-
//DEBUG_PRINTF_P(PSTR("--- Allocated data (%p): %d/%d -> %p\n"), this, len, Segment::getUsedSegmentData(), data);
201-
return true;
202-
}
182+
Segment::addUsedSegmentData(len);
183+
_dataLen = len;
184+
//DEBUG_PRINTF_P(PSTR("--- Allocated data (%p): %d/%d -> %p\n"), this, len, Segment::getUsedSegmentData(), data);
185+
return true;
203186
}
204187
// allocation failed
205188
DEBUG_PRINTLN(F("!!! Allocation failed. !!!"));
@@ -486,16 +469,7 @@ void Segment::setGeometry(uint16_t i1, uint16_t i2, uint8_t grp, uint8_t spc, ui
486469
// allocate FX render buffer
487470
if (length() != oldLength) {
488471
if (pixels) free(pixels); // note: using realloc can block larger heap segments
489-
#ifdef ARDUINO_ARCH_ESP32
490-
pixels = static_cast<uint32_t*>(pixelbuffer_malloc(izeof(uint32_t) * length());
491-
#else
492-
pixels = static_cast<uint32_t*>(p_malloc(sizeof(uint32_t) * length()));
493-
#endif
494-
495-
if(!checkHeapHealth()) {
496-
d_free(pixels);
497-
pixels = nullptr;
498-
}
472+
pixels = static_cast<uint32_t*>(allocate_buffer(length() * sizeof(uint32_t), BFRALLOC_PREFER_PSRAM | BFRALLOC_NOBYTEACCESS));
499473
if (!pixels) {
500474
DEBUG_PRINTLN(F("!!! Not enough RAM for pixel buffer !!!"));
501475
errorFlag = ERR_NORAM_PX;
@@ -1250,14 +1224,8 @@ void WS2812FX::finalizeInit() {
12501224

12511225
// allocate frame buffer after matrix has been set up (gaps!)
12521226
if (_pixels) d_free(_pixels);
1253-
#ifdef ARDUINO_ARCH_ESP32
1254-
_pixels = static_cast<uint32_t*>(pixelbuffer_malloc(getLengthTotal() * sizeof(uint32_t), true)); // use 32bit RAM (IRAM) or PSRAM on ESP32
1255-
#elif !defined(ESP8266)
1256-
// use PSRAM on S2 and S3 if available (C3 defaults to DRAM). Note: there is no measurable perfomance impact between PSRAM and DRAM on S2/S3 with QSPI PSRAM
1257-
_pixels = static_cast<uint32_t*>(heap_caps_malloc_prefer(size, 2, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT, MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT)); // prefer PSRAM if it exists
1258-
#else
1259-
_pixels = static_cast<uint32_t*>(malloc(getLengthTotal() * sizeof(uint32_t))); // ESP8266 does not support advanced allocation API
1260-
#endif
1227+
// use PSRAM if available: there is no measurable performance impact between PSRAM and DRAM on S2/S3 with QSPI PSRAM for this buffer
1228+
_pixels = static_cast<uint32_t*>(allocate_buffer(getLengthTotal() * sizeof(uint32_t), BFRALLOC_ENFORCE_PSRAM | BFRALLOC_NOBYTEACCESS | BFRALLOC_CLEAR));
12611229
DEBUG_PRINTF_P(PSTR("strip buffer size: %uB\n"), getLengthTotal() * sizeof(uint32_t));
12621230
DEBUG_PRINTF_P(PSTR("Heap after strip init: %uB\n"), getFreeHeapSize());
12631231
}

wled00/const.h

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -547,16 +547,19 @@ static_assert(WLED_MAX_BUSSES <= 32, "WLED_MAX_BUSSES exceeds hard limit");
547547
#endif
548548

549549
// minimum heap size required to process web requests: try to keep free heap above this value
550-
#define MIN_HEAP_SIZE (12*1024)
551-
550+
#ifdef ESP8266
551+
#define MIN_HEAP_SIZE (8*1024)
552+
#else
553+
#define MIN_HEAP_SIZE (12*1024)
554+
#endif
552555
// threshold for PSRAM use: if heap is running low, requests above PSRAM_THRESHOLD will be allocated in PSRAM
553556
// if heap is plenty, requests below PSRAM_THRESHOLD will be allocated in DRAM for speed
554557
#if defined(CONFIG_IDF_TARGET_ESP32S3)
555-
#define PSRAM_THRESHOLD 8192
558+
#define PSRAM_THRESHOLD 5120
556559
#elif defined(CONFIG_IDF_TARGET_ESP32)
557560
#define PSRAM_THRESHOLD 4096
558561
#else
559-
#define PSRAM_THRESHOLD 2048 // S2 does not have a lot of RAM, C3 and ESP8266 do not support PSRAM: the value is not used
562+
#define PSRAM_THRESHOLD 1024 // S2 does not have a lot of RAM. C3 and ESP8266 do not support PSRAM: the value is not used
560563
#endif
561564

562565
// Web server limits

wled00/fcn_declare.h

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -582,12 +582,20 @@ extern "C" {
582582
#define d_realloc_malloc realloc_malloc
583583
#define d_free free
584584
#endif
585-
bool checkHeapHealth(unsigned minFreeBlockSize = MIN_HEAP_SIZE); // checks heap fragmentation: returns true if contiguous free memory is larger than minFreeBlockSize
586585
#ifndef ESP8266
587-
inline unsigned getFreeHeapSize() { return heap_caps_get_free_size(MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT); } // returns free heap (ESP.getFreeHeap() can include other memory types)
586+
inline size_t getFreeHeapSize() { return heap_caps_get_free_size(MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT); } // returns free heap (ESP.getFreeHeap() can include other memory types)
587+
inline size_t getContiguousFreeHeap() { return heap_caps_get_largest_free_block(MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT); } // returns largest contiguous free block
588588
#else
589-
inline unsigned getFreeHeapSize() { return ESP.getFreeHeap(); } // returns free heap
589+
inline size_t getFreeHeapSize() { return ESP.getFreeHeap(); } // returns free heap
590+
inline size_t getContiguousFreeHeap() { return ESP.getMaxFreeBlockSize(); } // returns largest contiguous free block
590591
#endif
592+
#define BFRALLOC_NOBYTEACCESS (1 << 0) // ESP32 has 32bit accessible DRAM (usually ~50kB free) that must not be byte-accessed
593+
#define BFRALLOC_PREFER_DRAM (1 << 1) // prefer DRAM over PSRAM
594+
#define BFRALLOC_ENFORCE_DRAM (1 << 2) // use DRAM only, no PSRAM
595+
#define BFRALLOC_PREFER_PSRAM (1 << 3) // prefer PSRAM over DRAM
596+
#define BFRALLOC_ENFORCE_PSRAM (1 << 4) // use PSRAM if available, otherwise fall back to DRAM
597+
#define BFRALLOC_CLEAR (1 << 5) // clear allocated buffer after allocation
598+
void *allocate_buffer(size_t size, uint32_t type);
591599

592600
// RAII guard class for the JSON Buffer lock
593601
// Modeled after std::lock_guard

wled00/json.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -812,7 +812,7 @@ void serializeInfo(JsonObject root)
812812
root[F("clock")] = ESP.getCpuFreqMHz();
813813
root[F("flash")] = (ESP.getFlashChipSize()/1024)/1024;
814814
#ifdef WLED_DEBUG
815-
root[F("maxalloc")] = ESP.getMaxAllocHeap();
815+
root[F("maxalloc")] = getContiguousFreeHeap();
816816
root[F("resetReason0")] = (int)rtc_get_reset_reason(0);
817817
root[F("resetReason1")] = (int)rtc_get_reset_reason(1);
818818
#endif
@@ -823,7 +823,7 @@ void serializeInfo(JsonObject root)
823823
root[F("clock")] = ESP.getCpuFreqMHz();
824824
root[F("flash")] = (ESP.getFlashChipSize()/1024)/1024;
825825
#ifdef WLED_DEBUG
826-
root[F("maxalloc")] = ESP.getMaxFreeBlockSize();
826+
root[F("maxalloc")] = getContiguousFreeHeap();
827827
root[F("resetReason")] = (int)ESP.getResetInfoPtr()->reason;
828828
#endif
829829
root[F("lwip")] = LWIP_VERSION_MAJOR;

0 commit comments

Comments
 (0)