diff --git a/src/SafeFile.cpp b/src/SafeFile.cpp new file mode 100644 index 0000000000..161a808701 --- /dev/null +++ b/src/SafeFile.cpp @@ -0,0 +1,99 @@ +#include "SafeFile.h" + +#ifdef FSCom + +// Only way to work on both esp32 and nrf52 +static File openFile(const char *filename, bool fullAtomic) +{ + if (!fullAtomic) + FSCom.remove(filename); // Nuke the old file to make space (ignore if it !exists) + + String filenameTmp = filename; + filenameTmp += ".tmp"; + + return FSCom.open(filenameTmp.c_str(), FILE_O_WRITE); +} + +SafeFile::SafeFile(const char *_filename, bool fullAtomic) + : filename(_filename), f(openFile(_filename, fullAtomic)), fullAtomic(fullAtomic) +{ +} + +size_t SafeFile::write(uint8_t ch) +{ + if (!f) + return 0; + + hash ^= ch; + return f.write(ch); +} + +size_t SafeFile::write(const uint8_t *buffer, size_t size) +{ + if (!f) + return 0; + + for (size_t i = 0; i < size; i++) { + hash ^= buffer[i]; + } + return f.write((uint8_t const *)buffer, size); // This nasty cast is _IMPORTANT_ otherwise the correct adafruit method does + // not get used (they made a mistake in their typing) +} + +/** + * Atomically close the file (deleting any old versions) and readback the contents to confirm the hash matches + * + * @return false for failure + */ +bool SafeFile::close() +{ + if (!f) + return false; + + f.close(); + if (!testReadback()) + return false; + + // brief window of risk here ;-) + if (fullAtomic && FSCom.exists(filename.c_str()) && !FSCom.remove(filename.c_str())) { + LOG_ERROR("Can't remove old pref file\n"); + return false; + } + + String filenameTmp = filename; + filenameTmp += ".tmp"; + if (!renameFile(filenameTmp.c_str(), filename.c_str())) { + LOG_ERROR("Error: can't rename new pref file\n"); + return false; + } + + return true; +} + +/// Read our (closed) tempfile back in and compare the hash +bool SafeFile::testReadback() +{ + String filenameTmp = filename; + filenameTmp += ".tmp"; + auto f2 = FSCom.open(filenameTmp.c_str(), FILE_O_READ); + if (!f2) { + LOG_ERROR("Can't open tmp file for readback\n"); + return false; + } + + int c = 0; + uint8_t test_hash = 0; + while ((c = f2.read()) >= 0) { + test_hash ^= (uint8_t)c; + } + f2.close(); + + if (test_hash != hash) { + LOG_ERROR("Readback failed hash mismatch\n"); + return false; + } + + return true; +} + +#endif \ No newline at end of file diff --git a/src/SafeFile.h b/src/SafeFile.h new file mode 100644 index 0000000000..7088074cdc --- /dev/null +++ b/src/SafeFile.h @@ -0,0 +1,49 @@ +#pragma once + +#include "FSCommon.h" +#include "configuration.h" + +#ifdef FSCom + +/** + * This class provides 'safe'/paranoid file writing. + * + * Some of our filesystems (in particular the nrf52) may have bugs beneath our layer. Therefore we want to + * be very careful about how we write files. This class provides a restricted (Stream only) writing API for writing to files. + * + * Notably: + * - we keep a simple xor hash of all characters that were written. + * - We do not allow seeking (because we want to maintain our hash) + * - we provide an close() method which is similar to close but returns false if we were unable to successfully write the + * file. Also this method + * - atomically replaces any old version of the file on the disk with our new file (after first rereading the file from the disk + * to confirm the hash matches) + * - Some files are super huge so we can't do the full atomic rename/copy (because of filesystem size limits). If !fullAtomic + * then we still do the readback to verify file is valid so higher level code can handle failures. + */ +class SafeFile : public Print +{ + public: + SafeFile(char const *filepath, bool fullAtomic = false); + + virtual size_t write(uint8_t); + virtual size_t write(const uint8_t *buffer, size_t size); + + /** + * Atomically close the file (deleting any old versions) and readback the contents to confirm the hash matches + * + * @return false for failure + */ + bool close(); + + private: + /// Read our (closed) tempfile back in and compare the hash + bool testReadback(); + + String filename; + File f; + bool fullAtomic; + uint8_t hash = 0; +}; + +#endif \ No newline at end of file diff --git a/src/gps/GPS.cpp b/src/gps/GPS.cpp index 0d937ae63c..93742a99a5 100644 --- a/src/gps/GPS.cpp +++ b/src/gps/GPS.cpp @@ -1090,7 +1090,7 @@ int32_t GPS::runOnce() if (devicestate.did_gps_reset && scheduling.elapsedSearchMs() > 60 * 1000UL && !hasFlow()) { LOG_DEBUG("GPS is not communicating, trying factory reset on next bootup.\n"); devicestate.did_gps_reset = false; - nodeDB->saveDeviceStateToDisk(); + nodeDB->saveToDisk(SEGMENT_DEVICESTATE); return disable(); // Stop the GPS thread as it can do nothing useful until next reboot. } } diff --git a/src/mesh/NodeDB.cpp b/src/mesh/NodeDB.cpp index de63af08ef..61f08fe653 100644 --- a/src/mesh/NodeDB.cpp +++ b/src/mesh/NodeDB.cpp @@ -14,6 +14,7 @@ #include "PowerFSM.h" #include "RTC.h" #include "Router.h" +#include "SafeFile.h" #include "TypeConversions.h" #include "error.h" #include "main.h" @@ -53,6 +54,28 @@ meshtastic_LocalConfig config; meshtastic_LocalModuleConfig moduleConfig; meshtastic_ChannelFile channelFile; meshtastic_OEMStore oemStore; +static bool hasOemStore = false; + +// These are not publically exposed - copied from InternalFileSystem.cpp +// #define FLASH_NRF52_PAGE_SIZE 4096 +// #define LFS_FLASH_TOTAL_SIZE (7*FLASH_NRF52_PAGE_SIZE) +// #define LFS_BLOCK_SIZE 128 + +/// List all files in the FS and test write and readback. +/// Useful for filesystem stress testing - normally stripped from build by the linker. +void flashTest() +{ + auto filesManifest = getFiles("/", 5); + + uint32_t totalSize = 0; + for (size_t i = 0; i < filesManifest.size(); i++) { + LOG_INFO("File %s (size %d)\n", filesManifest[i].file_name, filesManifest[i].size_bytes); + totalSize += filesManifest[i].size_bytes; + } + LOG_INFO("%d files (total size %u)\n", filesManifest.size(), totalSize); + // LOG_INFO("Filesystem block size %u, total bytes %u", LFS_FLASH_TOTAL_SIZE, LFS_BLOCK_SIZE); + nodeDB->saveToDisk(); +} bool meshtastic_DeviceState_callback(pb_istream_t *istream, pb_ostream_t *ostream, const pb_field_iter_t *field) { @@ -608,22 +631,30 @@ LoadFileResult NodeDB::loadProto(const char *filename, size_t protoSize, size_t void NodeDB::loadFromDisk() { + devicestate.version = + 0; // Mark the current device state as completely unusable, so that if we fail reading the entire file from + // disk we will still factoryReset to restore things. + // static DeviceState scratch; We no longer read into a tempbuf because this structure is 15KB of valuable RAM auto state = loadProto(prefFileName, sizeof(meshtastic_DeviceState) + MAX_NUM_NODES * sizeof(meshtastic_NodeInfo), sizeof(meshtastic_DeviceState), &meshtastic_DeviceState_msg, &devicestate); - if (state != LoadFileResult::LOAD_SUCCESS) { - installDefaultDeviceState(); // Our in RAM copy might now be corrupt + // See https://github.com/meshtastic/firmware/issues/4184#issuecomment-2269390786 + // It is very important to try and use the saved prefs even if we fail to read meshtastic_DeviceState. Because most of our + // critical config may still be valid (in the other files - loaded next). + // Also, if we did fail on reading we probably failed on the enormous (and non critical) nodeDB. So DO NOT install default + // device state. + // if (state != LoadFileResult::LOAD_SUCCESS) { + // installDefaultDeviceState(); // Our in RAM copy might now be corrupt + //} else { + if (devicestate.version < DEVICESTATE_MIN_VER) { + LOG_WARN("Devicestate %d is old, discarding\n", devicestate.version); + factoryReset(); } else { - if (devicestate.version < DEVICESTATE_MIN_VER) { - LOG_WARN("Devicestate %d is old, discarding\n", devicestate.version); - factoryReset(); - } else { - LOG_INFO("Loaded saved devicestate version %d, with nodecount: %d\n", devicestate.version, - devicestate.node_db_lite.size()); - meshNodes = &devicestate.node_db_lite; - numMeshNodes = devicestate.node_db_lite.size(); - } + LOG_INFO("Loaded saved devicestate version %d, with nodecount: %d\n", devicestate.version, + devicestate.node_db_lite.size()); + meshNodes = &devicestate.node_db_lite; + numMeshNodes = devicestate.node_db_lite.size(); } meshNodes->resize(MAX_NUM_NODES); @@ -669,6 +700,7 @@ void NodeDB::loadFromDisk() state = loadProto(oemConfigFile, meshtastic_OEMStore_size, sizeof(meshtastic_OEMStore), &meshtastic_OEMStore_msg, &oemStore); if (state == LoadFileResult::LOAD_SUCCESS) { LOG_INFO("Loaded OEMStore\n"); + hasOemStore = true; } // 2.4.X - configuration migration to update new default intervals @@ -693,48 +725,26 @@ void NodeDB::loadFromDisk() } /** Save a protobuf from a file, return true for success */ -bool NodeDB::saveProto(const char *filename, size_t protoSize, const pb_msgdesc_t *fields, const void *dest_struct) +bool NodeDB::saveProto(const char *filename, size_t protoSize, const pb_msgdesc_t *fields, const void *dest_struct, + bool fullAtomic) { bool okay = false; #ifdef FSCom - // static DeviceState scratch; We no longer read into a tempbuf because this structure is 15KB of valuable RAM - String filenameTmp = filename; - filenameTmp += ".tmp"; - auto f = FSCom.open(filenameTmp.c_str(), FILE_O_WRITE); - if (f) { - LOG_INFO("Saving %s\n", filename); - pb_ostream_t stream = {&writecb, &f, protoSize}; + auto f = SafeFile(filename, fullAtomic); - if (!pb_encode(&stream, fields, dest_struct)) { - LOG_ERROR("Error: can't encode protobuf %s\n", PB_GET_ERROR(&stream)); - } else { - okay = true; - } - f.flush(); - f.close(); + LOG_INFO("Saving %s\n", filename); + pb_ostream_t stream = {&writecb, static_cast(&f), protoSize}; - // brief window of risk here ;-) - if (FSCom.exists(filename) && !FSCom.remove(filename)) { - LOG_WARN("Can't remove old pref file\n"); - } - if (!renameFile(filenameTmp.c_str(), filename)) { - LOG_ERROR("Error: can't rename new pref file\n"); - } + if (!pb_encode(&stream, fields, dest_struct)) { + LOG_ERROR("Error: can't encode protobuf %s\n", PB_GET_ERROR(&stream)); } else { - LOG_ERROR("Can't write prefs\n"); -#ifdef ARCH_NRF52 - static uint8_t failedCounter = 0; - failedCounter++; - if (failedCounter >= 2) { - LOG_ERROR("Failed to save file twice. Rebooting...\n"); - delay(100); - NVIC_SystemReset(); - // We used to blow away the filesystem here, but that's a bit extreme - // FSCom.format(); - // // After formatting, the device needs to be restarted - // nodeDB->resetRadioConfig(true); - } -#endif + okay = true; + } + + bool writeSucceeded = f.close(); + + if (!okay || !writeSucceeded) { + LOG_ERROR("Can't write prefs!\n"); } #else LOG_ERROR("ERROR: Filesystem not implemented\n"); @@ -742,32 +752,32 @@ bool NodeDB::saveProto(const char *filename, size_t protoSize, const pb_msgdesc_ return okay; } -void NodeDB::saveChannelsToDisk() +bool NodeDB::saveChannelsToDisk() { #ifdef FSCom FSCom.mkdir("/prefs"); #endif - saveProto(channelFileName, meshtastic_ChannelFile_size, &meshtastic_ChannelFile_msg, &channelFile); + return saveProto(channelFileName, meshtastic_ChannelFile_size, &meshtastic_ChannelFile_msg, &channelFile); } -void NodeDB::saveDeviceStateToDisk() +bool NodeDB::saveDeviceStateToDisk() { #ifdef FSCom FSCom.mkdir("/prefs"); #endif - saveProto(prefFileName, sizeof(devicestate) + numMeshNodes * meshtastic_NodeInfoLite_size, &meshtastic_DeviceState_msg, - &devicestate); + // Note: if MAX_NUM_NODES=100 and meshtastic_NodeInfoLite_size=166, so will be approximately 17KB + // Because so huge we _must_ not use fullAtomic, because the filesystem is probably too small to hold two copies of this + return saveProto(prefFileName, sizeof(devicestate) + numMeshNodes * meshtastic_NodeInfoLite_size, &meshtastic_DeviceState_msg, + &devicestate, false); } -void NodeDB::saveToDisk(int saveWhat) +bool NodeDB::saveToDiskNoRetry(int saveWhat) { + bool success = true; + #ifdef FSCom FSCom.mkdir("/prefs"); #endif - if (saveWhat & SEGMENT_DEVICESTATE) { - saveDeviceStateToDisk(); - } - if (saveWhat & SEGMENT_CONFIG) { config.has_device = true; config.has_display = true; @@ -777,7 +787,7 @@ void NodeDB::saveToDisk(int saveWhat) config.has_network = true; config.has_bluetooth = true; - saveProto(configFileName, meshtastic_LocalConfig_size, &meshtastic_LocalConfig_msg, &config); + success &= saveProto(configFileName, meshtastic_LocalConfig_size, &meshtastic_LocalConfig_msg, &config); } if (saveWhat & SEGMENT_MODULECONFIG) { @@ -794,12 +804,45 @@ void NodeDB::saveToDisk(int saveWhat) moduleConfig.has_audio = true; moduleConfig.has_paxcounter = true; - saveProto(moduleConfigFileName, meshtastic_LocalModuleConfig_size, &meshtastic_LocalModuleConfig_msg, &moduleConfig); + success &= + saveProto(moduleConfigFileName, meshtastic_LocalModuleConfig_size, &meshtastic_LocalModuleConfig_msg, &moduleConfig); + } + + // We might need to rewrite the OEM data if we are reformatting the FS + if ((saveWhat & SEGMENT_OEM) && hasOemStore) { + success &= saveProto(oemConfigFile, meshtastic_OEMStore_size, &meshtastic_OEMStore_msg, &oemStore); } if (saveWhat & SEGMENT_CHANNELS) { - saveChannelsToDisk(); + success &= saveChannelsToDisk(); } + + if (saveWhat & SEGMENT_DEVICESTATE) { + success &= saveDeviceStateToDisk(); + } + + return success; +} + +bool NodeDB::saveToDisk(int saveWhat) +{ + bool success = saveToDiskNoRetry(saveWhat); + + if (!success) { + LOG_ERROR("Failed to save to disk, retrying...\n"); +#ifdef ARCH_NRF52 // @geeksville is not ready yet to say we should do this on other platforms. See bug #4184 discussion + FSCom.format(); + + // We need to rewrite the OEM data if we are reformatting the FS + saveWhat |= SEGMENT_OEM; +#endif + success = saveToDiskNoRetry(saveWhat); + + RECORD_CRITICALERROR(success ? meshtastic_CriticalErrorCode_FLASH_CORRUPTION_RECOVERABLE + : meshtastic_CriticalErrorCode_FLASH_CORRUPTION_UNRECOVERABLE); + } + + return success; } const meshtastic_NodeInfoLite *NodeDB::readNextMeshNode(uint32_t &readIndex) @@ -1059,4 +1102,4 @@ void recordCriticalError(meshtastic_CriticalErrorCode code, uint32_t address, co LOG_ERROR("A critical failure occurred, portduino is exiting..."); exit(2); #endif -} +} \ No newline at end of file diff --git a/src/mesh/NodeDB.h b/src/mesh/NodeDB.h index e2c2471d44..c00ab8c037 100644 --- a/src/mesh/NodeDB.h +++ b/src/mesh/NodeDB.h @@ -19,6 +19,7 @@ DeviceState versions used to be defined in the .proto file but really only this #define SEGMENT_MODULECONFIG 2 #define SEGMENT_DEVICESTATE 4 #define SEGMENT_CHANNELS 8 +#define SEGMENT_OEM 16 #define DEVICESTATE_CUR_VER 23 #define DEVICESTATE_MIN_VER 22 @@ -72,8 +73,8 @@ class NodeDB NodeDB(); /// write to flash - void saveToDisk(int saveWhat = SEGMENT_CONFIG | SEGMENT_MODULECONFIG | SEGMENT_DEVICESTATE | SEGMENT_CHANNELS), - saveChannelsToDisk(), saveDeviceStateToDisk(); + /// @return true if the save was successful + bool saveToDisk(int saveWhat = SEGMENT_CONFIG | SEGMENT_MODULECONFIG | SEGMENT_DEVICESTATE | SEGMENT_CHANNELS); /** Reinit radio config if needed, because either: * a) sometimes a buggy android app might send us bogus settings or @@ -130,7 +131,8 @@ class NodeDB LoadFileResult loadProto(const char *filename, size_t protoSize, size_t objSize, const pb_msgdesc_t *fields, void *dest_struct); - bool saveProto(const char *filename, size_t protoSize, const pb_msgdesc_t *fields, const void *dest_struct); + bool saveProto(const char *filename, size_t protoSize, const pb_msgdesc_t *fields, const void *dest_struct, + bool fullAtomic = true); void installRoleDefaults(meshtastic_Config_DeviceConfig_Role role); @@ -181,6 +183,13 @@ class NodeDB /// Reinit device state from scratch (not loading from disk) void installDefaultDeviceState(), installDefaultChannels(), installDefaultConfig(), installDefaultModuleConfig(); + + /// write to flash + /// @return true if the save was successful + bool saveToDiskNoRetry(int saveWhat); + + bool saveChannelsToDisk(); + bool saveDeviceStateToDisk(); }; extern NodeDB *nodeDB; @@ -228,4 +237,4 @@ extern uint32_t error_address; ModuleConfig_RangeTestConfig_size + ModuleConfig_SerialConfig_size + ModuleConfig_StoreForwardConfig_size + \ ModuleConfig_TelemetryConfig_size + ModuleConfig_size) -// Please do not remove this comment, it makes trunk and compiler happy at the same time. +// Please do not remove this comment, it makes trunk and compiler happy at the same time. \ No newline at end of file diff --git a/src/mesh/mesh-pb-constants.cpp b/src/mesh/mesh-pb-constants.cpp index 676208e25a..5b5d669ccf 100644 --- a/src/mesh/mesh-pb-constants.cpp +++ b/src/mesh/mesh-pb-constants.cpp @@ -58,7 +58,7 @@ bool readcb(pb_istream_t *stream, uint8_t *buf, size_t count) /// Write to an arduino file bool writecb(pb_ostream_t *stream, const uint8_t *buf, size_t count) { - File *file = (File *)stream->state; + auto file = (Print *)stream->state; // LOG_DEBUG("writing %d bytes to protobuf file\n", count); return file->write(buf, count) == count; } @@ -71,4 +71,4 @@ bool is_in_helper(uint32_t n, const uint32_t *array, pb_size_t count) return true; return false; -} +} \ No newline at end of file diff --git a/src/modules/Telemetry/Sensor/NAU7802Sensor.cpp b/src/modules/Telemetry/Sensor/NAU7802Sensor.cpp index 39ac4b08b8..3560c6580f 100644 --- a/src/modules/Telemetry/Sensor/NAU7802Sensor.cpp +++ b/src/modules/Telemetry/Sensor/NAU7802Sensor.cpp @@ -5,6 +5,7 @@ #include "../mesh/generated/meshtastic/telemetry.pb.h" #include "FSCommon.h" #include "NAU7802Sensor.h" +#include "SafeFile.h" #include "TelemetrySensor.h" #include #include @@ -95,27 +96,21 @@ void NAU7802Sensor::tare() bool NAU7802Sensor::saveCalibrationData() { - if (FSCom.exists(nau7802ConfigFileName) && !FSCom.remove(nau7802ConfigFileName)) { - LOG_WARN("Can't remove old state file\n"); - } - auto file = FSCom.open(nau7802ConfigFileName, FILE_O_WRITE); + auto file = SafeFile(nau7802ConfigFileName); nau7802config.zeroOffset = nau7802.getZeroOffset(); nau7802config.calibrationFactor = nau7802.getCalibrationFactor(); bool okay = false; - if (file) { - LOG_INFO("%s state write to %s.\n", sensorName, nau7802ConfigFileName); - pb_ostream_t stream = {&writecb, &file, meshtastic_Nau7802Config_size}; - if (!pb_encode(&stream, &meshtastic_Nau7802Config_msg, &nau7802config)) { - LOG_ERROR("Error: can't encode protobuf %s\n", PB_GET_ERROR(&stream)); - } else { - okay = true; - } - file.flush(); - file.close(); + LOG_INFO("%s state write to %s.\n", sensorName, nau7802ConfigFileName); + pb_ostream_t stream = {&writecb, static_cast(&file), meshtastic_Nau7802Config_size}; + + if (!pb_encode(&stream, &meshtastic_Nau7802Config_msg, &nau7802config)) { + LOG_ERROR("Error: can't encode protobuf %s\n", PB_GET_ERROR(&stream)); } else { - LOG_INFO("Can't write %s state (File: %s).\n", sensorName, nau7802ConfigFileName); + okay = true; } + okay &= file.close(); + return okay; }