Skip to content

Commit

Permalink
bug #4184: fix config file loss due to filesystem write errors (#4397)
Browse files Browse the repository at this point in the history
* Use SafeFile for atomic file writing (with xor checksum readback)
* Write db.proto last because it could be the largest file on the FS (and less critical)
* Don't keep a tmp file around while writing db.proto (because too big to fit two files in the filesystem)
* generate a new critial fault if we encounter errors writing to flash
either CriticalErrorCode_FLASH_CORRUPTION_RECOVERABLE or CriticalErrorCode_FLASH_CORRUPTION_UNRECOVERABLE
(depending on if the second write attempt worked)
* reformat the filesystem if we detect it is corrupted (then rewrite our config files) (only on nrf52 - not sure
yet if we should bother on ESP32)
* If we have to format the FS, make sure to preserve the oem.proto if it exists

Co-authored-by: Ben Meadors <benmmeadors@gmail.com>
  • Loading branch information
geeksville and thebentern authored Aug 6, 2024
1 parent c1870f9 commit 66c41e6
Show file tree
Hide file tree
Showing 7 changed files with 278 additions and 83 deletions.
99 changes: 99 additions & 0 deletions src/SafeFile.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
#include "SafeFile.h"

#ifdef FSCom

// Only way to work on both esp32 and nrf52
static File openFile(const char *filename, bool fullAtomic)
{
if (!fullAtomic)
FSCom.remove(filename); // Nuke the old file to make space (ignore if it !exists)

String filenameTmp = filename;
filenameTmp += ".tmp";

return FSCom.open(filenameTmp.c_str(), FILE_O_WRITE);
}

SafeFile::SafeFile(const char *_filename, bool fullAtomic)
: filename(_filename), f(openFile(_filename, fullAtomic)), fullAtomic(fullAtomic)
{
}

size_t SafeFile::write(uint8_t ch)
{
if (!f)
return 0;

hash ^= ch;
return f.write(ch);
}

size_t SafeFile::write(const uint8_t *buffer, size_t size)
{
if (!f)
return 0;

for (size_t i = 0; i < size; i++) {
hash ^= buffer[i];
}
return f.write((uint8_t const *)buffer, size); // This nasty cast is _IMPORTANT_ otherwise the correct adafruit method does
// not get used (they made a mistake in their typing)
}

/**
* Atomically close the file (deleting any old versions) and readback the contents to confirm the hash matches
*
* @return false for failure
*/
bool SafeFile::close()
{
if (!f)
return false;

f.close();
if (!testReadback())
return false;

// brief window of risk here ;-)
if (fullAtomic && FSCom.exists(filename.c_str()) && !FSCom.remove(filename.c_str())) {
LOG_ERROR("Can't remove old pref file\n");
return false;
}

String filenameTmp = filename;
filenameTmp += ".tmp";
if (!renameFile(filenameTmp.c_str(), filename.c_str())) {
LOG_ERROR("Error: can't rename new pref file\n");
return false;
}

return true;
}

/// Read our (closed) tempfile back in and compare the hash
bool SafeFile::testReadback()
{
String filenameTmp = filename;
filenameTmp += ".tmp";
auto f2 = FSCom.open(filenameTmp.c_str(), FILE_O_READ);
if (!f2) {
LOG_ERROR("Can't open tmp file for readback\n");
return false;
}

int c = 0;
uint8_t test_hash = 0;
while ((c = f2.read()) >= 0) {
test_hash ^= (uint8_t)c;
}
f2.close();

if (test_hash != hash) {
LOG_ERROR("Readback failed hash mismatch\n");
return false;
}

return true;
}

#endif
49 changes: 49 additions & 0 deletions src/SafeFile.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
#pragma once

#include "FSCommon.h"
#include "configuration.h"

#ifdef FSCom

/**
* This class provides 'safe'/paranoid file writing.
*
* Some of our filesystems (in particular the nrf52) may have bugs beneath our layer. Therefore we want to
* be very careful about how we write files. This class provides a restricted (Stream only) writing API for writing to files.
*
* Notably:
* - we keep a simple xor hash of all characters that were written.
* - We do not allow seeking (because we want to maintain our hash)
* - we provide an close() method which is similar to close but returns false if we were unable to successfully write the
* file. Also this method
* - atomically replaces any old version of the file on the disk with our new file (after first rereading the file from the disk
* to confirm the hash matches)
* - Some files are super huge so we can't do the full atomic rename/copy (because of filesystem size limits). If !fullAtomic
* then we still do the readback to verify file is valid so higher level code can handle failures.
*/
class SafeFile : public Print
{
public:
SafeFile(char const *filepath, bool fullAtomic = false);

virtual size_t write(uint8_t);
virtual size_t write(const uint8_t *buffer, size_t size);

/**
* Atomically close the file (deleting any old versions) and readback the contents to confirm the hash matches
*
* @return false for failure
*/
bool close();

private:
/// Read our (closed) tempfile back in and compare the hash
bool testReadback();

String filename;
File f;
bool fullAtomic;
uint8_t hash = 0;
};

#endif
2 changes: 1 addition & 1 deletion src/gps/GPS.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1090,7 +1090,7 @@ int32_t GPS::runOnce()
if (devicestate.did_gps_reset && scheduling.elapsedSearchMs() > 60 * 1000UL && !hasFlow()) {
LOG_DEBUG("GPS is not communicating, trying factory reset on next bootup.\n");
devicestate.did_gps_reset = false;
nodeDB->saveDeviceStateToDisk();
nodeDB->saveToDisk(SEGMENT_DEVICESTATE);
return disable(); // Stop the GPS thread as it can do nothing useful until next reboot.
}
}
Expand Down
Loading

0 comments on commit 66c41e6

Please sign in to comment.