Skip to content

Commit

Permalink
Fixed incorrect cache reuse when seeking from end-of-block
Browse files Browse the repository at this point in the history
In v2.5, we introduced an optimization to avoid rereading data when
seeking inside the file cache. Unfortunately this used a slightly
wrong condition to check if the cache was "live", which meant seeks from
end-of-blocks could end up with invalid caches and wrong data. Not
great.

The problem is the nuance of when a file's cache is "live":

1. The file is marked as LFS_F_READING or LFS_F_WRITING.

   But we can't reuse the cache when writing, so we only care about
   LFS_F_READING.

2. file->off != lfs->cfg->block_size (end-of-block).

   This is an optimization to avoid eagerly reading blocks we may not
   actually care about.

We weren't checking for the end-of-block case, which meant if you seeked
_from_ the end of a block to a seemingly valid location in the file
cache, you could end up with an invalid cache.

Note that end-of-block may not be powers-of-two due to CTZ skip-list
pointers.

---

The fix is to check for the end-of-block case in lfs_file_seek. Note
this now matches the need-new-block logic in lfs_file_flushedread.

This logic change may also make lfs_file_seek call lfs_file_flush more
often, but only in cases where lfs_file_flush is a noop.

I've also extended the test_seek tests to cover a few more boundary-read
cases and prevent a regression in the future.

Found by wjl and lrodorigo
  • Loading branch information
geky committed Dec 19, 2024
1 parent 630a0d8 commit 366100b
Show file tree
Hide file tree
Showing 2 changed files with 150 additions and 8 deletions.
9 changes: 2 additions & 7 deletions lfs.c
Original file line number Diff line number Diff line change
Expand Up @@ -3725,13 +3725,8 @@ static lfs_soff_t lfs_file_seek_(lfs_t *lfs, lfs_file_t *file,

// if we're only reading and our new offset is still in the file's cache
// we can avoid flushing and needing to reread the data
if (
#ifndef LFS_READONLY
!(file->flags & LFS_F_WRITING)
#else
true
#endif
) {
if ((file->flags & LFS_F_READING)
&& file->off != lfs->cfg->block_size) {
int oindex = lfs_ctz_index(lfs, &(lfs_off_t){file->pos});
lfs_off_t noff = npos;
int nindex = lfs_ctz_index(lfs, &noff);
Expand Down
149 changes: 148 additions & 1 deletion tests/test_seek.toml
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,130 @@ code = '''
lfs_unmount(&lfs) => 0;
'''

# boundary seek and reads
[cases.test_seek_boundary_read]
defines.COUNT = 132
code = '''
lfs_t lfs;
lfs_format(&lfs, cfg) => 0;
lfs_mount(&lfs, cfg) => 0;
lfs_file_t file;
lfs_file_open(&lfs, &file, "kitty",
LFS_O_WRONLY | LFS_O_CREAT | LFS_O_APPEND) => 0;
size_t size = strlen("kittycatcat");
uint8_t buffer[1024];
memcpy(buffer, "kittycatcat", size);
for (int j = 0; j < COUNT; j++) {
lfs_file_write(&lfs, &file, buffer, size);
}
lfs_file_close(&lfs, &file) => 0;
lfs_unmount(&lfs) => 0;

lfs_mount(&lfs, cfg) => 0;
lfs_file_open(&lfs, &file, "kitty", LFS_O_RDONLY) => 0;

size = strlen("kittycatcat");
const lfs_soff_t offsets[] = {
512,
1024-4,
512+1,
1024-4+1,
512-1,
1024-4-1,

512-strlen("kittycatcat"),
1024-4-strlen("kittycatcat"),
512-strlen("kittycatcat")+1,
1024-4-strlen("kittycatcat")+1,
512-strlen("kittycatcat")-1,
1024-4-strlen("kittycatcat")-1,

strlen("kittycatcat")*(COUNT-2)-1,
};

for (unsigned i = 0; i < sizeof(offsets) / sizeof(offsets[0]); i++) {
lfs_soff_t off = offsets[i];
// read @ offset
lfs_file_seek(&lfs, &file, off, LFS_SEEK_SET) => off;
lfs_file_read(&lfs, &file, buffer, size) => size;
memcmp(buffer,
&"kittycatcatkittycatcat"[off % strlen("kittycatcat")],
size) => 0;
// read after
lfs_file_seek(&lfs, &file, off+strlen("kittycatcat")+1, LFS_SEEK_SET)
=> off+strlen("kittycatcat")+1;
lfs_file_read(&lfs, &file, buffer, size) => size;
memcmp(buffer,
&"kittycatcatkittycatcat"[(off+1) % strlen("kittycatcat")],
size) => 0;
// read before
lfs_file_seek(&lfs, &file, off-strlen("kittycatcat")-1, LFS_SEEK_SET)
=> off-strlen("kittycatcat")-1;
lfs_file_read(&lfs, &file, buffer, size) => size;
memcmp(buffer,
&"kittycatcatkittycatcat"[(off-1) % strlen("kittycatcat")],
size) => 0;

// read @ 0
lfs_file_seek(&lfs, &file, 0, LFS_SEEK_SET) => 0;
lfs_file_read(&lfs, &file, buffer, size) => size;
memcmp(buffer, "kittycatcat", size) => 0;

// read @ offset
lfs_file_seek(&lfs, &file, off, LFS_SEEK_SET) => off;
lfs_file_read(&lfs, &file, buffer, size) => size;
memcmp(buffer,
&"kittycatcatkittycatcat"[off % strlen("kittycatcat")],
size) => 0;
// read after
lfs_file_seek(&lfs, &file, off+strlen("kittycatcat")+1, LFS_SEEK_SET)
=> off+strlen("kittycatcat")+1;
lfs_file_read(&lfs, &file, buffer, size) => size;
memcmp(buffer,
&"kittycatcatkittycatcat"[(off+1) % strlen("kittycatcat")],
size) => 0;
// read before
lfs_file_seek(&lfs, &file, off-strlen("kittycatcat")-1, LFS_SEEK_SET)
=> off-strlen("kittycatcat")-1;
lfs_file_read(&lfs, &file, buffer, size) => size;
memcmp(buffer,
&"kittycatcatkittycatcat"[(off-1) % strlen("kittycatcat")],
size) => 0;

// sync
lfs_file_sync(&lfs, &file) => 0;

// read @ 0
lfs_file_seek(&lfs, &file, 0, LFS_SEEK_SET) => 0;
lfs_file_read(&lfs, &file, buffer, size) => size;
memcmp(buffer, "kittycatcat", size) => 0;

// read @ offset
lfs_file_seek(&lfs, &file, off, LFS_SEEK_SET) => off;
lfs_file_read(&lfs, &file, buffer, size) => size;
memcmp(buffer,
&"kittycatcatkittycatcat"[off % strlen("kittycatcat")],
size) => 0;
// read after
lfs_file_seek(&lfs, &file, off+strlen("kittycatcat")+1, LFS_SEEK_SET)
=> off+strlen("kittycatcat")+1;
lfs_file_read(&lfs, &file, buffer, size) => size;
memcmp(buffer,
&"kittycatcatkittycatcat"[(off+1) % strlen("kittycatcat")],
size) => 0;
// read before
lfs_file_seek(&lfs, &file, off-strlen("kittycatcat")-1, LFS_SEEK_SET)
=> off-strlen("kittycatcat")-1;
lfs_file_read(&lfs, &file, buffer, size) => size;
memcmp(buffer,
&"kittycatcatkittycatcat"[(off-1) % strlen("kittycatcat")],
size) => 0;
}

lfs_file_close(&lfs, &file) => 0;
lfs_unmount(&lfs) => 0;
'''

# boundary seek and writes
[cases.test_seek_boundary_write]
defines.COUNT = 132
Expand All @@ -160,31 +284,54 @@ code = '''
lfs_file_open(&lfs, &file, "kitty", LFS_O_RDWR) => 0;

size = strlen("hedgehoghog");
const lfs_soff_t offsets[] = {512, 1020, 513, 1021, 511, 1019, 1441};
const lfs_soff_t offsets[] = {
512,
1024-4,
512+1,
1024-4+1,
512-1,
1024-4-1,

512-strlen("kittycatcat"),
1024-4-strlen("kittycatcat"),
512-strlen("kittycatcat")+1,
1024-4-strlen("kittycatcat")+1,
512-strlen("kittycatcat")-1,
1024-4-strlen("kittycatcat")-1,

strlen("kittycatcat")*(COUNT-2)-1,
};

for (unsigned i = 0; i < sizeof(offsets) / sizeof(offsets[0]); i++) {
lfs_soff_t off = offsets[i];
// write @ offset
memcpy(buffer, "hedgehoghog", size);
lfs_file_seek(&lfs, &file, off, LFS_SEEK_SET) => off;
lfs_file_write(&lfs, &file, buffer, size) => size;

// read @ offset
lfs_file_seek(&lfs, &file, off, LFS_SEEK_SET) => off;
lfs_file_read(&lfs, &file, buffer, size) => size;
memcmp(buffer, "hedgehoghog", size) => 0;

// read @ 0
lfs_file_seek(&lfs, &file, 0, LFS_SEEK_SET) => 0;
lfs_file_read(&lfs, &file, buffer, size) => size;
memcmp(buffer, "kittycatcat", size) => 0;

// read @ offset
lfs_file_seek(&lfs, &file, off, LFS_SEEK_SET) => off;
lfs_file_read(&lfs, &file, buffer, size) => size;
memcmp(buffer, "hedgehoghog", size) => 0;

lfs_file_sync(&lfs, &file) => 0;

// read @ 0
lfs_file_seek(&lfs, &file, 0, LFS_SEEK_SET) => 0;
lfs_file_read(&lfs, &file, buffer, size) => size;
memcmp(buffer, "kittycatcat", size) => 0;

// read @ offset
lfs_file_seek(&lfs, &file, off, LFS_SEEK_SET) => off;
lfs_file_read(&lfs, &file, buffer, size) => size;
memcmp(buffer, "hedgehoghog", size) => 0;
Expand Down

0 comments on commit 366100b

Please sign in to comment.