Fix behaviour on write failures #32

Merged (1 commit) on Oct 16, 2023
21 changes: 21 additions & 0 deletions segment/vfs_test.go
@@ -6,6 +6,7 @@ package segment
import (
"bytes"
"encoding/hex"
"errors"
"fmt"
"io"
"os"
@@ -162,6 +163,8 @@ type testWritableFile struct {
    maxWritten    int
    lastSyncStart int
    closed, dirty bool
    writeErr      error
    syncErr       error
}

func newTestWritableFile(size int) *testWritableFile {
@@ -174,6 +177,14 @@ func (f *testWritableFile) getBuf() []byte {
    return f.buf.Load().([]byte)
}

func (f *testWritableFile) failNextWrite() {
    f.writeErr = errors.New("IO error")
}

func (f *testWritableFile) failNextSync() {
    f.syncErr = errors.New("IO error")
}

// Truncate allows us to simulate the file being a different length to expected
// for example due to a crash.
func (f *testWritableFile) Truncate(size int) {
@@ -207,6 +218,11 @@ func (f *testWritableFile) Dump() string {
}

func (f *testWritableFile) WriteAt(p []byte, off int64) (n int, err error) {
    if f.writeErr != nil {
        err := f.writeErr
        f.writeErr = nil
        return 0, err
    }
    if !f.dirty {
        f.lastSyncStart = int(off)
    }
@@ -268,6 +284,11 @@ func (f *testWritableFile) Close() error {
}

func (f *testWritableFile) Sync() error {
    if f.syncErr != nil {
        err := f.syncErr
        f.syncErr = nil
        return err
    }
    f.dirty = false
    return nil
}
22 changes: 22 additions & 0 deletions segment/writer.go
@@ -257,6 +257,26 @@ func (w *Writer) Append(entries []types.LogEntry) error {
        return types.ErrSealed
    }

    flushed := false

    // Save any state we may need to rollback.
    beforeBuf := w.writer.commitBuf
    beforeCRC := w.writer.crc
    beforeIndexStart := w.writer.indexStart
    beforeWriteOffset := w.writer.writeOffset
    beforeOffsets := w.offsets.Load()
Review comment:
offsets being atomic here, I'm not sure this is still safe, as we could be overwriting other updates of offsets between the start of the append and when the defer runs.

Reply (banks, Member Author):
This library relies heavily on the fact that Append may only be called serially, and the caller must ensure that; that's why we don't have locks around the writer state, for example. I think with that documented constraint there is no way for this to be racy, but I might still be wrong, so please correct me if you see anything!

Review comment:
Looking closer I think it's safe too 😅.

The reason is that we rely on commitIdx from the reader's perspective to know whether it's safe to read, so even if the offset is incremented it's safe from the reader's perspective. Also, we only increment commitIdx after the flush to disk is successful. (A standalone sketch of this invariant follows this file's diff below.)

    defer func() {
        if !flushed {
            // rollback writer state on error
            w.writer.commitBuf = beforeBuf
            w.writer.crc = beforeCRC
            w.writer.indexStart = beforeIndexStart
            w.writer.writeOffset = beforeWriteOffset
            w.offsets.Store(beforeOffsets)
        }
    }()

    // Iterate entries and append each one
    for _, e := range entries {
        if err := w.appendEntry(e); err != nil {
@@ -278,6 +298,8 @@ func (w *Writer) Append(entries []types.LogEntry) error {
        return err
    }

    flushed = true

    // Commit in-memory
    atomic.StoreUint64(&w.commitIdx, entries[len(entries)-1].Index)
    return nil
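To make the invariant from the review thread above concrete, here is a minimal, self-contained sketch of the idea: in-memory writer state may be mutated and then rolled back, but commitIdx is only advanced atomically after a successful flush, and reads are bounded by commitIdx, so a failed Append is never visible to readers. The types and method names below are illustrative only (they are not raft-wal's real API), and like the real Append it assumes callers are serial.

// Illustrative sketch only, not raft-wal's actual types or methods. It mirrors
// the shape of Append above: mutate in-memory state, flush, roll back on
// failure, and publish commitIdx only after a successful flush. A plain map
// stands in for the real atomically-swapped offsets, so this sketch is
// single-goroutine; the invariant it shows is what keeps concurrent readers
// safe in the real code.
package main

import (
    "errors"
    "fmt"
    "sync/atomic"
)

type sketchWriter struct {
    commitIdx uint64            // highest durably committed index (read/written atomically)
    entries   map[uint64][]byte // stand-in for commitBuf/offsets state
}

// append assumes serial callers, like the real Append.
func (w *sketchWriter) append(idx uint64, val []byte, flush func() error) error {
    w.entries[idx] = val // in-memory state changes first...
    if err := flush(); err != nil {
        delete(w.entries, idx) // ...and is rolled back on failure, like the defer above
        return err
    }
    atomic.StoreUint64(&w.commitIdx, idx) // published only after a successful flush
    return nil
}

func (w *sketchWriter) getLog(idx uint64) ([]byte, error) {
    if idx > atomic.LoadUint64(&w.commitIdx) {
        return nil, errors.New("not found: index not committed")
    }
    return w.entries[idx], nil
}

func main() {
    w := &sketchWriter{entries: map[uint64][]byte{}}
    _ = w.append(1, []byte("ok"), func() error { return nil })
    err := w.append(2, []byte("lost"), func() error { return errors.New("IO error") })
    fmt.Println(err) // IO error: the batch was rolled back
    _, err = w.getLog(2)
    fmt.Println(err) // not found: commitIdx never advanced past 1
}

Under this scheme the rollback in the defer above only needs to restore writer-internal fields; nothing a reader could have observed changes until commitIdx moves.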
114 changes: 114 additions & 0 deletions segment/writer_test.go
@@ -4,6 +4,7 @@
package segment

import (
"fmt"
"sync/atomic"
"testing"
"time"
@@ -117,3 +118,116 @@ func TestConcurrentReadersAndWriter(t *testing.T) {
    require.Greater(t, int(atomic.LoadUint64(&numReads)), 1000)
    require.Greater(t, int(atomic.LoadUint64(&sealedMaxIndex)), 1000)
}

func TestWriterRecoversFromWriteFailure(t *testing.T) {

Review comment:
My understanding is that the following test cases would fail the first WriteAt or Sync. We could maybe add a test case where some writes pass before a failure happens, to simulate a partial write.

Reply (@banks, Member Author, Oct 11, 2023):
I actually wrote the tests initially to fail a WriteAt other than the first, on the same thought, and they did nothing...

It turns out (and I'd forgotten) that we buffer the whole batch in memory and only actually write to the file once per Append. So there is never more than one WriteAt, at least at the VFS/file level! See the flush method. (A standalone sketch of this single-flush-per-Append behaviour follows the test diff below.)

Review comment:
Yes, I understand that only one flush is done per batch. The test case I'm referring to is:
1. Call Append, no error
2. Call Append, error
3. Call Append, no error
Then verify that calls 1 and 3 are on disk and 2 is not. That would prevent a regression where we rewind a successful write.

Reply (banks, Member Author):
Maybe I'm still missing something, but I think that is what the non-empty variants test here: they first append a batch successfully and then fail the next batch.

I pulled that out of the table because I wanted to assert both versions for every other case, so it made more sense to just call t.Run twice per case. Is that what you mean?

Review comment:
🤦 I missed the double t.Run call at the end. Yes, that's exactly what I had in mind!

    cases := []struct {
        name         string
        setupFailure func(f *testWritableFile, batch []types.LogEntry)
        fixFailure   func(batch []types.LogEntry)
    }{
        {
            name: "fwrite failure",
            setupFailure: func(f *testWritableFile, batch []types.LogEntry) {
                f.failNextWrite()
            },
        },
        {
            name: "fsync failure",
            setupFailure: func(f *testWritableFile, batch []types.LogEntry) {
                f.failNextSync()
            },
        },
        {
            name: "log append failure",
            setupFailure: func(f *testWritableFile, batch []types.LogEntry) {
                // Should cause monotonicity check to fail but only on last log after
                // other logs have been written and internal state updated.
                batch[len(batch)-1].Index = 123456
            },
            fixFailure: func(batch []types.LogEntry) {
                batch[len(batch)-1].Index = batch[len(batch)-2].Index + 1
            },
        },
    }

    for _, tc := range cases {
        tc := tc

        testFn := func(t *testing.T, empty bool) {
            vfs := newTestVFS()

            f := NewFiler("test", vfs)

            seg0 := testSegment(1)

            w, err := f.Create(seg0)
            require.NoError(t, err)
            defer w.Close()

            batch := make([]types.LogEntry, 5)
            for i := range batch {
                batch[i].Index = uint64(i + 1)
                batch[i].Data = []byte(fmt.Sprintf("val-%d", i+1))
            }
            maxIdx := len(batch)
            expectFirstIdx := 0
            expectLastIdx := 0

            if !empty {
                require.NoError(t, w.Append(batch))
                expectFirstIdx = 1
                expectLastIdx = maxIdx
                for i := range batch {
                    batch[i].Index = uint64(i + maxIdx + 1)
                    batch[i].Data = []byte(fmt.Sprintf("val-%d", i+maxIdx+1))
                }
            }

            tf := testFileFor(t, w)

            tc.setupFailure(tf, batch)

            require.Error(t, w.Append(batch))
            assertExpectedLogs(t, w, expectFirstIdx, expectLastIdx)

            if tc.fixFailure != nil {
                tc.fixFailure(batch)
            }

            // Now retry that write, it should work!
            expectFirstIdx = 1
            expectLastIdx = int(batch[4].Index)
            require.NoError(t, w.Append(batch))
            assertExpectedLogs(t, w, expectFirstIdx, expectLastIdx)

            // Also, re-open the file "from disk" to make sure what has been written
            // is correct and recoverable!
            w2, err := f.RecoverTail(seg0)
            require.NoError(t, err)
            assertExpectedLogs(t, w2, expectFirstIdx, expectLastIdx)
            w2.Close()
        }

        t.Run(tc.name+" empty", func(t *testing.T) {
            testFn(t, true)
        })
        t.Run(tc.name+" non-empty", func(t *testing.T) {
            testFn(t, false)
        })
    }
}

func assertExpectedLogs(t *testing.T, w types.SegmentWriter, first, last int) {
    t.Helper()

    require.Equal(t, uint64(last), w.LastIndex())
    if last == 0 {
        return
    }
    for idx := first; idx <= last; idx++ {
        buf, err := w.GetLog(uint64(idx))
        require.NoError(t, err)
        require.Equal(t, fmt.Sprintf("val-%d", idx), string(buf.Bs))
        buf.Close()
    }
}
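Returning to the review thread above about per-Append flushing: here is a minimal, self-contained sketch of the buffering behaviour described there, in which each entry is appended to an in-memory buffer and the file sees exactly one WriteAt plus one Sync per batch. The batchWriter type and its methods are illustrative only, not the library's real flush implementation.

// Illustrative sketch only, not raft-wal's real writer. Entries are appended
// to an in-memory buffer; flush issues a single WriteAt and Sync for the whole
// batch, which is why one injected write or sync failure covers the entire batch.
package main

import (
    "fmt"
    "log"
    "os"
)

type batchWriter struct {
    f         *os.File
    commitBuf []byte // frames accumulated for the current batch
    offset    int64  // file offset for the next flush
}

func (w *batchWriter) appendEntry(e []byte) {
    // Memory only; the file is untouched until flush.
    w.commitBuf = append(w.commitBuf, e...)
}

func (w *batchWriter) flush() error {
    // Exactly one write and one sync per batch.
    n, err := w.f.WriteAt(w.commitBuf, w.offset)
    if err != nil {
        return err
    }
    if err := w.f.Sync(); err != nil {
        return err
    }
    w.offset += int64(n)
    w.commitBuf = w.commitBuf[:0]
    return nil
}

func main() {
    f, err := os.CreateTemp("", "sketch-*.wal")
    if err != nil {
        log.Fatal(err)
    }
    defer os.Remove(f.Name())
    defer f.Close()

    w := &batchWriter{f: f}
    for _, e := range []string{"val-1", "val-2", "val-3"} {
        w.appendEntry([]byte(e)) // buffered in memory
    }
    fmt.Println(w.flush()) // <nil>: one WriteAt + one Sync covered the whole batch
}

This is why failing only the first WriteAt or Sync in the tests above is enough to exercise the whole-batch failure path.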