From ec68f060a10e6d79a3a1168fa35378c75261718f Mon Sep 17 00:00:00 2001 From: Benjamin Wang Date: Fri, 5 May 2023 15:47:04 +0800 Subject: [PATCH 1/4] add failpoint 'resizeFileError' to simulate file.Truncate error Signed-off-by: Benjamin Wang (cherry picked from commit 465077b9e2f3d0bfe593beda5a4b04ea1cd0915a) Signed-off-by: Wei Fu --- db.go | 2 ++ tests/failpoint/db_failpoint_test.go | 28 ++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/db.go b/db.go index 4175bdf3d..e8a2d1c29 100644 --- a/db.go +++ b/db.go @@ -1159,6 +1159,8 @@ func (db *DB) grow(sz int) error { // https://github.com/boltdb/bolt/issues/284 if !db.NoGrowSync && !db.readOnly { if runtime.GOOS != "windows" { + // gofail: var resizeFileError string + // return errors.New(resizeFileError) if err := db.file.Truncate(int64(sz)); err != nil { return fmt.Errorf("file resize error: %s", err) } diff --git a/tests/failpoint/db_failpoint_test.go b/tests/failpoint/db_failpoint_test.go index d9201ef1f..ecd9cf79e 100644 --- a/tests/failpoint/db_failpoint_test.go +++ b/tests/failpoint/db_failpoint_test.go @@ -209,3 +209,31 @@ func TestIssue72(t *testing.T) { func idToBytes(id int) []byte { return []byte(fmt.Sprintf("%010d", id)) } + +func TestFailpoint_ResizeFileFail(t *testing.T) { + db := btesting.MustCreateDB(t) + + err := gofail.Enable("resizeFileError", `return("resizeFile somehow failed")`) + require.NoError(t, err) + + err = db.Fill([]byte("data"), 1, 10000, + func(tx int, k int) []byte { return []byte(fmt.Sprintf("%04d", k)) }, + func(tx int, k int) []byte { return make([]byte, 100) }, + ) + + require.Error(t, err) + require.ErrorContains(t, err, "resizeFile somehow failed") + + // It should work after disabling the failpoint. 
+ err = gofail.Disable("resizeFileError") + require.NoError(t, err) + db.MustClose() + db.MustReopen() + + err = db.Fill([]byte("data"), 1, 10000, + func(tx int, k int) []byte { return []byte(fmt.Sprintf("%04d", k)) }, + func(tx int, k int) []byte { return make([]byte, 100) }, + ) + + require.NoError(t, err) +} From 79bcca0e518503f3de4c126e7f3a2fa140bf2ba8 Mon Sep 17 00:00:00 2001 From: Marcondes Viana Date: Sat, 29 Jul 2023 17:44:58 -0300 Subject: [PATCH 2/4] tests: add failpoint to simulate lack of disk space Signed-off-by: Marcondes Viana (cherry picked from commit 5ddbd0c94e221668fff3f9d234bd6fbb74d9457e) Signed-off-by: Wei Fu --- tests/failpoint/db_failpoint_test.go | 32 ++++++++++++++++++++++++++++ tx.go | 5 +++++ 2 files changed, 37 insertions(+) diff --git a/tests/failpoint/db_failpoint_test.go b/tests/failpoint/db_failpoint_test.go index ecd9cf79e..7eedbfe3e 100644 --- a/tests/failpoint/db_failpoint_test.go +++ b/tests/failpoint/db_failpoint_test.go @@ -237,3 +237,35 @@ func TestFailpoint_ResizeFileFail(t *testing.T) { require.NoError(t, err) } + +func TestFailpoint_LackOfDiskSpace(t *testing.T) { + db := btesting.MustCreateDB(t) + + err := gofail.Enable("lackOfDiskSpace", `return("grow somehow failed")`) + require.NoError(t, err) + + tx, err := db.Begin(true) + require.NoError(t, err) + + err = tx.Commit() + require.Error(t, err) + require.ErrorContains(t, err, "grow somehow failed") + + err = tx.Rollback() + require.Error(t, err) + require.ErrorIs(t, err, bolt.ErrTxClosed) + + // It should work after disabling the failpoint. 
+ err = gofail.Disable("lackOfDiskSpace") + require.NoError(t, err) + + tx, err = db.Begin(true) + require.NoError(t, err) + + err = tx.Commit() + require.NoError(t, err) + + err = tx.Rollback() + require.Error(t, err) + require.ErrorIs(t, err, bolt.ErrTxClosed) +} diff --git a/tx.go b/tx.go index 7a873066c..766395de3 100644 --- a/tx.go +++ b/tx.go @@ -1,6 +1,7 @@ package bbolt import ( + "errors" "fmt" "io" "os" @@ -185,6 +186,10 @@ func (tx *Tx) Commit() error { // If the high water mark has moved up then attempt to grow the database. if tx.meta.pgid > opgid { + _ = errors.New("") + // gofail: var lackOfDiskSpace string + // tx.rollback() + // return errors.New(lackOfDiskSpace) if err := tx.db.grow(int(tx.meta.pgid+1) * tx.db.pageSize); err != nil { tx.rollback() return err From 0719297af88a34bb07f3c78188415dbce1a20c1b Mon Sep 17 00:00:00 2001 From: Wei Fu Date: Mon, 1 Jan 2024 17:10:05 +0800 Subject: [PATCH 3/4] tests: Update TestRestartFromPowerFailure Update the case with a combination of the EXT4 filesystem's commit setting and an unexpected exit event. The EXT4 filesystem's commit setting makes it sync all its data and metadata every N seconds. The kernel can help us sync even if the process has been killed. With different commit settings, we can simulate the case where the kernel has synced only part of the dirty pages before the power failure. And for the unexpected exit event, we can kill the process randomly or panic at a failpoint instead of following a fixed code path. 
Signed-off-by: Wei Fu (cherry picked from commit 4c3a80b2c06cb5a39eeabe422f718a1b6cfc7be2) Signed-off-by: Wei Fu --- tests/dmflakey/dmflakey.go | 5 ++ tests/robustness/powerfailure_test.go | 85 +++++++++++++++++++++++++-- 2 files changed, 85 insertions(+), 5 deletions(-) diff --git a/tests/dmflakey/dmflakey.go b/tests/dmflakey/dmflakey.go index d9bdf99a0..25061a4cb 100644 --- a/tests/dmflakey/dmflakey.go +++ b/tests/dmflakey/dmflakey.go @@ -7,6 +7,7 @@ import ( "fmt" "os" "os/exec" + "path" "path/filepath" "strings" "time" @@ -289,6 +290,10 @@ func createEmptyFSImage(imgPath string, fsType FSType) error { return fmt.Errorf("failed to create image because %s already exists", imgPath) } + if err := os.MkdirAll(path.Dir(imgPath), 0600); err != nil { + return fmt.Errorf("failed to ensure parent directory %s: %w", path.Dir(imgPath), err) + } + f, err := os.Create(imgPath) if err != nil { return fmt.Errorf("failed to create image %s: %w", imgPath, err) diff --git a/tests/robustness/powerfailure_test.go b/tests/robustness/powerfailure_test.go index a1d0bc598..09ae88124 100644 --- a/tests/robustness/powerfailure_test.go +++ b/tests/robustness/powerfailure_test.go @@ -4,8 +4,11 @@ package robustness import ( "bytes" + "crypto/rand" "fmt" "io" + "math" + "math/big" "net/http" "net/url" "os" @@ -23,9 +26,65 @@ import ( "golang.org/x/sys/unix" ) +var panicFailpoints = []string{ + "beforeSyncDataPages", + "beforeSyncMetaPage", + "lackOfDiskSpace", + "mapError", + "resizeFileError", + "unmapError", +} + // TestRestartFromPowerFailure is to test data after unexpected power failure. 
func TestRestartFromPowerFailure(t *testing.T) { - flakey := initFlakeyDevice(t, t.Name(), dmflakey.FSTypeEXT4, "") + for _, tc := range []struct { + name string + du time.Duration + fsMountOpt string + useFailpoint bool + }{ + { + name: "fp_ext4_commit5s", + du: 5 * time.Second, + fsMountOpt: "commit=5", + useFailpoint: true, + }, + { + name: "fp_ext4_commit1s", + du: 10 * time.Second, + fsMountOpt: "commit=1", + useFailpoint: true, + }, + { + name: "fp_ext4_commit1000s", + du: 10 * time.Second, + fsMountOpt: "commit=1000", + useFailpoint: true, + }, + { + name: "kill_ext4_commit5s", + du: 5 * time.Second, + fsMountOpt: "commit=5", + }, + { + name: "kill_ext4_commit1s", + du: 10 * time.Second, + fsMountOpt: "commit=1", + }, + { + name: "kill_ext4_commit1000s", + du: 10 * time.Second, + fsMountOpt: "commit=1000", + }, + } { + t.Run(tc.name, func(t *testing.T) { + doPowerFailure(t, tc.du, tc.fsMountOpt, tc.useFailpoint) + }) + } +} + +func doPowerFailure(t *testing.T, du time.Duration, fsMountOpt string, useFailpoint bool) { + flakey := initFlakeyDevice(t, strings.Replace(t.Name(), "/", "_", -1), dmflakey.FSTypeEXT4, fsMountOpt) root := flakey.RootFS() dbPath := filepath.Join(root, "boltdb") @@ -38,6 +97,8 @@ func TestRestartFromPowerFailure(t *testing.T) { } logPath := filepath.Join(t.TempDir(), fmt.Sprintf("%s.log", t.Name())) + require.NoError(t, os.MkdirAll(path.Dir(logPath), 0600)) + logFd, err := os.Create(logPath) require.NoError(t, err) defer logFd.Close() @@ -64,10 +125,18 @@ func TestRestartFromPowerFailure(t *testing.T) { } }() - time.Sleep(time.Duration(time.Now().UnixNano()%5+1) * time.Second) + time.Sleep(du) t.Logf("simulate power failure") - activeFailpoint(t, fpURL, "beforeSyncMetaPage", "panic") + if useFailpoint { + fpURL = "http://" + fpURL + targetFp := panicFailpoints[randomInt(t, math.MaxInt32)%len(panicFailpoints)] + t.Logf("random pick failpoint: %s", targetFp) + activeFailpoint(t, fpURL, targetFp, "panic") + } else { + t.Log("kill bbolt") + 
+ assert.NoError(t, cmd.Process.Kill()) + } select { case <-time.After(10 * time.Second): @@ -89,10 +158,10 @@ func TestRestartFromPowerFailure(t *testing.T) { // activeFailpoint actives the failpoint by http. func activeFailpoint(t *testing.T, targetUrl string, fpName, fpVal string) { - u, err := url.Parse("http://" + path.Join(targetUrl, fpName)) + u, err := url.JoinPath(targetUrl, fpName) require.NoError(t, err, "parse url %s", targetUrl) - req, err := http.NewRequest("PUT", u.String(), bytes.NewBuffer([]byte(fpVal))) + req, err := http.NewRequest("PUT", u, bytes.NewBuffer([]byte(fpVal))) require.NoError(t, err) resp, err := http.DefaultClient.Do(req) @@ -192,3 +261,9 @@ func unmountAll(target string) error { } return fmt.Errorf("failed to umount %s: %w", target, unix.EBUSY) } + +func randomInt(t *testing.T, max int) int { + n, err := rand.Int(rand.Reader, big.NewInt(int64(max))) + assert.NoError(t, err) + return int(n.Int64()) +} From 70ab151d75dbf4411ee6ee99c9aeeef278dbc846 Mon Sep 17 00:00:00 2001 From: Thomas Jungblut Date: Thu, 7 Mar 2024 10:38:15 +0100 Subject: [PATCH 4/4] Add basic XFS powerfailure tests This also introduces mkfs options, in case we need to accommodate non-default parameters here in the future. 
Signed-off-by: Thomas Jungblut (cherry picked from commit c27eedcf803fb5c6990d8846275c5136e06de1ae) Signed-off-by: Wei Fu --- .github/workflows/robustness_test.yaml | 3 ++ tests/dmflakey/dmflakey.go | 18 ++++--- tests/dmflakey/dmflakey_test.go | 42 ++++++++------- tests/robustness/powerfailure_test.go | 72 +++++++++++++++++++++++--- 4 files changed, 102 insertions(+), 33 deletions(-) diff --git a/.github/workflows/robustness_test.yaml b/.github/workflows/robustness_test.yaml index 9aca5249e..88ee42d91 100644 --- a/.github/workflows/robustness_test.yaml +++ b/.github/workflows/robustness_test.yaml @@ -12,6 +12,9 @@ jobs: with: go-version: ${{ steps.goversion.outputs.goversion }} - run: | + set -euo pipefail + sudo apt-get install -y dmsetup xfsprogs + make gofail-enable # build bbolt with failpoint go install ./cmd/bbolt diff --git a/tests/dmflakey/dmflakey.go b/tests/dmflakey/dmflakey.go index 25061a4cb..88c3c2d48 100644 --- a/tests/dmflakey/dmflakey.go +++ b/tests/dmflakey/dmflakey.go @@ -90,9 +90,9 @@ const ( // The device-mapper device will be /dev/mapper/$flakeyDevice. And the filesystem // image will be created at $dataStorePath/$flakeyDevice.img. By default, the // device is available for 2 minutes and size is 10 GiB. -func InitFlakey(flakeyDevice, dataStorePath string, fsType FSType) (_ Flakey, retErr error) { +func InitFlakey(flakeyDevice, dataStorePath string, fsType FSType, mkfsOpt string) (_ Flakey, retErr error) { imgPath := filepath.Join(dataStorePath, fmt.Sprintf("%s.img", flakeyDevice)) - if err := createEmptyFSImage(imgPath, fsType); err != nil { + if err := createEmptyFSImage(imgPath, fsType, mkfsOpt); err != nil { return nil, err } defer func() { @@ -276,7 +276,7 @@ func (f *flakey) Teardown() error { // createEmptyFSImage creates empty filesystem on dataStorePath folder with // default size - 10 GiB. 
-func createEmptyFSImage(imgPath string, fsType FSType) error { +func createEmptyFSImage(imgPath string, fsType FSType, mkfsOpt string) error { if err := validateFSType(fsType); err != nil { return err } @@ -308,10 +308,16 @@ func createEmptyFSImage(imgPath string, fsType FSType) error { imgPath, defaultImgSize, err) } - output, err := exec.Command(mkfs, imgPath).CombinedOutput() + args := []string{imgPath} + if mkfsOpt != "" { + splitArgs := strings.Split(mkfsOpt, " ") + args = append(splitArgs, imgPath) + } + + output, err := exec.Command(mkfs, args...).CombinedOutput() if err != nil { - return fmt.Errorf("failed to mkfs.%s on %s (out: %s): %w", - fsType, imgPath, string(output), err) + return fmt.Errorf("failed to mkfs on %s (%s %v) (out: %s): %w", + imgPath, mkfs, args, string(output), err) } return nil } diff --git a/tests/dmflakey/dmflakey_test.go b/tests/dmflakey/dmflakey_test.go index 41c66db8d..99e2de062 100644 --- a/tests/dmflakey/dmflakey_test.go +++ b/tests/dmflakey/dmflakey_test.go @@ -26,31 +26,35 @@ func TestMain(m *testing.M) { } func TestBasic(t *testing.T) { - tmpDir := t.TempDir() + for _, fsType := range []FSType{FSTypeEXT4, FSTypeXFS} { + t.Run(string(fsType), func(t *testing.T) { + tmpDir := t.TempDir() - flakey, err := InitFlakey("go-dmflakey", tmpDir, FSTypeEXT4) - require.NoError(t, err, "init flakey") - defer func() { - assert.NoError(t, flakey.Teardown()) - }() + flakey, err := InitFlakey("go-dmflakey", tmpDir, fsType, "") + require.NoError(t, err, "init flakey") + defer func() { + assert.NoError(t, flakey.Teardown()) + }() - target := filepath.Join(tmpDir, "root") - require.NoError(t, os.MkdirAll(target, 0600)) + target := filepath.Join(tmpDir, "root") + require.NoError(t, os.MkdirAll(target, 0600)) - require.NoError(t, mount(target, flakey.DevicePath(), "")) - defer func() { - assert.NoError(t, unmount(target)) - }() + require.NoError(t, mount(target, flakey.DevicePath(), "")) + defer func() { + assert.NoError(t, unmount(target)) + }() 
- file := filepath.Join(target, "test") - assert.NoError(t, writeFile(file, []byte("hello, world"), 0600, true)) + file := filepath.Join(target, "test") + assert.NoError(t, writeFile(file, []byte("hello, world"), 0600, true)) - assert.NoError(t, unmount(target)) + assert.NoError(t, unmount(target)) - assert.NoError(t, flakey.Teardown()) + assert.NoError(t, flakey.Teardown()) + }) + } } -func TestDropWrites(t *testing.T) { +func TestDropWritesExt4(t *testing.T) { flakey, root := initFlakey(t, FSTypeEXT4) // commit=1000 is to delay commit triggered by writeback thread @@ -82,7 +86,7 @@ func TestDropWrites(t *testing.T) { assert.True(t, errors.Is(err, os.ErrNotExist)) } -func TestErrorWrites(t *testing.T) { +func TestErrorWritesExt4(t *testing.T) { flakey, root := initFlakey(t, FSTypeEXT4) // commit=1000 is to delay commit triggered by writeback thread @@ -114,7 +118,7 @@ func initFlakey(t *testing.T, fsType FSType) (_ Flakey, root string) { target := filepath.Join(tmpDir, "root") require.NoError(t, os.MkdirAll(target, 0600)) - flakey, err := InitFlakey("go-dmflakey", tmpDir, FSTypeEXT4) + flakey, err := InitFlakey("go-dmflakey", tmpDir, fsType, "") require.NoError(t, err, "init flakey") t.Cleanup(func() { diff --git a/tests/robustness/powerfailure_test.go b/tests/robustness/powerfailure_test.go index 09ae88124..35ed628b4 100644 --- a/tests/robustness/powerfailure_test.go +++ b/tests/robustness/powerfailure_test.go @@ -35,8 +35,8 @@ var panicFailpoints = []string{ "unmapError", } -// TestRestartFromPowerFailure is to test data after unexpected power failure. -func TestRestartFromPowerFailure(t *testing.T) { +// TestRestartFromPowerFailureExt4 is to test data after unexpected power failure on ext4. 
+func TestRestartFromPowerFailureExt4(t *testing.T) { for _, tc := range []struct { name string du time.Duration @@ -78,13 +78,69 @@ func TestRestartFromPowerFailure(t *testing.T) { }, } { t.Run(tc.name, func(t *testing.T) { - doPowerFailure(t, tc.du, tc.fsMountOpt, tc.useFailpoint) + doPowerFailure(t, tc.du, dmflakey.FSTypeEXT4, "", tc.fsMountOpt, tc.useFailpoint) }) } } -func doPowerFailure(t *testing.T, du time.Duration, fsMountOpt string, useFailpoint bool) { - flakey := initFlakeyDevice(t, strings.Replace(t.Name(), "/", "_", -1), dmflakey.FSTypeEXT4, fsMountOpt) +func TestRestartFromPowerFailureXFS(t *testing.T) { + for _, tc := range []struct { + name string + mkfsOpt string + fsMountOpt string + useFailpoint bool + }{ + { + name: "xfs_no_opts", + mkfsOpt: "", + fsMountOpt: "", + useFailpoint: true, + }, + { + name: "lazy-log", + mkfsOpt: "-l lazy-count=1", + fsMountOpt: "", + useFailpoint: true, + }, + { + name: "odd-allocsize", + mkfsOpt: "", + fsMountOpt: "allocsize=" + fmt.Sprintf("%d", 4096*5), + useFailpoint: true, + }, + { + name: "nolargeio", + mkfsOpt: "", + fsMountOpt: "nolargeio", + useFailpoint: true, + }, + { + name: "odd-alignment", + mkfsOpt: "-d sunit=1024,swidth=1024", + fsMountOpt: "noalign", + useFailpoint: true, + }, + { + name: "openshift-sno-options", + mkfsOpt: "-m bigtime=1,finobt=1,rmapbt=0,reflink=1 -i sparse=1 -l lazy-count=1", + // openshift also supplies seclabel,relatime,prjquota on RHEL, but that's not supported on our CI + // prjquota is only unsupported on our ARM runners. + // You can find more information in either the man page with `man xfs` or `man mkfs.xfs`. + // Also refer to https://man7.org/linux/man-pages/man8/mkfs.xfs.8.html. 
+ fsMountOpt: "rw,attr2,inode64,logbufs=8,logbsize=32k", + useFailpoint: true, + }, + } { + t.Run(tc.name, func(t *testing.T) { + t.Logf("mkfs opts: %s", tc.mkfsOpt) + t.Logf("mount opts: %s", tc.fsMountOpt) + doPowerFailure(t, 5*time.Second, dmflakey.FSTypeXFS, tc.mkfsOpt, tc.fsMountOpt, tc.useFailpoint) + }) + } +} + +func doPowerFailure(t *testing.T, du time.Duration, fsType dmflakey.FSType, mkfsOpt string, fsMountOpt string, useFailpoint bool) { + flakey := initFlakeyDevice(t, strings.Replace(t.Name(), "/", "_", -1), fsType, mkfsOpt, fsMountOpt) root := flakey.RootFS() dbPath := filepath.Join(root, "boltdb") @@ -185,10 +241,10 @@ type FlakeyDevice interface { } // initFlakeyDevice returns FlakeyDevice instance with a given filesystem. -func initFlakeyDevice(t *testing.T, name string, fsType dmflakey.FSType, mntOpt string) FlakeyDevice { +func initFlakeyDevice(t *testing.T, name string, fsType dmflakey.FSType, mkfsOpt string, mntOpt string) FlakeyDevice { imgDir := t.TempDir() - flakey, err := dmflakey.InitFlakey(name, imgDir, fsType) + flakey, err := dmflakey.InitFlakey(name, imgDir, fsType, mkfsOpt) require.NoError(t, err, "init flakey %s", name) t.Cleanup(func() { assert.NoError(t, flakey.Teardown()) @@ -239,7 +295,7 @@ func (f *flakeyT) PowerFailure(mntOpt string) error { } if err := unix.Mount(f.DevicePath(), f.rootDir, string(f.Filesystem()), 0, mntOpt); err != nil { - return fmt.Errorf("failed to mount rootfs %s: %w", f.rootDir, err) + return fmt.Errorf("failed to mount rootfs %s (%s): %w", f.rootDir, mntOpt, err) } return nil }