Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
storage/backend: fatal if there is panic during defrag
We should exit as soon as possible if there is panic during defrag. Because that tx might be closed. The inflight request might use invalid tx and then panic as well. However, the real panic might be shadowed by new panic. It's related to goroutine schedule. So, we should fatal here. How to reproduce bbolt#715: ```diff diff --git a/server/etcdserver/api/v3rpc/maintenance.go b/server/etcdserver/api/v3rpc/maintenance.go index 4f55c7c74..0a91b4225 100644 --- a/server/etcdserver/api/v3rpc/maintenance.go +++ b/server/etcdserver/api/v3rpc/maintenance.go @@ -89,7 +89,13 @@ func NewMaintenanceServer(s *etcdserver.EtcdServer, healthNotifier notifier) pb. func (ms *maintenanceServer) Defragment(ctx context.Context, sr *pb.DefragmentRequest) (*pb.DefragmentResponse, error) { ms.lg.Info("starting defragment") ms.healthNotifier.defragStarted() - defer ms.healthNotifier.defragFinished() + defer func() { + ms.healthNotifier.defragFinished() + if rerr := recover(); rerr != nil { + time.Sleep(30 * time.Second) + panic(rerr) + } + }() err := ms.bg.Backend().Defrag() if err != nil { ms.lg.Warn("failed to defragment", zap.Error(err)) ``` ```bash $ make gofail-enable $ make $ GOFAIL_HTTP="127.0.0.1:1234" bin/etcd $ New Terminal $ curl http://127.0.0.1:1234/defragBeforeRename -XPUT -d'panic()' $ bin/etcdctl defrag $ Old Terminal {"level":"info","ts":"2024-04-09T23:28:54.084434+0800","caller":"v3rpc/maintenance.go:90","msg":"starting defragment"} {"level":"info","ts":"2024-04-09T23:28:54.087363+0800","caller":"backend/backend.go:509","msg":"defragmenting","path":"default.etcd/member/snap/db","current-db-size-bytes":20480,"current-db-size":"20 kB","current-db-size-in-use-bytes":16384,"current-db-size-in-use":"16 kB"} {"level":"info","ts":"2024-04-09T23:28:54.091229+0800","caller":"v3rpc/health.go:62","msg":"grpc service status changed","service":"","status":"SERVING"} panic: runtime error: invalid memory address or nil pointer dereference [signal SIGSEGV: segmentation violation code=0x1 addr=0x0 pc=0xbb2553] goroutine 156 [running]: go.etcd.io/bbolt.(*Tx).Bucket(...) go.etcd.io/bbolt@v1.4.0-alpha.0/tx.go:112 go.etcd.io/etcd/server/v3/storage/backend.(*baseReadTx).UnsafeRange(0xc00061ac80, {0x12b7780, 0x1a73420}, {0x1a113e8, 0xe, 0xe}, {0x0, 0x0, 0x0}, 0x1) go.etcd.io/etcd/server/v3/storage/backend/read_tx.go:103 +0x233 go.etcd.io/etcd/server/v3/storage/schema.UnsafeReadStorageVersion({0x7f6280613618?, 0xc00061ac80?}) go.etcd.io/etcd/server/v3/storage/schema/version.go:35 +0x5d go.etcd.io/etcd/server/v3/storage/schema.UnsafeDetectSchemaVersion(0xc000334b80, {0x7f6280613618, 0xc00061ac80}) go.etcd.io/etcd/server/v3/storage/schema/schema.go:94 +0x47 go.etcd.io/etcd/server/v3/storage/schema.DetectSchemaVersion(0xc000334b80, {0x12b77f0, 0xc00061ac80}) go.etcd.io/etcd/server/v3/storage/schema/schema.go:89 +0xf2 go.etcd.io/etcd/server/v3/etcdserver.(*EtcdServer).StorageVersion(0xc000393c08) go.etcd.io/etcd/server/v3/etcdserver/server.go:2216 +0x105 go.etcd.io/etcd/server/v3/etcdserver.(*serverVersionAdapter).GetStorageVersion(0x0?) go.etcd.io/etcd/server/v3/etcdserver/adapters.go:77 +0x16 go.etcd.io/etcd/server/v3/etcdserver/version.(*Monitor).UpdateStorageVersionIfNeeded(0xc00002df70) go.etcd.io/etcd/server/v3/etcdserver/version/monitor.go:112 +0x5d go.etcd.io/etcd/server/v3/etcdserver.(*EtcdServer).monitorStorageVersion(0xc000393c08) go.etcd.io/etcd/server/v3/etcdserver/server.go:2259 +0xa8 go.etcd.io/etcd/server/v3/etcdserver.(*EtcdServer).GoAttach.func1() go.etcd.io/etcd/server/v3/etcdserver/server.go:2440 +0x59 created by go.etcd.io/etcd/server/v3/etcdserver.(*EtcdServer).GoAttach in goroutine 1 go.etcd.io/etcd/server/v3/etcdserver/server.go:2438 +0xf9 ``` REF: etcd-io/bbolt#715 Signed-off-by: Wei Fu <fuweid89@gmail.com>
- Loading branch information