Skip to content

Commit

Permalink
db: fix FormatPrePebblev1Marked migration
Browse files Browse the repository at this point in the history
Fix the FormatPrePebblev1Marked migration to tolerate concurrent file
deletions by disabling physical deletion of files removed from the LSM
until the migration completes.

Fix cockroachdb#2019.
Informs cockroachdb/cockroach#89755.
Informs cockroachdb/cockroach#83079.
  • Loading branch information
jbowens committed Oct 20, 2022
1 parent 5ed983e commit 94bc4e6
Show file tree
Hide file tree
Showing 3 changed files with 98 additions and 29 deletions.
12 changes: 12 additions & 0 deletions format_major_version.go
Original file line number Diff line number Diff line change
Expand Up @@ -501,6 +501,18 @@ func (d *DB) markFilesLocked(findFn findFilesFunc) error {
err error
)
func() {
// Disable file deletions. Some migrations that mark files may need to
// read the files themselves. The LSM may be changing while the
// migration is determining which files to mark. That's ok, because
// after re-acquiring the mutex and acquiring the manifest lock, we'll
// discard any sstables that have been removed from the LSM. However, we
// also need to make sure that the files we try to open during the scan
// are not removed before we read them. This is preferable over
// gracefully handling nonexistent files because some environements (eg,
// Windows) error if you attempt to remove an open file.
d.disableFileDeletions()
defer d.enableFileDeletions()

// Note the unusual locking: unlock, defer Lock(). The scan of the files in
// the version does not need to block other operations that require the
// DB.mu. Drop it for the scan, before re-acquiring it.
Expand Down
57 changes: 57 additions & 0 deletions format_major_version_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,13 @@ import (
"bytes"
"fmt"
"strconv"
"sync"
"testing"
"time"

"github.com/cockroachdb/pebble/internal/base"
"github.com/cockroachdb/pebble/internal/datadriven"
"github.com/cockroachdb/pebble/internal/testkeys"
"github.com/cockroachdb/pebble/sstable"
"github.com/cockroachdb/pebble/vfs"
"github.com/cockroachdb/pebble/vfs/atomicfs"
Expand Down Expand Up @@ -463,3 +465,58 @@ func TestPebblev1Migration(t *testing.T) {
},
)
}

// TestPebblev1MigrationRace exercises the race between a PrePebbleV1Marked
// format major version upgrade that needs to open sstables to read their table
// format, and concurrent compactions that may delete the same files from the
// LSM.
//
// Regression test for #2019.
func TestPebblev1MigrationRace(t *testing.T) {
// Use a smaller table cache size to slow down the PrePebbleV1Marked
// migration, ensuring each table read needs to re-open the file.
cache := NewCache(4 << 20)
defer cache.Unref()
tableCache := NewTableCache(cache, 1, 5)
defer tableCache.Unref()
d, err := Open("", &Options{
Cache: cache,
FS: vfs.NewMem(),
FormatMajorVersion: FormatMajorVersion(FormatPrePebblev1Marked - 1),
TableCache: tableCache,
Levels: []LevelOptions{{TargetFileSize: 1}},
})
require.NoError(t, err)
defer d.Close()

ks := testkeys.Alpha(3).EveryN(10)
var key [3]byte
for i := 0; i < ks.Count(); i++ {
n := testkeys.WriteKey(key[:], ks, i)
require.NoError(t, d.Set(key[:n], key[:n], nil))
require.NoError(t, d.Flush())
}

// Asynchronously write and flush range deletes that will cause compactions
// to delete the existing sstables. These deletes will race with the format
// major version upgrade's migration will attempt to delete the files.
var wg sync.WaitGroup
wg.Add(1)
go func() {
defer wg.Done()
for i := ks.Count() - 1; i > 0; i -= 50 {
endKey := testkeys.Key(ks, i)
startIndex := i - 50
if startIndex < 0 {
startIndex = 0
}
startKey := testkeys.Key(ks, startIndex)

require.NoError(t, d.DeleteRange(startKey, endKey, nil))
_, err := d.AsyncFlush()
require.NoError(t, err)
}
}()
require.NoError(t, d.RatchetFormatMajorVersion(FormatPrePebblev1Marked))
wg.Wait()
}
58 changes: 29 additions & 29 deletions testdata/event_listener
Original file line number Diff line number Diff line change
Expand Up @@ -85,10 +85,10 @@ sync: wal/000002.log
close: wal/000002.log
create: wal/000005.log
sync: wal
[JOB 4] WAL created 000005
[JOB 5] flushing 1 memtable to L0
[JOB 6] WAL created 000005
[JOB 7] flushing 1 memtable to L0
create: db/000006.sst
[JOB 5] flushing: sstable created 000006
[JOB 7] flushing: sstable created 000006
sync: db/000006.sst
close: db/000006.sst
sync: db
Expand All @@ -98,9 +98,9 @@ sync: db/MANIFEST-000007
create: db/marker.manifest.000003.MANIFEST-000007
close: db/marker.manifest.000003.MANIFEST-000007
sync: db
[JOB 5] MANIFEST created 000007
[JOB 5] flushed 1 memtable to L0 [000006] (770 B), in 1.0s (2.0s total), output rate 770 B/s
[JOB 5] MANIFEST deleted 000001
[JOB 7] MANIFEST created 000007
[JOB 7] flushed 1 memtable to L0 [000006] (770 B), in 1.0s (2.0s total), output rate 770 B/s
[JOB 7] MANIFEST deleted 000001

compact
----
Expand All @@ -109,10 +109,10 @@ sync: wal/000005.log
close: wal/000005.log
reuseForWrite: wal/000002.log -> wal/000008.log
sync: wal
[JOB 6] WAL created 000008 (recycled 000002)
[JOB 7] flushing 1 memtable to L0
[JOB 8] WAL created 000008 (recycled 000002)
[JOB 9] flushing 1 memtable to L0
create: db/000009.sst
[JOB 7] flushing: sstable created 000009
[JOB 9] flushing: sstable created 000009
sync: db/000009.sst
close: db/000009.sst
sync: db
Expand All @@ -122,12 +122,12 @@ sync: db/MANIFEST-000010
create: db/marker.manifest.000004.MANIFEST-000010
close: db/marker.manifest.000004.MANIFEST-000010
sync: db
[JOB 7] MANIFEST created 000010
[JOB 7] flushed 1 memtable to L0 [000009] (770 B), in 1.0s (2.0s total), output rate 770 B/s
[JOB 7] MANIFEST deleted 000003
[JOB 8] compacting(default) L0 [000006 000009] (1.5 K) + L6 [] (0 B)
[JOB 9] MANIFEST created 000010
[JOB 9] flushed 1 memtable to L0 [000009] (770 B), in 1.0s (2.0s total), output rate 770 B/s
[JOB 9] MANIFEST deleted 000003
[JOB 10] compacting(default) L0 [000006 000009] (1.5 K) + L6 [] (0 B)
create: db/000011.sst
[JOB 8] compacting: sstable created 000011
[JOB 10] compacting: sstable created 000011
sync: db/000011.sst
close: db/000011.sst
sync: db
Expand All @@ -137,11 +137,11 @@ sync: db/MANIFEST-000012
create: db/marker.manifest.000005.MANIFEST-000012
close: db/marker.manifest.000005.MANIFEST-000012
sync: db
[JOB 8] MANIFEST created 000012
[JOB 8] compacted(default) L0 [000006 000009] (1.5 K) + L6 [] (0 B) -> L6 [000011] (770 B), in 1.0s (2.0s total), output rate 770 B/s
[JOB 8] sstable deleted 000006
[JOB 8] sstable deleted 000009
[JOB 8] MANIFEST deleted 000007
[JOB 10] MANIFEST created 000012
[JOB 10] compacted(default) L0 [000006 000009] (1.5 K) + L6 [] (0 B) -> L6 [000011] (770 B), in 1.0s (2.0s total), output rate 770 B/s
[JOB 10] sstable deleted 000006
[JOB 10] sstable deleted 000009
[JOB 10] MANIFEST deleted 000007

disable-file-deletions
----
Expand All @@ -153,10 +153,10 @@ sync: wal/000008.log
close: wal/000008.log
reuseForWrite: wal/000005.log -> wal/000013.log
sync: wal
[JOB 9] WAL created 000013 (recycled 000005)
[JOB 10] flushing 1 memtable to L0
[JOB 11] WAL created 000013 (recycled 000005)
[JOB 12] flushing 1 memtable to L0
create: db/000014.sst
[JOB 10] flushing: sstable created 000014
[JOB 12] flushing: sstable created 000014
sync: db/000014.sst
close: db/000014.sst
sync: db
Expand All @@ -166,27 +166,27 @@ sync: db/MANIFEST-000015
create: db/marker.manifest.000006.MANIFEST-000015
close: db/marker.manifest.000006.MANIFEST-000015
sync: db
[JOB 10] MANIFEST created 000015
[JOB 10] flushed 1 memtable to L0 [000014] (770 B), in 1.0s (2.0s total), output rate 770 B/s
[JOB 12] MANIFEST created 000015
[JOB 12] flushed 1 memtable to L0 [000014] (770 B), in 1.0s (2.0s total), output rate 770 B/s

enable-file-deletions
----
[JOB 11] MANIFEST deleted 000010
[JOB 13] MANIFEST deleted 000010

ingest
----
link: ext/0 -> db/000016.sst
[JOB 12] ingesting: sstable created 000016
[JOB 14] ingesting: sstable created 000016
sync: db
create: db/MANIFEST-000017
close: db/MANIFEST-000015
sync: db/MANIFEST-000017
create: db/marker.manifest.000007.MANIFEST-000017
close: db/marker.manifest.000007.MANIFEST-000017
sync: db
[JOB 12] MANIFEST created 000017
[JOB 12] MANIFEST deleted 000012
[JOB 12] ingested L0:000016 (825 B)
[JOB 14] MANIFEST created 000017
[JOB 14] MANIFEST deleted 000012
[JOB 14] ingested L0:000016 (825 B)

metrics
----
Expand Down

0 comments on commit 94bc4e6

Please sign in to comment.