Skip to content

Commit

Permalink
internal/manifest: Relax SeqNum overlap invariant in L0
Browse files Browse the repository at this point in the history
Changes manifest.CheckOrdering to allow L0 SSTables to overlap
in sequence numbers.

Implemented for better compatibility with future versions of
Pebble, which could support partitioned flushes and L0 sublevels.

Fixes #587.
  • Loading branch information
itsbilal committed Mar 30, 2020
1 parent a06c162 commit 6593c22
Show file tree
Hide file tree
Showing 7 changed files with 79 additions and 26 deletions.
53 changes: 50 additions & 3 deletions internal/manifest/testdata/version_check_ordering
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,58 @@ check-ordering
L0
c.SET.3-d.SET.4
a.SET.1-b.SET.5
e.SET.2-f.SET.7
g.SET.6-h.SET.12
i.SET.8-j.SET.13
b.SET.15-d.SET.15
a.SET.14-j.SET.17
k.SET.16-n.SET.19
----
L0 flushed file 000002 overlaps with the largest seqnum of a preceding flushed file: 1-5 vs 4
OK

# Add some ingested SSTables around the 14-19 seqnum cases.
check-ordering
L0
c.SET.3-d.SET.4
a.SET.1-b.SET.5
e.SET.2-f.SET.7
g.SET.6-h.SET.12
i.SET.8-j.SET.13
b.SET.15-d.SET.15
a.SET.14-j.SET.17
m.SET.18-n.SET.18
k.SET.16-n.SET.19
m.SET.20-n.SET.20
----
OK

# Ensure that coincident sequence numbers are detected around sstables with
# overlapping sequence numbers.
check-ordering
L0
c.SET.3-d.SET.4
a.SET.1-b.SET.5
e.SET.2-f.SET.7
g.SET.6-h.SET.12
i.SET.8-j.SET.13
b.SET.15-d.SET.15
a.SET.15-j.SET.17
m.SET.18-n.SET.18
k.SET.16-n.SET.19
m.SET.20-n.SET.20
----
L0 flushed file 000007 has smallest sequence number coincident with an ingested file : 15-17 vs 15
0:
000001:[c#3,SET-d#4,SET]
000002:[a#1,SET-b#5,SET]
000003:[e#2,SET-f#7,SET]
000004:[g#6,SET-h#12,SET]
000005:[i#8,SET-j#13,SET]
000006:[b#15,SET-d#15,SET]
000007:[a#15,SET-j#17,SET]
000008:[m#18,SET-n#18,SET]
000009:[k#16,SET-n#19,SET]
000010:[m#20,SET-n#20,SET]

check-ordering
L0
Expand Down Expand Up @@ -66,7 +113,7 @@ L0
a.SET.3-d.SET.3
a.SET.3-d.SET.5
----
L0 flushed file 000002 has an ingested file coincident with smallest seqnum: 3-5
L0 flushed file 000002 has smallest sequence number coincident with an ingested file : 3-5 vs 3
0:
000001:[a#3,SET-d#3,SET]
000002:[a#3,SET-d#5,SET]
Expand Down Expand Up @@ -94,7 +141,7 @@ L0
a.SET.5-d.SET.5
a.SET.4-d.SET.6
----
L0 flushed file 000003 has an ingested file coincident with smallest seqnum: 4-6
L0 flushed file 000003 has smallest sequence number coincident with an ingested file : 4-6 vs 4
0:
000001:[a#4,SET-d#4,SET]
000002:[a#5,SET-d#5,SET]
Expand Down
6 changes: 5 additions & 1 deletion internal/manifest/testdata/version_edit_apply
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,11 @@ edit
L0
4:b.SET.3-d.SET.5
----
pebble: internal error: L0 flushed file 000004 overlaps with the largest seqnum of a preceding flushed file: 3-5 vs 4
0:
000001:[a#1,SET-c#2,SET]
000002:[c#3,SET-d#4,SET]
000004:[b#3,SET-d#5,SET]
zombies []

apply
L0
Expand Down
27 changes: 14 additions & 13 deletions internal/manifest/version.go
Original file line number Diff line number Diff line change
Expand Up @@ -447,7 +447,7 @@ func CheckOrdering(cmp Compare, format base.Formatter, level int, files []*FileM
// file (except for the 0 sequence number case below).
// - Files with multiple sequence numbers: these are necessarily flushed files.
//
// Two cases of overlapping sequence numbers:
// Three cases of overlapping sequence numbers:
// Case 1:
// An ingested file contained in the sequence numbers of the flushed file -- it must be
// fully contained (not coincident with either end of the flushed file) since the memtable
Expand All @@ -469,9 +469,17 @@ func CheckOrdering(cmp Compare, format base.Formatter, level int, files []*FileM
// in the file key intervals. This file is placed in L0 since it overlaps in the file
// key intervals but since it has no overlapping data, it is assigned a sequence number
// of 0 in RocksDB. We handle this case for compatibility with RocksDB.

// The largest sequence number of a flushed file. Increasing.
var largestFlushedSeqNum uint64
//
// Case 3:
// A sequence of flushed files that overlap in sequence numbers with one another,
// but do not overlap in keys inside the sstables. These files correspond to
// partitioned flushes or the results of intra-L0 compactions of partitioned
// flushes.
//
// Since these types of SSTables violate most other sequence number
// overlap invariants, and handling this case is important for compatibility
// with future versions of pebble, this method relaxes most L0 invariant
// checks except for those concerning ingested SSTables.

// The largest sequence number of any file. Increasing.
var largestSeqNum uint64
Expand Down Expand Up @@ -510,20 +518,13 @@ func CheckOrdering(cmp Compare, format base.Formatter, level int, files []*FileM
uncheckedIngestedSeqNums = append(uncheckedIngestedSeqNums, f.LargestSeqNum)
} else {
// Flushed file.
// Two flushed files cannot overlap.
if largestFlushedSeqNum > 0 && f.SmallestSeqNum <= largestFlushedSeqNum {
return fmt.Errorf("L0 flushed file %06d overlaps with the largest seqnum of a "+
"preceding flushed file: %d-%d vs %d", f.FileNum, f.SmallestSeqNum, f.LargestSeqNum,
largestFlushedSeqNum)
}
largestFlushedSeqNum = f.LargestSeqNum
// Check that unchecked ingested sequence numbers are not coincident with f.SmallestSeqNum.
// We do not need to check that they are not coincident with f.LargestSeqNum because we
// have already confirmed that LargestSeqNums were increasing.
for _, seq := range uncheckedIngestedSeqNums {
if seq == f.SmallestSeqNum {
return fmt.Errorf("L0 flushed file %06d has an ingested file coincident with "+
"smallest seqnum: %d-%d", f.FileNum, f.SmallestSeqNum, f.LargestSeqNum)
return fmt.Errorf("L0 flushed file %06d has smallest sequence number coincident with an ingested file "+
": %d-%d vs %d", f.FileNum, f.SmallestSeqNum, f.LargestSeqNum, seq)
}
}
uncheckedIngestedSeqNums = uncheckedIngestedSeqNums[:0]
Expand Down
3 changes: 2 additions & 1 deletion testdata/compaction_check_ordering
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,13 @@ L0
----
fatal: L0 files 000001 and 000002 are not properly ordered: 3-4 vs 1-2

# Seqnum overlaps are allowed in L0 as long as no key ranges overlap.
check-ordering
L0
c.SET.3-d.SET.4
a.SET.1-b.SET.5
----
fatal: L0 flushed file 000002 overlaps with the largest seqnum of a preceding flushed file: 1-5 vs 4
OK

check-ordering
L0
Expand Down
4 changes: 2 additions & 2 deletions tool/make_incorrect_manifests.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,13 +39,13 @@ func makeManifest1() {
ve.NextFileNum = 5
ve.LastSeqNum = 20
ve.NewFiles = []manifest.NewFileEntry{
{0, &manifest.FileMetadata{
{6, &manifest.FileMetadata{
FileNum: 1, SmallestSeqNum: 2, LargestSeqNum: 5}}}
writeVE(writer, &ve)

ve.MinUnflushedLogNum = 3
ve.NewFiles = []manifest.NewFileEntry{
{0, &manifest.FileMetadata{
{6, &manifest.FileMetadata{
FileNum: 2, SmallestSeqNum: 1, LargestSeqNum: 4}}}
writeVE(writer, &ve)

Expand Down
Binary file modified tool/testdata/MANIFEST-invalid
Binary file not shown.
12 changes: 6 additions & 6 deletions tool/testdata/manifest_dump
Original file line number Diff line number Diff line change
Expand Up @@ -162,31 +162,31 @@ MANIFEST-invalid
log-num: 2
next-file-num: 5
last-seq-num: 20
added: L0 000001:0<#2-#5>[#0,DEL-#0,DEL]
added: L6 000001:0<#2-#5>[#0,DEL-#0,DEL]
65
comparer: leveldb.BytewiseComparator
log-num: 3
next-file-num: 5
last-seq-num: 20
added: L0 000002:0<#1-#4>[#0,DEL-#0,DEL]
added: L6 000002:0<#1-#4>[#0,DEL-#0,DEL]
EOF
pebble: internal error: L0 flushed file 000001 overlaps with the largest seqnum of a preceding flushed file: 2-5 vs 4
pebble: internal error: L6 files 000001 and 000002 have overlapping ranges: #0,DEL-#0,DEL vs #0,DEL-#0,DEL

manifest check
./testdata/MANIFEST-invalid
----
MANIFEST-invalid: offset: 65 err: pebble: internal error: L0 flushed file 000001 overlaps with the largest seqnum of a preceding flushed file: 2-5 vs 4
MANIFEST-invalid: offset: 65 err: pebble: internal error: L6 files 000001 and 000002 have overlapping ranges: #0,DEL-#0,DEL vs #0,DEL-#0,DEL
Version state before failed Apply
--- L0 ---
000001:0<#2-#5>[#0,DEL-#0,DEL]
--- L1 ---
--- L2 ---
--- L3 ---
--- L4 ---
--- L5 ---
--- L6 ---
000001:0<#2-#5>[#0,DEL-#0,DEL]
Version edit that failed
added: L0 000002:0<#1-#4>[#0,DEL-#0,DEL]
added: L6 000002:0<#1-#4>[#0,DEL-#0,DEL]

manifest dump
./testdata/find-db/MANIFEST-000001
Expand Down

0 comments on commit 6593c22

Please sign in to comment.