forked from cockroachdb/pebble
-
Notifications
You must be signed in to change notification settings - Fork 0
/
version_set.go
388 lines (354 loc) · 10 KB
/
version_set.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
// Copyright 2012 The LevelDB-Go and Pebble Authors. All rights reserved. Use
// of this source code is governed by a BSD-style license that can be found in
// the LICENSE file.
package pebble
import (
"fmt"
"io"
"os"
"sync"
"sync/atomic"
"github.com/petermattis/pebble/internal/base"
"github.com/petermattis/pebble/internal/record"
"github.com/petermattis/pebble/vfs"
)
// versionSet manages a collection of immutable versions, and manages the
// creation of a new version from the most recent version. A new version is
// created from an existing version by applying a version edit which is just
// like it sounds: a delta from the previous version. Version edits are logged
// to the manifest file, which is replayed at startup.
type versionSet struct {
// Immutable fields.
// dirname is the directory holding the DB's files; it is set by load.
dirname string
// mu is the mutex passed to load. It is shared with versions and with
// writerCond, and logAndApply expects its caller to hold it.
mu *sync.Mutex
// opts are the options the version set was loaded with.
opts *Options
// fs is the filesystem used for all file access (opts.FS).
fs vfs.FS
// cmp and cmpName come from opts.Comparer. cmpName is checked against the
// comparer name recorded in the manifest when it is replayed.
cmp Compare
cmpName string
// Dynamic base level allows the dynamic base level computation to be
// disabled. Used by tests which want to create specific LSM structures.
dynamicBaseLevel bool
// Mutable fields.
// versions is the list of versions; the one most recently appended (the
// back of the list) is the current version.
versions versionList
// picker is the compaction picker built by logAndApply for the version it
// installed.
picker *compactionPicker
// metrics holds per-level metrics, updated by logAndApply.
metrics VersionMetrics
// obsoleteTables collects the file numbers passed to addObsoleteLocked.
obsoleteTables []uint64
// obsoleteManifests collects the file numbers of manifests that logAndApply
// replaced with a newer manifest.
obsoleteManifests []uint64
// NOTE(review): presumably the file numbers of obsolete OPTIONS files; it is
// not written by any code in this file - confirm against the callers.
obsoleteOptions []uint64
// logNumber and prevLogNumber hold the most recent non-zero values recorded
// by a version edit, either replayed by load or applied by logAndApply.
logNumber uint64
prevLogNumber uint64
// nextFileNumber is the next file number to hand out; see nextFileNum and
// markFileNumUsed.
nextFileNumber uint64
logSeqNum uint64 // next seqNum to use for WAL writes
visibleSeqNum uint64 // visible seqNum (<= logSeqNum)
// manifestFileNumber is the file number of the manifest most recently
// created by logAndApply; load leaves it at zero.
manifestFileNumber uint64
// manifestFile is the open manifest file and manifest is the record writer
// layered on top of it. Both are nil until createManifest succeeds.
manifestFile vfs.File
manifest *record.Writer
// writing is true while a logAndApply call is in progress; other callers
// wait on writerCond until it is cleared.
writing bool
writerCond sync.Cond
}
// load initializes the version set for the DB stored in dirname and rebuilds
// the current version by replaying the manifest named by the CURRENT file.
//
// The CURRENT file must be non-empty, at most 4096 bytes, and terminated by a
// newline; its contents without the newline name the manifest relative to
// dirname. Every version edit in the manifest is accumulated into a single
// bulk edit. The log number, previous log number, next file number and last
// sequence number are each taken from the last edit that carried a non-zero
// value for them. mu becomes the mutex shared by the version set, its version
// list and writerCond.
//
// An error is returned if CURRENT or the manifest cannot be opened, read or
// decoded, if the manifest records a comparer name different from the one in
// opts, or if the manifest is incomplete.
func (vs *versionSet) load(dirname string, opts *Options, mu *sync.Mutex) error {
	vs.dirname = dirname
	vs.mu = mu
	vs.versions.mu = mu
	vs.writerCond.L = mu
	vs.opts = opts
	vs.fs = opts.FS
	vs.cmp = opts.Comparer.Compare
	vs.cmpName = opts.Comparer.Name
	vs.dynamicBaseLevel = true
	vs.versions.init()
	// For historical reasons, the next file number is initialized to 2.
	vs.nextFileNumber = 2

	// Read the CURRENT file to find the current manifest file.
	current, err := vs.fs.Open(base.MakeFilename(dirname, fileTypeCurrent, 0))
	if err != nil {
		return fmt.Errorf("pebble: could not open CURRENT file for DB %q: %v", dirname, err)
	}
	defer current.Close()
	stat, err := current.Stat()
	if err != nil {
		return err
	}
	n := stat.Size()
	if n == 0 {
		return fmt.Errorf("pebble: CURRENT file for DB %q is empty", dirname)
	}
	if n > 4096 {
		return fmt.Errorf("pebble: CURRENT file for DB %q is too large", dirname)
	}
	b := make([]byte, n)
	// The io.ReaderAt contract allows an implementation to return io.EOF
	// together with a complete read that ends exactly at the end of the file.
	// The whole file is read here, so io.EOF is only an error when the buffer
	// was not filled.
	nRead, err := current.ReadAt(b, 0)
	if err != nil && !(err == io.EOF && int64(nRead) == n) {
		return err
	}
	if b[n-1] != '\n' {
		return fmt.Errorf("pebble: CURRENT file for DB %q is malformed", dirname)
	}
	b = b[:n-1]

	// Read the versionEdits in the manifest file.
	var bve bulkVersionEdit
	manifest, err := vs.fs.Open(dirname + string(os.PathSeparator) + string(b))
	if err != nil {
		return fmt.Errorf("pebble: could not open manifest file %q for DB %q: %v", b, dirname, err)
	}
	defer manifest.Close()
	rr := record.NewReader(manifest, 0 /* logNum */)
	for {
		r, err := rr.Next()
		if err == io.EOF {
			break
		}
		if err != nil {
			return err
		}
		var ve versionEdit
		err = ve.decode(r)
		if err != nil {
			return err
		}
		// An edit only carries a comparer name when it is non-empty; when it
		// does, it must match the comparer the DB is being opened with.
		if ve.comparatorName != "" {
			if ve.comparatorName != vs.cmpName {
				return fmt.Errorf("pebble: manifest file %q for DB %q: "+
					"comparer name from file %q != comparer name from Options %q",
					b, dirname, ve.comparatorName, vs.cmpName)
			}
		}
		bve.accumulate(&ve)
		// A zero field means the edit did not set it, so the value from an
		// earlier edit is kept.
		if ve.logNumber != 0 {
			vs.logNumber = ve.logNumber
		}
		if ve.prevLogNumber != 0 {
			vs.prevLogNumber = ve.prevLogNumber
		}
		if ve.nextFileNumber != 0 {
			vs.nextFileNumber = ve.nextFileNumber
		}
		if ve.lastSequence != 0 {
			vs.logSeqNum = ve.lastSequence
		}
	}

	if vs.logNumber == 0 || vs.nextFileNumber == 0 {
		// A freshly created DB has no log number yet; it is recognized by the
		// next file number still being at its initial value of 2. Anything
		// else means the manifest is missing required fields.
		if vs.nextFileNumber != 2 {
			return fmt.Errorf("pebble: incomplete manifest file %q for DB %q", b, dirname)
		}
	}
	// Make sure file numbers handed out later do not collide with the logs.
	vs.markFileNumUsed(vs.logNumber)
	vs.markFileNumUsed(vs.prevLogNumber)

	newVersion, err := bve.apply(opts, nil, vs.cmp)
	if err != nil {
		return err
	}
	vs.append(newVersion)
	return nil
}
// logAndApply logs the version edit to the manifest, applies the version edit
// to the current version, and installs the new version. DB.mu must be held
// when calling this method and will be released temporarily while performing
// file I/O.
//
// Calls are serialized: a caller waits on writerCond until no other call is
// writing the manifest. ve is modified in place: its nextFileNumber and
// lastSequence fields are overwritten with the version set's current values
// before it is encoded. jobID is only used to tag ManifestCreated events, and
// dir is synced after the CURRENT file has been switched to a new manifest.
//
// An error is returned, and no new version is installed, if applying the
// edit, creating a new manifest, or starting a new manifest record fails.
// Failures after that point are reported through opts.Logger.Fatalf.
func (vs *versionSet) logAndApply(jobID int, ve *versionEdit, dir vfs.File) error {
// Wait for any existing writing to the manifest to complete, then mark the
// manifest as busy.
for vs.writing {
vs.writerCond.Wait()
}
vs.writing = true
defer func() {
// Clear the busy flag and wake one waiter, which re-checks vs.writing.
vs.writing = false
vs.writerCond.Signal()
}()
// A log number carried by the edit must not move backwards and must already
// have been allocated, i.e. be below nextFileNumber.
if ve.logNumber != 0 {
if ve.logNumber < vs.logNumber || vs.nextFileNumber <= ve.logNumber {
panic(fmt.Sprintf("pebble: inconsistent versionEdit logNumber %d", ve.logNumber))
}
}
ve.nextFileNumber = vs.nextFileNumber
ve.lastSequence = atomic.LoadUint64(&vs.logSeqNum)
currentVersion := vs.currentVersion()
var newVersion *version
// Generate a new manifest if we don't currently have one, or the current one
// is too large.
var newManifestFileNumber uint64
if vs.manifest == nil || vs.manifest.Size() >= vs.opts.MaxManifestFileSize {
newManifestFileNumber = vs.nextFileNum()
}
var picker *compactionPicker
if err := func() error {
// Release the mutex for the duration of the work below; it is re-acquired
// when this closure returns, on every path.
vs.mu.Unlock()
defer vs.mu.Lock()
var bve bulkVersionEdit
bve.accumulate(ve)
var err error
newVersion, err = bve.apply(vs.opts, currentVersion, vs.cmp)
if err != nil {
return err
}
if newManifestFileNumber != 0 {
if err := vs.createManifest(vs.dirname, newManifestFileNumber); err != nil {
// Report the failed creation to the listener before returning.
if vs.opts.EventListener.ManifestCreated != nil {
vs.opts.EventListener.ManifestCreated(ManifestCreateInfo{
JobID: jobID,
Path: base.MakeFilename(vs.dirname, fileTypeManifest, newManifestFileNumber),
FileNum: newManifestFileNumber,
Err: err,
})
}
return err
}
}
w, err := vs.manifest.Next()
if err != nil {
return err
}
// NB: Any error from this point on is considered fatal as we don't know if
// the MANIFEST write occurred or not. Trying to determine that is
// fraught. Instead we rely on the standard recovery mechanism run when a
// database is open. In particular, that mechanism generates a new MANIFEST
// and ensures it is synced.
if err := ve.encode(w); err != nil {
vs.opts.Logger.Fatalf("MANIFEST write failed: %v", err)
return err
}
if err := vs.manifest.Flush(); err != nil {
vs.opts.Logger.Fatalf("MANIFEST flush failed: %v", err)
return err
}
if err := vs.manifestFile.Sync(); err != nil {
vs.opts.Logger.Fatalf("MANIFEST sync failed: %v", err)
return err
}
if newManifestFileNumber != 0 {
// The new manifest now holds the snapshot and this edit and has been
// synced; switch CURRENT over to it and sync dir.
if err := setCurrentFile(vs.dirname, vs.fs, newManifestFileNumber); err != nil {
vs.opts.Logger.Fatalf("MANIFEST set current failed: %v", err)
return err
}
if err := dir.Sync(); err != nil {
vs.opts.Logger.Fatalf("MANIFEST dirsync failed: %v", err)
return err
}
// Report the successful creation to the listener.
if vs.opts.EventListener.ManifestCreated != nil {
vs.opts.EventListener.ManifestCreated(ManifestCreateInfo{
JobID: jobID,
Path: base.MakeFilename(vs.dirname, fileTypeManifest, newManifestFileNumber),
FileNum: newManifestFileNumber,
})
}
}
// Build the compaction picker for the new version while the mutex is still
// released.
picker = newCompactionPicker(newVersion, vs.opts)
if !vs.dynamicBaseLevel {
// With dynamic base level computation disabled the base level is pinned
// to 1.
picker.baseLevel = 1
}
return nil
}(); err != nil {
return err
}
// Install the new version.
vs.append(newVersion)
if ve.logNumber != 0 {
vs.logNumber = ve.logNumber
}
if ve.prevLogNumber != 0 {
vs.prevLogNumber = ve.prevLogNumber
}
if newManifestFileNumber != 0 {
// The manifest that was just replaced, if there was one, becomes obsolete.
if vs.manifestFileNumber != 0 {
vs.obsoleteManifests = append(vs.obsoleteManifests, vs.manifestFileNumber)
}
vs.manifestFileNumber = newManifestFileNumber
}
vs.picker = picker
// Apply the per-level metric updates carried by the edit, then recompute
// each level's file count and total size from the new version.
if ve.metrics != nil {
for level, update := range ve.metrics {
vs.metrics.Levels[level].Add(update)
}
}
for i := range vs.metrics.Levels {
l := &vs.metrics.Levels[i]
l.NumFiles = int64(len(newVersion.files[i]))
l.Size = uint64(totalSize(newVersion.files[i]))
}
return nil
}
// createManifest creates manifest file fileNum in dirname, writes a snapshot
// of the current version to it, and installs it as the manifest that
// subsequent version edits are appended to.
//
// The snapshot is a single version edit holding the comparer name and one
// new-file entry for every file at every level of the current version. It is
// encoded into the record writer but not flushed or synced here.
//
// On failure the partially written file is closed and removed, and the
// previously installed manifest (if any) is left in place. On success the
// manifest writer and file that were installed before the call are closed, so
// that rolling over to a new manifest does not leak a file handle.
func (vs *versionSet) createManifest(dirname string, fileNum uint64) (err error) {
	var (
		filename     = base.MakeFilename(dirname, fileTypeManifest, fileNum)
		manifestFile vfs.File
		manifest     *record.Writer
	)
	defer func() {
		// Both locals are reset to nil once ownership has passed to vs, so the
		// closes below only run on the failure paths.
		if manifest != nil {
			manifest.Close()
		}
		if manifestFile != nil {
			manifestFile.Close()
		}
		if err != nil {
			vs.fs.Remove(filename)
		}
	}()
	manifestFile, err = vs.fs.Create(filename)
	if err != nil {
		return err
	}
	manifest = record.NewWriter(manifestFile)

	snapshot := versionEdit{
		comparatorName: vs.cmpName,
	}
	for level, files := range vs.currentVersion().files {
		for _, meta := range files {
			snapshot.newFiles = append(snapshot.newFiles, newFileEntry{
				level: level,
				meta:  meta,
			})
		}
	}

	w, err1 := manifest.Next()
	if err1 != nil {
		return err1
	}
	if err := snapshot.encode(w); err != nil {
		return err
	}

	// Install the new manifest, then release the one it replaces. Without the
	// closes the previous writer and its file would be dropped while still
	// open. Close errors are deliberately ignored, matching the best-effort
	// cleanup above: nothing further is written to the replaced manifest.
	prevManifest, prevManifestFile := vs.manifest, vs.manifestFile
	vs.manifest, manifest = manifest, nil
	vs.manifestFile, manifestFile = manifestFile, nil
	if prevManifest != nil {
		prevManifest.Close()
	}
	if prevManifestFile != nil {
		prevManifestFile.Close()
	}
	return nil
}
// markFileNumUsed records that fileNum is in use: if the next file number to
// be handed out is not already above fileNum, it is bumped to fileNum+1.
func (vs *versionSet) markFileNumUsed(fileNum uint64) {
	if fileNum < vs.nextFileNumber {
		// Already past fileNum; nothing to do.
		return
	}
	vs.nextFileNumber = fileNum + 1
}
// nextFileNum hands out the next unused file number and advances the counter
// so the following call returns the number after it.
func (vs *versionSet) nextFileNum() uint64 {
	vs.nextFileNumber++
	return vs.nextFileNumber - 1
}
// append installs v at the back of the version list, making it the current
// version. v must not be referenced yet; the method panics otherwise. The
// version that was current before the call, if any, is released via
// unrefLocked, and v is referenced on behalf of the version set.
//
// NOTE(review): the use of unrefLocked suggests the shared mutex must be held
// by the caller - confirm against the version type.
func (vs *versionSet) append(v *version) {
	if v.refs != 0 {
		panic("pebble: version should be unreferenced")
	}
	// Drop the reference held on the version that is being superseded.
	if list := &vs.versions; !list.empty() {
		list.back().unrefLocked()
	}
	v.vs = vs
	v.ref()
	vs.versions.pushBack(v)
}
// currentVersion returns the version at the back of the version list, which
// is the one most recently installed by append.
func (vs *versionSet) currentVersion() *version {
	current := vs.versions.back()
	return current
}
// addLiveFileNums adds to m the file number of every file, at every level, of
// every version currently in the version list. Existing entries in m are
// kept; a file that appears in several versions is recorded once.
func (vs *versionSet) addLiveFileNums(m map[uint64]struct{}) {
	// The list is circular: iteration starts after the root element and stops
	// when it comes back around to it.
	root := &vs.versions.root
	for v := root.next; v != root; v = v.next {
		for level := range v.files {
			for i := range v.files[level] {
				m[v.files[level][i].fileNum] = struct{}{}
			}
		}
	}
}
// addObsoleteLocked appends the given file numbers to the list of obsolete
// tables.
//
// NOTE(review): the Locked suffix suggests the caller must hold the shared
// mutex - confirm against the callers.
func (vs *versionSet) addObsoleteLocked(obsolete []uint64) {
	if len(obsolete) == 0 {
		// Appending nothing would leave the list unchanged.
		return
	}
	vs.obsoleteTables = append(vs.obsoleteTables, obsolete...)
}