compactor.go

package complete

import (
	"context"
	"errors"
	"fmt"
	"time"

	"github.com/rs/zerolog"
	"golang.org/x/sync/semaphore"

	"github.com/onflow/flow-go/ledger"
	"github.com/onflow/flow-go/ledger/complete/mtrie/trie"
	realWAL "github.com/onflow/flow-go/ledger/complete/wal"
	"github.com/onflow/flow-go/module/lifecycle"
	"github.com/onflow/flow-go/module/observable"
)

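// WALTrieUpdate carries a trie update to be written to the WAL, together
// with the channels used to synchronize the WAL write with the
// corresponding ledger state update (see processUpdateResult).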
type WALTrieUpdate struct {
	Update   *ledger.TrieUpdate
	ResultCh chan<- error
	DoneCh   <-chan struct{}
}

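// checkpointResult is the outcome of an asynchronous checkpointing attempt:
// the checkpoint number and the error, if any.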
type checkpointResult struct {
	num int
	err error
}

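// Compactor writes trie updates to the write-ahead log (WAL) and triggers
// checkpointing asynchronously once enough WAL segments have been finalized.
// It retains at most checkpointsToKeep checkpoint files on disk and notifies
// subscribed observers after each successful checkpoint.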
type Compactor struct {
	checkpointer       *realWAL.Checkpointer
	wal                *realWAL.DiskWAL
	ledger             *Ledger
	logger             zerolog.Logger
	stopCh             chan chan struct{}
	trieUpdateCh       <-chan *WALTrieUpdate
	trieUpdateDoneCh   chan<- struct{}
	lm                 *lifecycle.LifecycleManager
	observers          map[observable.Observer]struct{}
	checkpointDistance uint
	checkpointsToKeep  uint
}

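// NewCompactor creates a new Compactor for the given ledger and WAL.
// checkpointDistance is the number of finalized WAL segments between
// checkpoints (values below 1 are treated as 1), and checkpointsToKeep is
// the number of checkpoint files to retain on disk (0 keeps all of them).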
func NewCompactor(l *Ledger, w *realWAL.DiskWAL, checkpointDistance uint, checkpointsToKeep uint, logger zerolog.Logger) (*Compactor, error) {
	if checkpointDistance < 1 {
		checkpointDistance = 1
	}

	checkpointer, err := w.NewCheckpointer()
	if err != nil {
		return nil, err
	}

	trieUpdateCh := l.TrieUpdateChan()
	if trieUpdateCh == nil {
		return nil, errors.New("failed to get valid trie update channel from ledger")
	}

	trieUpdateDoneCh := l.TrieUpdateDoneChan()
	if trieUpdateDoneCh == nil {
		return nil, errors.New("failed to get valid trie update done channel from ledger")
	}

	return &Compactor{
		checkpointer:       checkpointer,
		wal:                w,
		ledger:             l,
		logger:             logger,
		stopCh:             make(chan chan struct{}),
		trieUpdateCh:       trieUpdateCh,
		trieUpdateDoneCh:   l.trieUpdateDoneCh,
		observers:          make(map[observable.Observer]struct{}),
		lm:                 lifecycle.NewLifecycleManager(),
		checkpointDistance: checkpointDistance,
		checkpointsToKeep:  checkpointsToKeep,
	}, nil
}

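// Subscribe registers an observer that is notified with the checkpoint
// number after each successful checkpoint.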
func (c *Compactor) Subscribe(observer observable.Observer) {
	var void struct{}
	c.observers[observer] = void
}

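// Unsubscribe removes a previously registered observer.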
func (c *Compactor) Unsubscribe(observer observable.Observer) {
	delete(c.observers, observer)
}

// Ready starts the Compactor's processing goroutine and returns a channel
// that is closed once startup has completed.
func (c *Compactor) Ready() <-chan struct{} {
	c.lm.OnStart(func() {
		go c.run()
	})
	return c.lm.Started()
}

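// Done shuts the Compactor down: it notifies observers, signals the
// processing goroutine to stop and waits for it, then returns a channel
// that is closed once shutdown has completed.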
func (c *Compactor) Done() <-chan struct{} {
	c.lm.OnStop(func() {
		// Notify observers
		for observer := range c.observers {
			observer.OnComplete()
		}

		// Signal Compactor goroutine to stop
		doneCh := make(chan struct{})
		c.stopCh <- doneCh

		// Wait for Compactor goroutine to stop
		<-doneCh

		// Close trieUpdateDoneCh to signal trie updates are finished
		close(c.trieUpdateDoneCh)
	})
	return c.lm.Stopped()
}

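// run is the Compactor's main loop. It records incoming trie updates in the
// WAL, tracks the active segment number, and launches asynchronous
// checkpointing (at most one at a time) whenever enough segments have been
// finalized since the last checkpoint.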
func (c *Compactor) run() {
	// checkpointSem is used to limit checkpointing to one at a time.
	// If the previous checkpointing isn't finished when enough segments
	// are finalized for the next checkpoint, checkpointing is retried
	// when the next segment is finalized.
	// This avoids having more tries in memory than needed.
	checkpointSem := semaphore.NewWeighted(1)

	checkpointResultCh := make(chan checkpointResult, 1)

	// Get active segment number.
	// activeSegmentNum is updated when a record is written to a new segment.
	_, activeSegmentNum, err := c.wal.Segments()
	if err != nil {
		c.logger.Error().Err(err).Msg("compactor failed to get active segment number")
		activeSegmentNum = -1
	}

	lastCheckpointNum, err := c.checkpointer.LatestCheckpoint()
	if err != nil {
		c.logger.Error().Err(err).Msg("compactor failed to get last checkpoint number")
		lastCheckpointNum = -1
	}

	// Compute next checkpoint number.
	// nextCheckpointNum is updated when
	// - checkpointing starts, fails to start, or fails.
	// - tries snapshot fails.
	// NOTE: the next checkpoint number must be >= the active segment num.
	// We can't reuse mtrie state to checkpoint tries in older segments.
	nextCheckpointNum := lastCheckpointNum + int(c.checkpointDistance)
	if activeSegmentNum > nextCheckpointNum {
		nextCheckpointNum = activeSegmentNum
	}

	ctx, cancel := context.WithCancel(context.Background())

Loop:
	for {
		select {

		case doneCh := <-c.stopCh:
			defer close(doneCh)
			cancel()
			break Loop

		case checkpointResult := <-checkpointResultCh:
			if checkpointResult.err != nil {
				c.logger.Error().Err(checkpointResult.err).Msgf(
					"compactor failed to checkpoint %d", checkpointResult.num,
				)

				// Retry checkpointing after the active segment is finalized.
				nextCheckpointNum = activeSegmentNum
			}

		case update, ok := <-c.trieUpdateCh:
			if !ok {
				// trieUpdateCh channel is closed.
				// Wait for stop signal from c.stopCh
				continue
			}

			var checkpointNum int
			var checkpointTries []*trie.MTrie
			activeSegmentNum, checkpointNum, checkpointTries =
				c.processTrieUpdate(update, activeSegmentNum, nextCheckpointNum)

			if checkpointTries == nil {
				// Don't checkpoint yet because either
				// - not enough segments for checkpointing (nextCheckpointNum >= activeSegmentNum), or
				// - failed to get ledger state snapshot (nextCheckpointNum < activeSegmentNum)
				if nextCheckpointNum < activeSegmentNum {
					nextCheckpointNum = activeSegmentNum
				}
				continue
			}

			// Try to checkpoint
			if checkpointSem.TryAcquire(1) {

				// Compute next checkpoint number
				nextCheckpointNum = checkpointNum + int(c.checkpointDistance)

				go func() {
					defer checkpointSem.Release(1)
					err := c.checkpoint(ctx, checkpointTries, checkpointNum)
					checkpointResultCh <- checkpointResult{checkpointNum, err}
				}()
			} else {
				// Failed to get semaphore because checkpointing is running.
				// Try again when the active segment is finalized.
				c.logger.Info().Msgf("compactor delayed checkpoint %d because prior checkpointing is ongoing", nextCheckpointNum)
				nextCheckpointNum = activeSegmentNum
			}
		}
	}

	// Drain and process remaining trie updates in channel.
	for update := range c.trieUpdateCh {
		_, _, err := c.wal.RecordUpdate(update.Update)
		select {
		case update.ResultCh <- err:
		default:
		}
	}

	// Don't wait for checkpointing to finish because it might take too long.
}

// processUpdateResult sends the WAL update result on the ResultCh channel
// and waits for a signal on the DoneCh channel.
// This ensures that WAL update and ledger state update are in sync.
func processUpdateResult(update *WALTrieUpdate, updateResult error) {
	// Send result of WAL update
	update.ResultCh <- updateResult

	// Wait for trie update to complete
	<-update.DoneCh
}

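// checkpoint creates a checkpoint file from the given tries, removes
// checkpoints beyond the retention limit, and notifies observers of the new
// checkpoint number. It returns early without error if ctx is canceled.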
func (c *Compactor) checkpoint(ctx context.Context, tries []*trie.MTrie, checkpointNum int) error {
	err := createCheckpoint(c.checkpointer, c.logger, tries, checkpointNum)
	if err != nil {
		return fmt.Errorf("cannot create checkpoints: %w", err)
	}

	// Return if context is canceled.
	select {
	case <-ctx.Done():
		return nil
	default:
	}

	err = cleanupCheckpoints(c.checkpointer, int(c.checkpointsToKeep))
	if err != nil {
		return fmt.Errorf("cannot cleanup checkpoints: %w", err)
	}

	if checkpointNum > 0 {
		for observer := range c.observers {
			// Don't notify observer if context is canceled.
			// observer.OnComplete() is called when Compactor starts shutting down,
			// which may close the channel that observer.OnNext() uses to send data.
			select {
			case <-ctx.Done():
				return nil
			default:
				observer.OnNext(checkpointNum)
			}
		}
	}

	return nil
}

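// createCheckpoint serializes the given tries into the checkpoint file
// identified by checkpointNum and logs the time taken.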
func createCheckpoint(checkpointer *realWAL.Checkpointer, logger zerolog.Logger, tries []*trie.MTrie, checkpointNum int) (err error) {
	logger.Info().Msgf("serializing checkpoint %d", checkpointNum)

	startTime := time.Now()

	writer, err := checkpointer.CheckpointWriter(checkpointNum)
	if err != nil {
		return fmt.Errorf("cannot generate checkpoint writer: %w", err)
	}
	defer func() {
		closeErr := writer.Close()
		// Return close error if there isn't any prior error to return.
		// The named return value is needed for this assignment to reach the caller.
		if err == nil {
			err = closeErr
		}
	}()

	err = realWAL.StoreCheckpoint(writer, tries...)
	if err != nil {
		return fmt.Errorf("error serializing checkpoint (%d): %w", checkpointNum, err)
	}

	duration := time.Since(startTime)
	logger.Info().Float64("total_time_s", duration.Seconds()).Msgf("created checkpoint %d", checkpointNum)

	return nil
}

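// cleanupCheckpoints removes the oldest checkpoint files so that at most
// checkpointsToKeep checkpoints remain. A checkpointsToKeep of 0 keeps all
// checkpoints.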
func cleanupCheckpoints(checkpointer *realWAL.Checkpointer, checkpointsToKeep int) error {
	// Don't list checkpoints if we keep them all
	if checkpointsToKeep == 0 {
		return nil
	}

	checkpoints, err := checkpointer.Checkpoints()
	if err != nil {
		return fmt.Errorf("cannot list checkpoints: %w", err)
	}

	if len(checkpoints) > int(checkpointsToKeep) {
		// if condition guarantees this never fails
		checkpointsToRemove := checkpoints[:len(checkpoints)-int(checkpointsToKeep)]

		for _, checkpoint := range checkpointsToRemove {
			err := checkpointer.RemoveCheckpoint(checkpoint)
			if err != nil {
				return fmt.Errorf("cannot remove checkpoint %d: %w", checkpoint, err)
			}
		}
	}
	return nil
}

// processTrieUpdate writes the trie update to the WAL, updates activeSegmentNum,
// and takes a snapshot of the ledger state for checkpointing if needed.
// It also sends the WAL update result and waits for trie update completion.
func (c *Compactor) processTrieUpdate(
	update *WALTrieUpdate,
	activeSegmentNum int,
	nextCheckpointNum int,
) (
	_activeSegmentNum int,
	checkpointNum int,
	checkpointTries []*trie.MTrie,
) {

	// RecordUpdate returns the segment number the record was written to.
	// The returned segment number (>= 0) can be
	// - the same as the previous segment number (same segment), or
	// - incremented by 1 from the previous segment number (new segment)
	segmentNum, skipped, updateErr := c.wal.RecordUpdate(update.Update)

	// processUpdateResult must be called to ensure that the ledger state update isn't blocked.
	defer processUpdateResult(update, updateErr)

	if activeSegmentNum == -1 {
		// Recover from failure to get active segment number at initialization.
		return segmentNum, -1, nil
	}

	if updateErr != nil || skipped || segmentNum == activeSegmentNum {
		return activeSegmentNum, -1, nil
	}

	// In the remaining code: segmentNum > activeSegmentNum,
	// i.e. the active segment is finalized.

	// Check that the new segment number is incremented by 1
	if segmentNum != activeSegmentNum+1 {
		c.logger.Error().Msg(fmt.Sprintf("compactor got unexpected new segment number %d, want %d", segmentNum, activeSegmentNum+1))
	}

	// Update activeSegmentNum
	prevSegmentNum := activeSegmentNum
	activeSegmentNum = segmentNum

	if nextCheckpointNum > prevSegmentNum {
		// Not enough segments for checkpointing
		return activeSegmentNum, -1, nil
	}

	// In the remaining code: nextCheckpointNum == prevSegmentNum,
	// i.e. enough segments are created for checkpointing.

	// Get ledger snapshot before sending WAL update result.
	// At this point, the ledger snapshot contains tries up to the
	// last update (logged as the last record in the finalized segment).
	// The ledger doesn't include the new trie for this update
	// until the WAL result is sent back.
	tries, err := c.ledger.Tries()
	if err != nil {
		c.logger.Error().Err(err).Msg("compactor failed to get ledger tries")
		return activeSegmentNum, -1, nil
	}

	checkpointNum = nextCheckpointNum

	return activeSegmentNum, checkpointNum, tries
}