Skip to content

Commit

Permalink
Merge pull request #72 from aarshkshah1992/feat/periodic-gc
Browse files Browse the repository at this point in the history
periodic GC for badger datastore
  • Loading branch information
Stebalien authored Oct 11, 2019
2 parents 0bb48f3 + 5cc6d95 commit 8a41782
Showing 1 changed file with 66 additions and 8 deletions.
74 changes: 66 additions & 8 deletions datastore.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ type Datastore struct {
closing chan struct{}

gcDiscardRatio float64
maxGcDuration time.Duration
gcInterval time.Duration
}

// Implements the datastore.Txn interface, enabling transaction support for
Expand All @@ -42,7 +44,12 @@ type txn struct {

// Options are the badger datastore options, reexported here for convenience.
type Options struct {
gcDiscardRatio float64
// Please refer to the Badger docs to see what this is for
GcDiscardRatio float64
// Maximum duration to perform a single GC cycle for
MaxGcDuration time.Duration
// Interval between GC cycles
GcInterval time.Duration

badger.Options
}
Expand All @@ -52,7 +59,9 @@ var DefaultOptions Options

func init() {
DefaultOptions = Options{
gcDiscardRatio: 0.1,
GcDiscardRatio: 0.2,
MaxGcDuration: 1 * time.Minute,
GcInterval: 45 * time.Minute,
Options: badger.DefaultOptions(""),
}
DefaultOptions.Options.CompactL0OnClose = false
Expand All @@ -62,6 +71,7 @@ func init() {
var _ ds.Datastore = (*Datastore)(nil)
var _ ds.TxnDatastore = (*Datastore)(nil)
var _ ds.TTLDatastore = (*Datastore)(nil)
var _ ds.GCDatastore = (*Datastore)(nil)

// NewDatastore creates a new badger datastore.
//
Expand All @@ -70,12 +80,18 @@ func NewDatastore(path string, options *Options) (*Datastore, error) {
// Copy the options because we modify them.
var opt badger.Options
var gcDiscardRatio float64
var maxGcDuration time.Duration
var gcInterval time.Duration
if options == nil {
opt = badger.DefaultOptions("")
gcDiscardRatio = DefaultOptions.gcDiscardRatio
gcDiscardRatio = DefaultOptions.GcDiscardRatio
maxGcDuration = DefaultOptions.MaxGcDuration
gcInterval = DefaultOptions.GcInterval
} else {
opt = options.Options
gcDiscardRatio = options.gcDiscardRatio
gcDiscardRatio = options.GcDiscardRatio
maxGcDuration = options.MaxGcDuration
gcInterval = options.GcInterval
}

opt.Dir = path
Expand All @@ -90,11 +106,32 @@ func NewDatastore(path string, options *Options) (*Datastore, error) {
return nil, err
}

return &Datastore{
ds := &Datastore{
DB: kv,
closing: make(chan struct{}),
gcDiscardRatio: gcDiscardRatio,
}, nil
maxGcDuration: maxGcDuration,
gcInterval: gcInterval,
}

// schedule periodic GC till db is closed
go ds.periodicGC()

return ds, nil
}

// Keep scheduling GC's AFTER `gcInterval` has passed since the previous GC
func (d *Datastore) periodicGC() {
for {
select {
case <-time.After(d.gcInterval):
if err := d.CollectGarbage(); err != nil {
log.Warningf("error during a GC cycle: %s", err)
}
case <-d.closing:
return
}
}
}

// NewTransaction starts a new transaction. The resulting transaction object
Expand Down Expand Up @@ -275,17 +312,38 @@ func (d *Datastore) Batch() (ds.Batch, error) {
return tx, nil
}

func (d *Datastore) CollectGarbage() error {
func (d *Datastore) CollectGarbage() (err error) {
d.closeLk.RLock()
defer d.closeLk.RUnlock()
if d.closed {
return ErrClosed
}

err := d.DB.RunValueLogGC(d.gcDiscardRatio)
gcTimeout := time.NewTimer(d.maxGcDuration)
defer gcTimeout.Stop()

// The idea is to keep calling DB.RunValueLogGC() till Badger no longer has any log files
// to GC(which would be indicated by an error, please refer to Badger GC docs). The timeout is to
// ensure we do not keep calling GC in case Badger has accumulated
// excessive garbage. However, we will finish earlier if Badger has nothing left to GC.
LOOP:
for {
select {
case <-gcTimeout.C:
break LOOP
default:
if err == nil {
err = d.DB.RunValueLogGC(d.gcDiscardRatio)
} else {
break LOOP
}
}
}

if err == badger.ErrNoRewrite {
err = nil
}

return err
}

Expand Down

0 comments on commit 8a41782

Please sign in to comment.