localstore: reduce critical section size on gc #1435
@@ -22,6 +22,7 @@ import (
     "time"

+    "github.com/ethersphere/bee/pkg/shed"
     "github.com/ethersphere/bee/pkg/swarm"
     "github.com/syndtr/goleveldb/leveldb"
 )
@@ -85,13 +86,20 @@ func (db *DB) collectGarbage() (collectedCount uint64, done bool, err error) {
         }
         totalTimeMetric(db.metrics.TotalTimeCollectGarbage, start)
     }(time.Now())

     batch := new(leveldb.Batch)
     target := db.gcTarget()

-    // protect database from changing idexes and gcSize
+    // tell the localstore to start logging dirty addresses
     db.batchMu.Lock()
-    defer db.batchMu.Unlock()
+    db.gcRunning = true
+    db.batchMu.Unlock()
+
+    defer func() {
+        db.batchMu.Lock()
+        db.gcRunning = false
+        db.dirtyAddresses = nil
+        db.batchMu.Unlock()
+    }()

     // run through the recently pinned chunks and
     // remove them from the gcIndex before iterating through gcIndex
@@ -109,6 +117,7 @@ func (db *DB) collectGarbage() (collectedCount uint64, done bool, err error) {
     done = true
     first := true
     start := time.Now()
+    candidates := make([]shed.Item, 0)
     err = db.gcIndex.Iterate(func(item shed.Item) (stop bool, err error) {
         if first {
             totalTimeMetric(db.metrics.TotalTimeGCFirstItem, start)
@@ -118,39 +127,69 @@ func (db *DB) collectGarbage() (collectedCount uint64, done bool, err error) {
             return true, nil
         }

+        candidates = append(candidates, item)
+
+        collectedCount++
+        if collectedCount >= gcBatchSize {
+            // batch size limit reached,
+            // another gc run is needed
+            done = false
+            return true, nil
+        }
+        return false, nil
+    }, nil)
+    if err != nil {
+        return 0, false, err
+    }
+    db.metrics.GCCollectedCounter.Add(float64(collectedCount))
+    if testHookGCIteratorDone != nil {
+        testHookGCIteratorDone()
+    }
+
+    // protect database from changing idexes and gcSize
+    db.batchMu.Lock()
+    defer totalTimeMetric(db.metrics.TotalTimeGCLock, time.Now())
+    defer db.batchMu.Unlock()
+
+    // refresh gcSize value, since it might have
+    // changed in the meanwhile
+    gcSize, err = db.gcSize.Get()
+    if err != nil {
+        return 0, false, err
+    }
+
+    // get rid of dirty entries
+    for _, item := range candidates {
+        if swarm.NewAddress(item.Address).MemberOf(db.dirtyAddresses) {
Review comment: the other way round, container is usually the receiver struct
Reply: but then we'd need to introduce a new type alias for …
+            collectedCount--
+            if gcSize-collectedCount > target {
+                done = false
+            }
+            continue
+        }

         db.metrics.GCStoreTimeStamps.Set(float64(item.StoreTimestamp))
         db.metrics.GCStoreAccessTimeStamps.Set(float64(item.AccessTimestamp))

         // delete from retrieve, pull, gc
         err = db.retrievalDataIndex.DeleteInBatch(batch, item)
         if err != nil {
-            return true, nil
+            return 0, false, err
         }
         err = db.retrievalAccessIndex.DeleteInBatch(batch, item)
         if err != nil {
-            return true, nil
+            return 0, false, err
         }
         err = db.pullIndex.DeleteInBatch(batch, item)
         if err != nil {
-            return true, nil
+            return 0, false, err
         }
         err = db.gcIndex.DeleteInBatch(batch, item)
         if err != nil {
-            return true, nil
+            return 0, false, err
         }
-        collectedCount++
-        if collectedCount >= gcBatchSize {
-            // bach size limit reached,
-            // another gc run is needed
-            done = false
-            return true, nil
-        }
-        return false, nil
-    }, nil)
-    if err != nil {
-        return 0, false, err
-    }
-    db.metrics.GCCollectedCounter.Add(float64(collectedCount))
+    }
+    db.metrics.GCCommittedCounter.Add(float64(collectedCount))
     db.gcSize.PutInBatch(batch, gcSize-collectedCount)

     err = db.shed.WriteBatch(batch)
@@ -286,3 +325,8 @@ func (db *DB) incGCSizeInBatch(batch *leveldb.Batch, change int64) (err error) {
 // information when a garbage collection run is done
 // and how many items it removed.
 var testHookCollectGarbage func(collectedCount uint64)
+
+// testHookGCIteratorDone is a hook which is called
+// when the GC is done collecting candidate items for
+// eviction.
+var testHookGCIteratorDone func()
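Note on the gcRunning / dirtyAddresses mechanism above: while a gc run is in progress, the localstore's write paths are expected to record the addresses they touch, so that the second, locked phase of collectGarbage can skip candidates that changed while the gc index was iterated without the lock. Those write paths are not part of this diff; the following is a minimal sketch of the idea under that assumption, where markDirty is a hypothetical helper name rather than code from this PR.

// markDirty is a hypothetical helper illustrating how a write path could
// record an address as dirty while garbage collection is running.
// The caller is assumed to already hold db.batchMu.
func (db *DB) markDirty(addr swarm.Address) {
    if db.gcRunning {
        db.dirtyAddresses = append(db.dirtyAddresses, addr)
    }
}

In the locked phase, each skipped dirty candidate decrements collectedCount, and if gcSize minus the remaining count is still above target, done is set to false so another run is scheduled: for example, with gcSize 110, target 100 and 12 candidates of which 3 are dirty, only 9 deletions are committed, and since 110-9 > 100 the gc loop runs again.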
@@ -23,6 +23,7 @@ import (
     "io/ioutil"
     "math/rand"
     "os"
+    "sync"
     "testing"
     "time"
@@ -716,3 +717,146 @@ func addRandomChunks(t *testing.T, count int, db *DB, pin bool) []swarm.Chunk {
     }
     return chunks
 }
+
+// TestGC_NoEvictDirty checks that the garbage collection
+// does not evict chunks that are marked as dirty while the gc
+// is running.
+func TestGC_NoEvictDirty(t *testing.T) {
+    // lower the maximal number of chunks in a single
+    // gc batch to ensure multiple batches.
+    defer func(s uint64) { gcBatchSize = s }(gcBatchSize)
+    gcBatchSize = 2
+
+    chunkCount := 15
+
+    db := newTestDB(t, &Options{
+        Capacity: 10,
+    })
+
+    closed := db.close
+
+    testHookCollectGarbageChan := make(chan uint64)
+    t.Cleanup(setTestHookCollectGarbage(func(collectedCount uint64) {
+        select {
+        case testHookCollectGarbageChan <- collectedCount:
+        case <-closed:
+        }
+    }))
+
+    dirtyChan := make(chan struct{})
+    incomingChan := make(chan struct{})
+    t.Cleanup(setTestHookGCIteratorDone(func() {
+        incomingChan <- struct{}{}
+        <-dirtyChan
+    }))
+    addrs := make([]swarm.Address, 0)
+    mtx := new(sync.Mutex)
+    online := make(chan struct{})
+    go func() {
Review comment: why need a diff go routine? you waiting for it to terminate anyway
Reply: I'm waiting for it to be scheduled. There's no defer call on the …
+        close(online) // make sure this is scheduled, otherwise test might flake
+        i := 0
+        for range incomingChan {
+            // set a chunk to be updated in gc, resulting
+            // in a removal from the gc round. but don't do this
+            // for all chunks!
+            if i < 2 {
+                mtx.Lock()
+                _, err := db.Get(context.Background(), storage.ModeGetRequest, addrs[i])
+                mtx.Unlock()
+                if err != nil {
+                    t.Error(err)
+                }
+                i++
+                // we sleep so that the async update to gc index
+                // happens and that the dirtyAddresses get updated
+                time.Sleep(100 * time.Millisecond)
Review comment: why sleep here?
Reply: because …
+            }
+            dirtyChan <- struct{}{}
+        }
+    }()
+    <-online
+    // upload random chunks
+    for i := 0; i < chunkCount; i++ {
+        ch := generateTestRandomChunk()
+
+        _, err := db.Put(context.Background(), storage.ModePutUpload, ch)
+        if err != nil {
+            t.Fatal(err)
+        }
+
+        err = db.Set(context.Background(), storage.ModeSetSync, ch.Address())
+        if err != nil {
+            t.Fatal(err)
+        }
+        mtx.Lock()
+        addrs = append(addrs, ch.Address())
+        mtx.Unlock()
+    }
Review thread (comment on lines +780 to +795):
Reply: but then we'd still need to create a for loop to get the individual chunk addresses so that we could …
+    gcTarget := db.gcTarget()
+    for {
+        select {
+        case <-testHookCollectGarbageChan:
+        case <-time.After(10 * time.Second):
+            t.Error("collect garbage timeout")
+        }
+        gcSize, err := db.gcSize.Get()
+        if err != nil {
+            t.Fatal(err)
+        }
+        if gcSize == gcTarget {
+            break
+        }
+    }
+
+    t.Run("pull index count", newItemsCountTest(db.pullIndex, int(gcTarget)))
+
+    t.Run("gc index count", newItemsCountTest(db.gcIndex, int(gcTarget)))
+
+    t.Run("gc size", newIndexGCSizeTest(db))
+
+    // the first synced chunk should be removed
+    t.Run("get the first two chunks, third is gone", func(t *testing.T) {
+        _, err := db.Get(context.Background(), storage.ModeGetRequest, addrs[0])
+        if err != nil {
+            t.Error("got error but expected none")
+        }
+        _, err = db.Get(context.Background(), storage.ModeGetRequest, addrs[1])
+        if err != nil {
+            t.Error("got error but expected none")
+        }
+        _, err = db.Get(context.Background(), storage.ModeGetRequest, addrs[2])
+        if !errors.Is(err, storage.ErrNotFound) {
+            t.Errorf("expected err not found but got %v", err)
+        }
+    })
+
+    t.Run("only later inserted chunks should be removed", func(t *testing.T) {
+        for i := 2; i < (chunkCount - int(gcTarget)); i++ {
+            _, err := db.Get(context.Background(), storage.ModeGetRequest, addrs[i])
+            if !errors.Is(err, storage.ErrNotFound) {
+                t.Errorf("got error %v, want %v", err, storage.ErrNotFound)
+            }
+        }
+    })
+
+    // last synced chunk should not be removed
+    t.Run("get most recent synced chunk", func(t *testing.T) {
+        _, err := db.Get(context.Background(), storage.ModeGetRequest, addrs[len(addrs)-1])
+        if err != nil {
+            t.Fatal(err)
+        }
+    })
+}
+
+// setTestHookGCIteratorDone sets testHookGCIteratorDone and
+// returns a function that will reset it to the
+// value before the change.
+func setTestHookGCIteratorDone(h func()) (reset func()) {
+    current := testHookGCIteratorDone
+    reset = func() { testHookGCIteratorDone = current }
+    testHookGCIteratorDone = h
+    return reset
+}
Review discussion on the candidates slice:

- candidates array should be package level preallocated with gcBatchSize length
- I agree this will save on allocations, but then the cleanup would be ugly, since we'd want to get rid of entries from the previous iteration; so in order to actually save the allocation we'd have to iterate over every item in the slice, setting it to nil, otherwise we'd have dangling shed.Items in memory.
- cc @janos
- :) just remember how many you fill in, no need to go through it
- It could work nicely, just to make sure that every time before the slice is re-sliced to zero length, it is iterated up to its length and every element is set to a "zero" Item. The zero Item is the one with all slices nil (important for memory usage) and integers 0 (not important for memory usage). That way, no dangling Items will exist. Basically a type that would be []shed.Item with a reset method and a contains method, which would replace swarm.Address.MemberOf as per @zelig's other comment, which I also agree on. Somehow, this container naturally imposes a new type, even for future improvements, as the data structure may change, as suggested in the description.
- so... not sure if I understand, but is this a blocker?
- This is incorrect, since you will have dangling items on the last gc run, after which the gc target has been met, leaving dangling shed.Items in the slice. We therefore have to iterate over the whole slice every time we finish GCing (either in the last iteration, or on every iteration) for those items to not be referenced anymore, making them available for garbage collection.
- @acud For the first iteration it is also fine as it is, for me.
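To make the discussed container concrete, here is a minimal sketch of what such a reusable candidates type could look like, assuming the existing shed and swarm packages plus the standard bytes package; the type name gcCandidates and its methods are hypothetical and not part of this PR. contains takes the container as the receiver (the direction suggested in place of swarm.Address.MemberOf), and reset zeroes every filled element before reuse so no shed.Item byte slices stay referenced.

// gcCandidates is a hypothetical reusable container for gc eviction
// candidates, sketching the reviewers' suggestion; not part of this PR.
type gcCandidates []shed.Item

// package-level, preallocated once with capacity gcBatchSize, as suggested.
var candidates = make(gcCandidates, 0, gcBatchSize)

// contains reports whether addr is among the collected candidates.
func (c gcCandidates) contains(addr swarm.Address) bool {
    for _, item := range c {
        if bytes.Equal(item.Address, addr.Bytes()) {
            return true
        }
    }
    return false
}

// reset zeroes every filled element so that no shed.Item (and the byte
// slices it holds) stays referenced, then re-slices to zero length for reuse.
func (c *gcCandidates) reset() {
    for i := range *c {
        (*c)[i] = shed.Item{}
    }
    *c = (*c)[:0]
}

With such a type, collectGarbage would call reset at the end of each run instead of re-allocating the slice, which is the allocation saving and dangling-item cleanup the thread above is weighing against the simpler make-per-run approach merged here.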