diff --git a/core/commands/repo.go b/core/commands/repo.go index 5ca8d35e0bc..d9d8501e4c1 100644 --- a/core/commands/repo.go +++ b/core/commands/repo.go @@ -69,7 +69,11 @@ order to reclaim hard disk space. streamErrors, _, _ := res.Request().Option("stream-errors").Bool() - gcOutChan := corerepo.GarbageCollectAsync(n, req.Context()) + gcOutChan, err := corerepo.GarbageCollectAsync(req.Context(), n) + if err != nil { + res.SetError(err, cmds.ErrNormal) + return + } outChan := make(chan interface{}, cap(gcOutChan)) res.SetOutput((<-chan interface{})(outChan)) diff --git a/core/corerepo/gc.go b/core/corerepo/gc.go index 6538bc14436..94bda78a00b 100644 --- a/core/corerepo/gc.go +++ b/core/corerepo/gc.go @@ -79,14 +79,13 @@ func BestEffortRoots(filesRoot *mfs.Root) ([]*cid.Cid, error) { return []*cid.Cid{rootDag.Cid()}, nil } -func GarbageCollect(n *core.IpfsNode, ctx context.Context) error { +func GarbageCollect(ctx context.Context, n *core.IpfsNode) error { ctx, cancel := context.WithCancel(ctx) - defer cancel() // in case error occurs during operation - roots, err := BestEffortRoots(n.FilesRoot) + defer cancel() + rmed, err := GarbageCollectAsync(ctx, n) if err != nil { return err } - rmed := gc.GC(ctx, n.Blockstore, n.DAG, n.Pinning, roots) return CollectResult(ctx, rmed, nil) } @@ -145,16 +144,22 @@ func (e *MultiError) Error() string { return buf.String() } -func GarbageCollectAsync(n *core.IpfsNode, ctx context.Context) <-chan gc.Result { - roots, err := BestEffortRoots(n.FilesRoot) +func GarbageCollectAsync(ctx context.Context, n *core.IpfsNode) (<-chan gc.Result, error) { + g, err := gc.NewGC(n.Blockstore, n.DAG) if err != nil { - out := make(chan gc.Result) - out <- gc.Result{Error: err} - close(out) - return out + return nil, err + } + + err = g.AddPinSource(n.Pinning.PinSources()...) + if err != nil { + return nil, err + } + err = g.AddPinSource(*n.FilesRoot.PinSource()) + if err != nil { + return nil, err } - return gc.GC(ctx, n.Blockstore, n.DAG, n.Pinning, roots) + return g.Run(ctx), nil } func PeriodicGC(ctx context.Context, node *core.IpfsNode) error { @@ -217,7 +222,7 @@ func (gc *GC) maybeGC(ctx context.Context, offset uint64) error { log.Info("Watermark exceeded. Starting repo GC...") defer log.EventBegin(ctx, "repoGC").Done() - if err := GarbageCollect(gc.Node, ctx); err != nil { + if err := GarbageCollect(ctx, gc.Node); err != nil { return err } log.Infof("Repo GC done. See `ipfs repo stat` to see how much space got freed.\n") diff --git a/core/coreunix/add_test.go b/core/coreunix/add_test.go index add4395eb5e..8b48b17580b 100644 --- a/core/coreunix/add_test.go +++ b/core/coreunix/add_test.go @@ -20,9 +20,9 @@ import ( "github.com/ipfs/go-ipfs/repo/config" ds2 "github.com/ipfs/go-ipfs/thirdparty/datastore2" pi "github.com/ipfs/go-ipfs/thirdparty/posinfo" - "gx/ipfs/QmSn9Td7xgxm9EV7iEjTckpUWmWApggzPxu7eFGWkkpwin/go-block-format" cid "gx/ipfs/QmNp85zy9RLrQ5oQD4hPyS39ezrrXpcaa7R4Y9kxdWQLLQ/go-cid" + blocks "gx/ipfs/QmSn9Td7xgxm9EV7iEjTckpUWmWApggzPxu7eFGWkkpwin/go-block-format" ) func TestAddRecursive(t *testing.T) { @@ -46,6 +46,9 @@ func TestAddRecursive(t *testing.T) { } func TestAddGCLive(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + r := &repo.Mock{ C: config.Config{ Identity: config.Identity{ @@ -54,13 +57,13 @@ func TestAddGCLive(t *testing.T) { }, D: ds2.ThreadSafeCloserMapDatastore(), } - node, err := core.NewNode(context.Background(), &core.BuildCfg{Repo: r}) + node, err := core.NewNode(ctx, &core.BuildCfg{Repo: r}) if err != nil { t.Fatal(err) } out := make(chan interface{}) - adder, err := NewAdder(context.Background(), node.Pinning, node.Blockstore, node.DAG) + adder, err := NewAdder(ctx, node.Pinning, node.Blockstore, node.DAG) if err != nil { t.Fatal(err) } @@ -98,11 +101,18 @@ func TestAddGCLive(t *testing.T) { t.Fatal("add shouldnt complete yet") } + g, err := gc.NewGC(node.Blockstore, node.DAG) + if err != nil { + t.Fatal(err) + } + g.AddPinSource(node.Pinning.PinSources()...) + var gcout <-chan gc.Result gcstarted := make(chan struct{}) + go func() { defer close(gcstarted) - gcout = gc.GC(context.Background(), node.Blockstore, node.DAG, node.Pinning, nil) + gcout = g.Run(ctx) }() // gc shouldnt start until we let the add finish its current file. @@ -126,6 +136,7 @@ func TestAddGCLive(t *testing.T) { <-gcstarted for r := range gcout { + t.Logf("gc res: %v", r) if r.Error != nil { t.Fatal(err) } @@ -144,11 +155,11 @@ func TestAddGCLive(t *testing.T) { last = c } - ctx, cancel := context.WithTimeout(context.Background(), time.Second*5) + ctx2, cancel := context.WithTimeout(ctx, time.Second*5) defer cancel() set := cid.NewSet() - err = dag.EnumerateChildren(ctx, node.DAG.GetLinks, last, set.Visit) + err = dag.EnumerateChildren(ctx2, node.DAG.GetLinks, last, set.Visit) if err != nil { t.Fatal(err) } diff --git a/mfs/system.go b/mfs/system.go index fc5be0f6e2f..0397d50e2f1 100644 --- a/mfs/system.go +++ b/mfs/system.go @@ -17,6 +17,7 @@ import ( "time" dag "github.com/ipfs/go-ipfs/merkledag" + pin "github.com/ipfs/go-ipfs/pin" ft "github.com/ipfs/go-ipfs/unixfs" cid "gx/ipfs/QmNp85zy9RLrQ5oQD4hPyS39ezrrXpcaa7R4Y9kxdWQLLQ/go-cid" @@ -123,6 +124,24 @@ func (kr *Root) Flush() error { return nil } +// PinSource returns information about pinning requirements. +func (kr *Root) PinSource() *pin.Source { + return &pin.Source{ + Get: func() ([]*cid.Cid, error) { + err := kr.Flush() + if err != nil { + return nil, err + } + + nd, err := kr.GetValue().GetNode() + if err != nil { + return nil, err + } + return []*cid.Cid{nd.Cid()}, nil + }, + } +} + // closeChild implements the childCloser interface, and signals to the publisher that // there are changes ready to be published func (kr *Root) closeChild(name string, nd node.Node, sync bool) error { diff --git a/pin/gc/gc.go b/pin/gc/gc.go index c9e6cac706a..41c9b35d1c4 100644 --- a/pin/gc/gc.go +++ b/pin/gc/gc.go @@ -4,6 +4,7 @@ import ( "context" "errors" "fmt" + "sort" bstore "github.com/ipfs/go-ipfs/blocks/blockstore" dag "github.com/ipfs/go-ipfs/merkledag" @@ -16,13 +17,6 @@ import ( var log = logging.Logger("gc") -// Result represents an incremental output from a garbage collection -// run. It contains either an error, or the cid of a removed object. -type Result struct { - KeyRemoved *cid.Cid - Error error -} - // GC performs a mark and sweep garbage collection of the blocks in the blockstore // first, it creates a 'marked' set and adds to it the following: // - all recursively pinned blocks, plus all of their descendants (recursively) @@ -33,146 +27,232 @@ type Result struct { // The routine then iterates over every block in the blockstore and // deletes any block that is not found in the marked set. // -func GC(ctx context.Context, bs bstore.GCBlockstore, ls dag.LinkService, pn pin.Pinner, bestEffortRoots []*cid.Cid) <-chan Result { +type GC interface { + AddPinSource(...pin.Source) error + Run(ctx context.Context) <-chan Result +} - elock := log.EventBegin(ctx, "GC.lockWait") - unlocker := bs.GCLock() - elock.Done() - elock = log.EventBegin(ctx, "GC.locked") - emark := log.EventBegin(ctx, "GC.mark") +type gctype struct { + bs bstore.GCBlockstore + ls dag.LinkService - ls = ls.GetOfflineLinkService() + roots []pin.Source +} + +var _ GC = (*gctype)(nil) + +// NewGC creates new instance of garbage collector +func NewGC(bs bstore.GCBlockstore, ls dag.LinkService) (GC, error) { + return &gctype{ + bs: bs, + ls: ls.GetOfflineLinkService(), + }, nil +} +// AddPinSource adds as pin.Source to be considered by the GC. +// Any calls to AddPinSource have to be done before any calls to Run. +func (g *gctype) AddPinSource(s ...pin.Source) error { + g.roots = append(g.roots, s...) + sort.SliceStable(g.roots, func(i, j int) bool { + return g.roots[i].SortValue() < g.roots[j].SortValue() + }) + + return nil +} + +// Run starts the garbage collector and continues it async +func (g *gctype) Run(ctx context.Context) <-chan Result { output := make(chan Result, 128) + tri := newTriset() - go func() { - defer close(output) - defer unlocker.Unlock() - defer elock.Done() + emark := log.EventBegin(ctx, "GC.mark") - gcs, err := ColoredSet(ctx, pn, ls, bestEffortRoots, output) - if err != nil { - output <- Result{Error: err} - return + for _, r := range g.roots { + // Stop adding roots at first Direct pin + if r.Direct { + break } - emark.Append(logging.LoggableMap{ - "blackSetSize": fmt.Sprintf("%d", gcs.Len()), - }) - emark.Done() - esweep := log.EventBegin(ctx, "GC.sweep") - - keychan, err := bs.AllKeysChan(ctx) + cids, err := r.Get() if err != nil { output <- Result{Error: err} - return + return output } - - errors := false - var removed uint64 - - loop: - for { - select { - case k, ok := <-keychan: - if !ok { - break loop - } - if !gcs.Has(k) { - err := bs.DeleteBlock(k) - removed++ - if err != nil { - errors = true - output <- Result{Error: &CannotDeleteBlockError{k, err}} - //log.Errorf("Error removing key from blockstore: %s", err) - // continue as error is non-fatal - continue loop - } - select { - case output <- Result{KeyRemoved: k}: - case <-ctx.Done(): - break loop - } - } - case <-ctx.Done(): - break loop - } + for _, c := range cids { + tri.InsertGray(c, r.Strict) } - esweep.Append(logging.LoggableMap{ - "whiteSetSize": fmt.Sprintf("%d", removed), - }) - esweep.Done() - if errors { - output <- Result{Error: ErrCannotDeleteSomeBlocks} - } - }() + } + go g.gcAsync(ctx, emark, output, tri) return output } -func Descendants(ctx context.Context, getLinks dag.GetLinks, set *cid.Set, roots []*cid.Cid) error { - for _, c := range roots { - set.Add(c) +func (g *gctype) gcAsync(ctx context.Context, emark *logging.EventInProgress, + output chan Result, tri *triset) { - // EnumerateChildren recursively walks the dag and adds the keys to the given set - err := dag.EnumerateChildren(ctx, getLinks, c, set.Visit) - if err != nil { - return err + defer close(output) + + bestEffortGetLinks := func(ctx context.Context, cid *cid.Cid) ([]*node.Link, error) { + links, err := g.ls.GetLinks(ctx, cid) + if err != nil && err != dag.ErrNotFound { + return nil, &CannotFetchLinksError{cid, err} } + return links, nil } - return nil -} - -// ColoredSet computes the set of nodes in the graph that are pinned by the -// pins in the given pinner. -func ColoredSet(ctx context.Context, pn pin.Pinner, ls dag.LinkService, bestEffortRoots []*cid.Cid, output chan<- Result) (*cid.Set, error) { - // KeySet currently implemented in memory, in the future, may be bloom filter or - // disk backed to conserve memory. - errors := false - gcs := cid.NewSet() getLinks := func(ctx context.Context, cid *cid.Cid) ([]*node.Link, error) { - links, err := ls.GetLinks(ctx, cid) + links, err := g.ls.GetLinks(ctx, cid) if err != nil { - errors = true - output <- Result{Error: &CannotFetchLinksError{cid, err}} + return nil, &CannotFetchLinksError{cid, err} } return links, nil } - err := Descendants(ctx, getLinks, gcs, pn.RecursiveKeys()) + + var criticalError error + defer func() { + if criticalError != nil { + output <- Result{Error: criticalError} + } + }() + + // Enumerate without the lock + for { + finished, err := tri.EnumerateStep(ctx, bestEffortGetLinks, getLinks) + if err != nil { + output <- Result{Error: err} + criticalError = ErrCannotFetchAllLinks + } + if finished { + break + } + } + if criticalError != nil { + return + } + + // Add white objects + keychan, err := g.bs.AllKeysChan(ctx) if err != nil { - errors = true output <- Result{Error: err} + return } - bestEffortGetLinks := func(ctx context.Context, cid *cid.Cid) ([]*node.Link, error) { - links, err := ls.GetLinks(ctx, cid) - if err != nil && err != dag.ErrNotFound { - errors = true - output <- Result{Error: &CannotFetchLinksError{cid, err}} +loop: + for { + select { + case c, ok := <-keychan: + if !ok { + break loop + } + tri.InsertFresh(c) + case <-ctx.Done(): + fmt.Printf("ctx done\n") + output <- Result{Error: ctx.Err()} + return } - return links, nil } - err = Descendants(ctx, bestEffortGetLinks, gcs, bestEffortRoots) - if err != nil { - errors = true - output <- Result{Error: err} + + // Take the lock + unlocker, elock := getGCLock(ctx, g.bs) + + defer unlocker.Unlock() + defer elock.Done() + + // Add the roots again, they might have changed + for _, r := range g.roots { + cids, err := r.Get() + if err != nil { + criticalError = err + return + } + for _, c := range cids { + if !r.Direct { + tri.InsertGray(c, r.Strict) + } else { + // this special case prevents incremental and concurrent GC + tri.blacken(c, enumStrict) + } + } } - for _, k := range pn.DirectKeys() { - gcs.Add(k) + // Reenumerate, fast as most will be duplicate + for { + finished, err := tri.EnumerateStep(ctx, getLinks, bestEffortGetLinks) + if err != nil { + output <- Result{Error: err} + criticalError = ErrCannotFetchAllLinks + } + if finished { + break + } } - err = Descendants(ctx, getLinks, gcs, pn.InternalPins()) - if err != nil { - errors = true - output <- Result{Error: err} + if criticalError != nil { + return + } + + emark.Done() + esweep := log.EventBegin(ctx, "GC.sweep") + + var whiteSetSize, blackSetSize uint64 + +loop2: + for v, e := range tri.colmap { + if e.getColor() != tri.white { + blackSetSize++ + continue + } + whiteSetSize++ + + c, err := cid.Cast([]byte(v)) + if err != nil { + // this should not happen + panic("error in cast of cid: " + err.Error()) + } + + err = g.bs.DeleteBlock(c) + if err != nil { + output <- Result{Error: &CannotDeleteBlockError{c, err}} + criticalError = ErrCannotDeleteSomeBlocks + continue + } + select { + case output <- Result{KeyRemoved: c}: + case <-ctx.Done(): + break loop2 + } + } + + esweep.Append(logging.LoggableMap{ + "whiteSetSize": fmt.Sprintf("%d", whiteSetSize), + "blackSetSize": fmt.Sprintf("%d", blackSetSize), + }) + esweep.Done() +} + +// Result represents an incremental output from a garbage collection +// run. It contains either an error, or the cid of a removed object. +type Result struct { + KeyRemoved *cid.Cid + Error error +} + +func getGCLock(ctx context.Context, bs bstore.GCBlockstore) (bstore.Unlocker, *logging.EventInProgress) { + elock := log.EventBegin(ctx, "GC.lockWait") + unlocker := bs.GCLock() + elock.Done() + elock = log.EventBegin(ctx, "GC.locked") + return unlocker, elock +} + +func addRoots(tri *triset, pn pin.Pinner, bestEffortRoots []*cid.Cid) { + for _, v := range bestEffortRoots { + tri.InsertGray(v, false) } - if errors { - return nil, ErrCannotFetchAllLinks + for _, v := range pn.RecursiveKeys() { + tri.InsertGray(v, true) } - return gcs, nil } var ErrCannotFetchAllLinks = errors.New("garbage collection aborted: could not retrieve some links") diff --git a/pin/gc/gc_test.go b/pin/gc/gc_test.go new file mode 100644 index 00000000000..212cfe669f6 --- /dev/null +++ b/pin/gc/gc_test.go @@ -0,0 +1,39 @@ +package gc + +import ( + "testing" + + "github.com/ipfs/go-ipfs/pin" +) + +func TestPinSources(t *testing.T) { + gc := &gctype{} + sources := []pin.Source{ + pin.Source{Direct: true}, + pin.Source{Internal: true}, + pin.Source{Strict: true}, + pin.Source{}, + } + err := gc.AddPinSource(sources...) + if err != nil { + t.Fatal(err) + } + + p := gc.roots[0] + if !p.Strict { + t.Errorf("first root should be strict, was %v", p) + } + p = gc.roots[1] + if p.Strict || p.Direct || p.Internal { + t.Errorf("second root should be normal, was %v", p) + } + p = gc.roots[2] + if !p.Direct { + t.Errorf("third root should be direct, was %v", p) + } + p = gc.roots[3] + if !p.Internal { + t.Errorf("fourth root should be internal, was %v", p) + } + +} diff --git a/pin/gc/triset.go b/pin/gc/triset.go new file mode 100644 index 00000000000..d9172c98e2e --- /dev/null +++ b/pin/gc/triset.go @@ -0,0 +1,160 @@ +package gc + +import ( + "context" + + dag "github.com/ipfs/go-ipfs/merkledag" + + cid "gx/ipfs/QmNp85zy9RLrQ5oQD4hPyS39ezrrXpcaa7R4Y9kxdWQLLQ/go-cid" +) + +const ( + colorNull trielement = iota + color1 + color2 + color3 + colorMask = 0x3 +) + +const ( + // enum from enumerator + // stricter enumerators should have higer value than those less strict + enumFast trielement = 0 + enumStrict trielement = 1 << 2 + enumMask trielement = enumStrict +) + +// this const is used to enable runtime check for things that should never happen +// will cause panic if they happen +const pedantic = true + +// try to keep trielement as small as possible +// 8bit color is overkill, 3 bits would be enough, but additional functionality +// will probably increase it future +// if ever it blows over 64bits total size change the colmap in triset to use +// pointers onto this structure +type trielement uint8 + +func (t trielement) getColor() trielement { + return t & colorMask +} + +func (t trielement) getEnum() trielement { + return t & enumMask +} + +type triset struct { + // colors are per triset allowing fast color swap after sweep, + // the update operation in map of structs is about 3x as fast + // as insert and requres 0 allocations (keysize allocation in case of insert) + white, gray, black trielement + + freshColor trielement + + // grays is used as stack to enumerate elements that are still gray + grays []*cid.Cid + + // if item doesn't exist in the colmap it is treated as white + colmap map[string]trielement +} + +func newTriset() *triset { + tr := &triset{ + white: color1, + gray: color2, + black: color3, + + grays: make([]*cid.Cid, 0, 1<<10), + colmap: make(map[string]trielement), + } + + tr.freshColor = tr.white + return tr +} + +// InsertFresh inserts fresh item into a set +// it marks it with freshColor if it is currently white +func (tr *triset) InsertFresh(c *cid.Cid) { + e := tr.colmap[c.KeyString()] + cl := e.getColor() + + // conditions to change the element: + // 1. does not exist in set + // 2. is white and fresh color is different + if cl == colorNull || (cl == tr.white && tr.freshColor != tr.white) { + tr.colmap[c.KeyString()] = trielement(tr.freshColor) + } +} + +// InsertWhite inserts white item into a set if it doesn't exist +func (tr *triset) InsertWhite(c *cid.Cid) { + _, ok := tr.colmap[c.KeyString()] + if !ok { + tr.colmap[c.KeyString()] = trielement(tr.white) + } +} + +// InsertGray inserts new item into set as gray or turns white item into gray +// strict arguemnt is used to signify the the garbage collector that this +// DAG must be enumerated fully, any non aviable objects must stop the progress +// and error out +func (tr *triset) InsertGray(c *cid.Cid, strict bool) { + newEnum := enumFast + if strict { + newEnum = enumStrict + } + + e := tr.colmap[c.KeyString()] + cl := e.getColor() + // conditions are: + // 1. empty + // 2. white + // 3. insufficient strictness + if cl == colorNull || cl == tr.white || (e.getEnum() < newEnum) { + tr.colmap[c.KeyString()] = trielement(tr.gray | newEnum) + if cl != tr.gray { + tr.grays = append(tr.grays, c) + } + } +} + +func (tr *triset) blacken(c *cid.Cid, strict trielement) { + tr.colmap[c.KeyString()] = trielement(tr.black | strict) +} + +// EnumerateStep performs one Links lookup in search for elements to gray out +// it returns error is the getLinks function errors +// if the gray set is empty after this step it returns (true, nil) +func (tr *triset) EnumerateStep(ctx context.Context, getLinks dag.GetLinks, getLinksStrict dag.GetLinks) (bool, error) { + var c *cid.Cid + var e trielement + for next := true; next; next = e.getColor() != tr.gray { + if len(tr.grays) == 0 { + return true, nil + } + // get element from top of queue + c = tr.grays[len(tr.grays)-1] + e = tr.colmap[c.KeyString()] + tr.grays = tr.grays[:len(tr.grays)-1] + } + + strict := e.getEnum() == enumStrict + + // select getLinks method + gL := getLinks + if strict { + gL = getLinksStrict + } + + links, err := gL(ctx, c) + if err != nil { + return false, err + } + + tr.blacken(c, e.getEnum()) + for _, l := range links { + tr.InsertGray(l.Cid, strict) + } + + return len(tr.grays) == 0, nil +} diff --git a/pin/gc/triset_gobench_test.go b/pin/gc/triset_gobench_test.go new file mode 100644 index 00000000000..cc378072734 --- /dev/null +++ b/pin/gc/triset_gobench_test.go @@ -0,0 +1,106 @@ +package gc + +import ( + "math/rand" + "testing" +) + +func BenchmarkMapInserts(b *testing.B) { + b.N = 10e6 + keys := make([]string, b.N) + buf := make([]byte, 64) + for i := 0; i < b.N; i++ { + _, err := rand.Read(buf) + if err != nil { + b.Fatal(err) + } + keys[i] = string(buf) + } + + set := make(map[string]trielement) + b.ReportAllocs() + b.ResetTimer() + + for i := 0; i < b.N; i++ { + set[keys[i]] = trielement(1) + } +} +func BenchmarkMapUpdate(b *testing.B) { + b.N = 10e6 + keys := make([]string, b.N) + buf := make([]byte, 64) + for i := 0; i < b.N; i++ { + _, err := rand.Read(buf) + if err != nil { + b.Fatal(err) + } + keys[i] = string(buf) + } + + set := make(map[string]trielement) + for i := 0; i < b.N; i++ { + set[keys[i]] = trielement(1) + } + + b.ReportAllocs() + b.ResetTimer() + + for i := 0; i < b.N; i++ { + set[keys[i]] = trielement(2) + } +} + +type testint uint8 +type teststruc struct { + u uint8 +} + +func BenchmarkMapUpdateUint8(b *testing.B) { + b.N = 10e6 + keys := make([]string, b.N) + buf := make([]byte, 64) + for i := 0; i < b.N; i++ { + _, err := rand.Read(buf) + if err != nil { + b.Fatal(err) + } + keys[i] = string(buf) + } + + set := make(map[string]testint) + for i := 0; i < b.N; i++ { + set[keys[i]] = testint(i) + } + + b.ReportAllocs() + b.ResetTimer() + + for i := 0; i < b.N; i++ { + set[keys[i]] = testint(i) + } +} + +func BenchmarkMapUpdateStruct(b *testing.B) { + b.N = 10e6 + keys := make([]string, b.N) + buf := make([]byte, 64) + for i := 0; i < b.N; i++ { + _, err := rand.Read(buf) + if err != nil { + b.Fatal(err) + } + keys[i] = string(buf) + } + + set := make(map[string]teststruc) + for i := 0; i < b.N; i++ { + set[keys[i]] = teststruc{uint8(i)} + } + + b.ReportAllocs() + b.ResetTimer() + + for i := 0; i < b.N; i++ { + set[keys[i]] = teststruc{uint8(i)} + } +} diff --git a/pin/gc/triset_test.go b/pin/gc/triset_test.go new file mode 100644 index 00000000000..267b7fcfa8c --- /dev/null +++ b/pin/gc/triset_test.go @@ -0,0 +1,29 @@ +package gc + +import ( + "testing" + + "github.com/ipfs/go-ipfs/blocks/blocksutil" + + cid "gx/ipfs/QmNp85zy9RLrQ5oQD4hPyS39ezrrXpcaa7R4Y9kxdWQLLQ/go-cid" +) + +func TestInsertWhite(t *testing.T) { + tri := newTriset() + blkgen := blocksutil.NewBlockGenerator() + + whites := make([]*cid.Cid, 1000) + for i := range whites { + blk := blkgen.Next() + whites[i] = blk.Cid() + + tri.InsertWhite(blk.Cid()) + } + + for _, v := range whites { + if tri.colmap[v.KeyString()].getColor() != tri.white { + t.Errorf("cid %s should be white and is not %s", v, tri.colmap[v.KeyString()]) + } + } + +} diff --git a/pin/pin.go b/pin/pin.go index ce3722aeb34..2a9160eeaed 100644 --- a/pin/pin.go +++ b/pin/pin.go @@ -110,6 +110,9 @@ type Pinner interface { DirectKeys() []*cid.Cid RecursiveKeys() []*cid.Cid InternalPins() []*cid.Cid + + // References of pins for GC + PinSources() []Source } type Pinned struct { @@ -454,6 +457,39 @@ func (p *pinner) RecursiveKeys() []*cid.Cid { return p.recursePin.Keys() } +// InternalPins returns slice of cids that are part of internal pin structure +func (p *pinner) InternalPins() []*cid.Cid { + p.lock.Lock() + defer p.lock.Unlock() + var out []*cid.Cid + out = append(out, p.internalPin.Keys()...) + return out +} + +func (p *pinner) PinSources() []Source { + nilErr := func(p func() []*cid.Cid) func() ([]*cid.Cid, error) { + return func() ([]*cid.Cid, error) { return p(), nil } + } + + return []Source{ + { + Get: nilErr(p.RecursiveKeys), + Strict: true, + }, + { + Get: nilErr(p.DirectKeys), + Strict: true, + Direct: true, + }, + { + Get: nilErr(p.InternalPins), + Strict: true, + Internal: true, + }, + } + +} + func (p *pinner) Update(ctx context.Context, from, to *cid.Cid, unpin bool) error { p.lock.Lock() defer p.lock.Unlock() @@ -524,14 +560,6 @@ func (p *pinner) Flush() error { return nil } -func (p *pinner) InternalPins() []*cid.Cid { - p.lock.Lock() - defer p.lock.Unlock() - var out []*cid.Cid - out = append(out, p.internalPin.Keys()...) - return out -} - // PinWithMode allows the user to have fine grained control over pin // counts func (p *pinner) PinWithMode(c *cid.Cid, mode PinMode) { diff --git a/pin/source.go b/pin/source.go new file mode 100644 index 00000000000..afb8f484133 --- /dev/null +++ b/pin/source.go @@ -0,0 +1,34 @@ +package pin + +import ( + cid "gx/ipfs/QmNp85zy9RLrQ5oQD4hPyS39ezrrXpcaa7R4Y9kxdWQLLQ/go-cid" +) + +// Source is structure describing source of the pin +type Source struct { + // Get if a function that will be called to get the pins for GCing + Get func() ([]*cid.Cid, error) + // Strict makes the GC fail if some objects can't be fetched during + // recursive traversal of the graph + Strict bool + // Direct marks the pinned object as the final object in the traversal + Direct bool + // Internal marks the pin source which recursive enumeration should be + // terminated by a direct pin + Internal bool +} + +// SortValue is sort order for the GC +func (p Source) SortValue() int { + v := 0 + if !p.Strict { + v |= 1 << 0 + } + if p.Direct { + v |= 1 << 1 + } + if p.Internal { + v |= 1 << 2 + } + return v +}