From 9ac1b9ff09173580248162bc9e2ab673a5467da8 Mon Sep 17 00:00:00 2001 From: Hector Sanjuan Date: Wed, 15 Jun 2022 20:00:49 +0200 Subject: [PATCH] Make switchToSharding more efficient When automatically switching a BasicDirectory to a HAMTDirectory because it grew too big, the current code: * loops every link in the BasicDirectory * reads each node referenced by those links * adds the nodes to a new HAMTDirectory shard, which in turn: * writes the nodes to the DAG service (they were just read from there!) * makes a link out of them (identical to the link in the BasicDirectory!) This would happen to about 4000 nodes, which are fully read and written for nothing. This PR adds a new SetLink method to the HAMT Shard which, instead of taking an ipld.Node like Set(), takes directly an ipld.Link. Then it updates switchToSharding() to pass the links in the BasicDirectory directly, rather than reading all the nodes. Note that switchToBasic() works like this already, only using the links in the HAMT directory. --- hamt/hamt.go | 20 ++++++++++++++++++++ io/directory.go | 7 +------ 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/hamt/hamt.go b/hamt/hamt.go index ac1c5e458..593b64627 100644 --- a/hamt/hamt.go +++ b/hamt/hamt.go @@ -229,6 +229,26 @@ func (ds *Shard) Set(ctx context.Context, name string, nd ipld.Node) error { return err } +// SetLink sets 'name' = lnk in the HAMT, using directly the information in the +// given link. This avoids writing the given node, then reading it to make a +// link out of it. +func (ds *Shard) SetLink(ctx context.Context, name string, lnk *ipld.Link) error { + hv := newHashBits(name) + + newLink := ipld.Link{ + Name: lnk.Name, + Size: lnk.Size, + Cid: lnk.Cid, + } + + // FIXME: We don't need to set the name here, it will get overwritten. + // This is confusing, confirm and remove this line. 
+ newLink.Name = ds.linkNamePrefix(0) + name + + _, err := ds.swapValue(ctx, hv, name, &newLink) + return err +} + // Swap sets a link pointing to the passed node as the value under the // name key in this Shard or its children. It also returns the previous link // under that name key (if any). diff --git a/io/directory.go b/io/directory.go index 2ec862247..b602bf9ab 100644 --- a/io/directory.go +++ b/io/directory.go @@ -334,12 +334,7 @@ func (d *BasicDirectory) switchToSharding(ctx context.Context) (*HAMTDirectory, hamtDir.shard = shard for _, lnk := range d.node.Links() { - node, err := d.dserv.Get(ctx, lnk.Cid) - if err != nil { - return nil, err - } - - err = hamtDir.shard.Set(ctx, lnk.Name, node) + err = hamtDir.shard.SetLink(ctx, lnk.Name, lnk) if err != nil { return nil, err }