This repository has been archived by the owner on May 24, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 14
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
A new CLI tool to generate folder unixfs ipld car
- Loading branch information
Showing
14 changed files
with
590 additions
and
255 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,135 @@ | ||
package main | ||
|
||
import ( | ||
"bufio" | ||
"context" | ||
"encoding/json" | ||
"fmt" | ||
commcid "github.com/filecoin-project/go-fil-commcid" | ||
commp "github.com/filecoin-project/go-fil-commp-hashhash" | ||
"github.com/google/uuid" | ||
"github.com/pkg/errors" | ||
"github.com/tech-greedy/generate-car/cmd/generate-ipld-car/util" | ||
"github.com/urfave/cli/v2" | ||
"io" | ||
"os" | ||
"path" | ||
"path/filepath" | ||
) | ||
|
||
// Result is the summary that main prints to stdout as a single JSON object
// after the CAR file has been written and renamed. No json tags are declared,
// so the field names themselves are the keys of the emitted JSON.
type Result struct {
	DataCid   string // String form of the root CID returned by util.GenerateIpldCar.
	PieceCid  string // String form of the piece commitment CID; also the base name of the final ".car" file.
	PieceSize uint64 // Piece size from the commP calculator, or the --piece-size value when one was supplied.
}
|
||
// BufSize is the size, in bytes, of the buffered writer that main places in
// front of the CAR file and the commP calculator: 4 MiB scaled by 127/128
// (4161536 bytes).
// NOTE(review): the 127/128 factor presumably mirrors the Filecoin
// unpadded/padded piece ratio — confirm.
const BufSize = (4 << 20) / 128 * 127
|
||
func main() { | ||
app := &cli.App{ | ||
Name: "generate-ipld-car", | ||
Usage: "generate ipld car archive from list of files and compute commp in the mean time. The generated car file only contains the file and folder information, not the actual data.", | ||
Flags: []cli.Flag{ | ||
&cli.StringFlag{ | ||
Name: "input", | ||
Aliases: []string{"i"}, | ||
Usage: "This is a ndjson file containing the list of files to be included in the car archive. If not specified, use stdin instead.", | ||
Value: "-", | ||
}, | ||
&cli.Uint64Flag{ | ||
Name: "piece-size", | ||
Aliases: []string{"s"}, | ||
Usage: "Target piece size, default to minimum possible value", | ||
Value: 0, | ||
}, | ||
&cli.StringFlag{ | ||
Name: "out-dir", | ||
Aliases: []string{"o"}, | ||
Usage: "Output directory to save the car file", | ||
Value: ".", | ||
}, | ||
&cli.StringFlag{ | ||
Name: "parent", | ||
Aliases: []string{"p"}, | ||
Usage: "Parent path of the dataset", | ||
Required: true, | ||
}, | ||
}, Action: func(c *cli.Context) error { | ||
inputFile := c.String("input") | ||
pieceSizeInput := c.Uint64("piece-size") | ||
outDir := c.String("out-dir") | ||
parent := c.String("parent") | ||
var in *os.File | ||
if inputFile == "-" { | ||
in = os.Stdin | ||
} else { | ||
inFile, err := os.Open(inputFile) | ||
if err != nil { | ||
return errors.Wrap(err, "failed to open input file") | ||
} | ||
|
||
in = inFile | ||
} | ||
|
||
defer in.Close() | ||
outFilename := uuid.New().String() + ".car" | ||
outPath := filepath.Join(outDir, outFilename) | ||
carF, err := os.Create(outPath) | ||
if err != nil { | ||
return errors.Wrap(err, "failed to create car file") | ||
} | ||
|
||
cp := new(commp.Calc) | ||
writer := bufio.NewWriterSize(io.MultiWriter(carF, cp), BufSize) | ||
cid, err := util.GenerateIpldCar(context.TODO(), in, parent, writer) | ||
if err != nil { | ||
return errors.Wrap(err, "failed to generate car file") | ||
} | ||
err = writer.Flush() | ||
if err != nil { | ||
return errors.Wrap(err, "failed to flush writer") | ||
} | ||
err = carF.Close() | ||
if err != nil { | ||
return errors.Wrap(err, "failed to close car file") | ||
} | ||
rawCommP, pieceSize, err := cp.Digest() | ||
if err != nil { | ||
return errors.Wrap(err, "failed to compute commp") | ||
} | ||
if pieceSizeInput > 0 { | ||
rawCommP, err = commp.PadCommP( | ||
rawCommP, | ||
pieceSize, | ||
pieceSizeInput, | ||
) | ||
if err != nil { | ||
return err | ||
} | ||
pieceSize = pieceSizeInput | ||
} | ||
commCid, err := commcid.DataCommitmentV1ToCID(rawCommP) | ||
if err != nil { | ||
return errors.Wrap(err, "failed to convert commp to cid") | ||
} | ||
err = os.Rename(outPath, path.Join(outDir, commCid.String()+".car")) | ||
if err != nil { | ||
return errors.Wrap(err, "failed to rename car file") | ||
} | ||
output, err := json.Marshal(Result{ | ||
DataCid: cid.String(), | ||
PieceCid: commCid.String(), | ||
PieceSize: pieceSize, | ||
}) | ||
if err != nil { | ||
return errors.Wrap(err, "failed to marshal result") | ||
} | ||
fmt.Println(string(output)) | ||
return nil | ||
}, | ||
} | ||
err := app.Run(os.Args) | ||
if err != nil { | ||
panic(err) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
package util | ||
|
||
import ( | ||
"github.com/ipfs/go-cid" | ||
ipld "github.com/ipfs/go-ipld-format" | ||
"github.com/pkg/errors" | ||
) | ||
|
||
type FakeFSNode struct { | ||
size uint64 | ||
cid cid.Cid | ||
} | ||
|
||
var ErrEmptyNode error = errors.New("fake fs node") | ||
|
||
func NewFakeFSNode(size uint64, cid cid.Cid) FakeFSNode { | ||
return FakeFSNode{size: size, cid: cid} | ||
} | ||
|
||
func (f FakeFSNode) RawData() []byte { | ||
return nil | ||
} | ||
|
||
func (f FakeFSNode) Cid() cid.Cid { | ||
return f.cid | ||
} | ||
|
||
func (f FakeFSNode) String() string { | ||
return "FakeFSNode - " + f.cid.String() | ||
} | ||
|
||
func (f FakeFSNode) Loggable() map[string]interface{} { | ||
return nil | ||
} | ||
|
||
func (f FakeFSNode) Resolve(path []string) (interface{}, []string, error) { | ||
return nil, nil, ErrEmptyNode | ||
} | ||
|
||
func (f FakeFSNode) Tree(path string, depth int) []string { | ||
return nil | ||
} | ||
|
||
func (f FakeFSNode) ResolveLink(path []string) (*ipld.Link, []string, error) { | ||
return nil, nil, ErrEmptyNode | ||
} | ||
|
||
func (f FakeFSNode) Copy() ipld.Node { | ||
return &FakeFSNode{size: f.size, cid: f.cid} | ||
} | ||
|
||
func (f FakeFSNode) Links() []*ipld.Link { | ||
return nil | ||
} | ||
|
||
func (f FakeFSNode) Stat() (*ipld.NodeStat, error) { | ||
return &ipld.NodeStat{}, nil | ||
} | ||
|
||
func (f FakeFSNode) Size() (uint64, error) { | ||
return f.size, nil | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,193 @@ | ||
package util | ||
|
||
import ( | ||
"bufio" | ||
"context" | ||
"encoding/json" | ||
"github.com/ipfs/go-blockservice" | ||
"github.com/ipfs/go-cid" | ||
"github.com/ipfs/go-datastore" | ||
bstore "github.com/ipfs/go-ipfs-blockstore" | ||
ipld "github.com/ipfs/go-ipld-format" | ||
"github.com/ipfs/go-merkledag" | ||
"github.com/ipfs/go-unixfs" | ||
uio "github.com/ipfs/go-unixfs/io" | ||
unixfs_pb "github.com/ipfs/go-unixfs/pb" | ||
"github.com/ipld/go-car" | ||
"github.com/pkg/errors" | ||
"io" | ||
"path/filepath" | ||
"strings" | ||
) | ||
|
||
// FileInfo is one record of the ndjson input and describes a single chunk of
// a file. No json tags are declared, so records are decoded by field name.
type FileInfo struct {
	Path  string // Path of the file the chunk belongs to; made absolute and then relative to the parent directory.
	Size  uint64 // Size given to the node when the file consists of exactly one chunk.
	Start uint64 // Chunk start offset; End-Start is the link size for multi-chunk files.
	End   uint64 // Chunk end offset. NOTE(review): presumably exclusive — confirm against the producer of the input.
	Cid   string // CID of the chunk in string form; parsed with cid.Parse.
}
|
||
// FsType distinguishes the two kinds of entry in the in-memory file tree.
type FsType int

const (
	// Dir marks a directory entry; its children are held in FsEntry.SubEntries.
	Dir FsType = iota
	// File marks a file entry; its content is described by FsEntry.Chunks.
	File
)
|
||
// FsEntry is a node of the in-memory tree that GenerateIpldCar builds from the
// input records before converting it into IPLD nodes.
type FsEntry struct {
	Type       FsType              // Dir or File.
	Chunks     []FileInfo          // Chunks of a File entry, in the order they were read from the input.
	SubEntries map[string]*FsEntry // Children of a Dir entry keyed by path segment; left nil for File entries.
}
|
||
func getNode(ctx context.Context, entry *FsEntry, dagServ ipld.DAGService) (ipld.Node, error) { | ||
cidBuilder := merkledag.V1CidPrefix() | ||
switch entry.Type { | ||
case Dir: | ||
dir := uio.NewDirectory(dagServ) | ||
dir.SetCidBuilder(cidBuilder) | ||
for name, subEntry := range entry.SubEntries { | ||
subNode, err := getNode(ctx, subEntry, dagServ) | ||
if err != nil { | ||
return nil, errors.Wrap(err, "failed to get node for sub entry") | ||
} | ||
err = dir.AddChild(ctx, name, subNode) | ||
if err != nil { | ||
return nil, errors.Wrap(err, "failed to add child to directory") | ||
} | ||
} | ||
node, err := dir.GetNode() | ||
if err != nil { | ||
return nil, errors.Wrap(err, "failed to get node from directory") | ||
} | ||
err = dagServ.Add(ctx, node) | ||
if err != nil { | ||
return nil, errors.Wrap(err, "failed to add node to dag service") | ||
} | ||
return node, nil | ||
case File: | ||
if len(entry.Chunks) == 1 { | ||
cid, err := cid.Parse(entry.Chunks[0].Cid) | ||
if err != nil { | ||
return nil, errors.Wrap(err, "failed to parse cid") | ||
} | ||
node := NewFakeFSNode(entry.Chunks[0].Size, cid) | ||
/* Do not add to dag service because this is a fake node | ||
err = dagServ.Add(ctx, node) | ||
if err != nil { | ||
return nil, errors.Wrap(err, "failed to add node to dag service") | ||
} | ||
*/ | ||
return &node, nil | ||
} else { | ||
node := unixfs.NewFSNode(unixfs_pb.Data_File) | ||
var links []ipld.Link | ||
for _, chunk := range entry.Chunks { | ||
size := chunk.End - chunk.Start | ||
cid, err := cid.Parse(chunk.Cid) | ||
if err != nil { | ||
return nil, errors.Wrap(err, "failed to parse cid") | ||
} | ||
links = append(links, ipld.Link{ | ||
Name: "", | ||
Cid: cid, | ||
Size: size, | ||
}) | ||
node.AddBlockSize(size) | ||
} | ||
nodeBytes, err := node.GetBytes() | ||
if err != nil { | ||
return nil, errors.Wrap(err, "failed to get bytes from fs node") | ||
} | ||
pbNode := merkledag.NodeWithData(nodeBytes) | ||
pbNode.SetCidBuilder(merkledag.V1CidPrefix()) | ||
for _, link := range links { | ||
err = pbNode.AddRawLink("", &link) | ||
if err != nil { | ||
return nil, errors.Wrap(err, "failed to add link to node") | ||
} | ||
} | ||
err = dagServ.Add(ctx, pbNode) | ||
if err != nil { | ||
return nil, errors.Wrap(err, "failed to add node to dag service") | ||
} | ||
return pbNode, nil | ||
} | ||
} | ||
return nil, errors.New("invalid entry type") | ||
} | ||
|
||
func GenerateIpldCar(ctx context.Context, input io.Reader, parent string, writer io.Writer) (cid.Cid, error) { | ||
scanner := bufio.NewScanner(input) | ||
parentPath, err := filepath.Abs(parent) | ||
if err != nil { | ||
return cid.Undef, errors.Wrap(err, "failed to get absolute path of parent") | ||
} | ||
|
||
blockStore := bstore.NewBlockstore(datastore.NewMapDatastore()) | ||
dagServ := merkledag.NewDAGService(blockservice.New(blockStore, nil)) | ||
rootDir := FsEntry{ | ||
Type: Dir, | ||
SubEntries: make(map[string]*FsEntry), | ||
} | ||
// Fill up the tree with Type, Chunks and SubEntries | ||
for scanner.Scan() { | ||
line := scanner.Text() | ||
var finfo FileInfo | ||
err := json.Unmarshal([]byte(line), &finfo) | ||
if err != nil { | ||
return cid.Undef, errors.Wrap(err, "failed to unmarshal json") | ||
} | ||
|
||
fPath, err := filepath.Abs(finfo.Path) | ||
if err != nil { | ||
return cid.Undef, errors.Wrap(err, "failed to get absolute path of file") | ||
} | ||
|
||
relPath, err := filepath.Rel(parentPath, fPath) | ||
relSegments := strings.Split(relPath, string(filepath.Separator)) | ||
pos := &rootDir | ||
for i, seg := range relSegments { | ||
last := i == len(relSegments)-1 | ||
subEntry, ok := pos.SubEntries[seg] | ||
if !ok { | ||
if last { | ||
// Must be a file | ||
subEntry = &FsEntry{ | ||
Type: File, | ||
Chunks: make([]FileInfo, 0), | ||
} | ||
subEntry.Chunks = append(subEntry.Chunks, finfo) | ||
} else { | ||
// Must be a directory | ||
subEntry = &FsEntry{ | ||
Type: Dir, | ||
SubEntries: make(map[string]*FsEntry), | ||
} | ||
} | ||
pos.SubEntries[seg] = subEntry | ||
pos = subEntry | ||
} else { | ||
if last { | ||
// Must be a file | ||
subEntry.Chunks = append(subEntry.Chunks, finfo) | ||
} else { | ||
// Must be a directory | ||
pos = subEntry | ||
} | ||
} | ||
} | ||
} | ||
|
||
// Now iterate over the tree and create the IPLD nodes | ||
rootNode, err := getNode(ctx, &rootDir, dagServ) | ||
if err != nil { | ||
return cid.Undef, errors.Wrap(err, "failed to get root node") | ||
} | ||
err = car.WriteCar(ctx, dagServ, []cid.Cid{rootNode.Cid()}, writer, merkledag.IgnoreMissing()) | ||
if err != nil { | ||
return cid.Undef, errors.Wrap(err, "failed to write car file") | ||
} | ||
return rootNode.Cid(), nil | ||
} |
Oops, something went wrong.