Skip to content
This repository has been archived by the owner on May 24, 2024. It is now read-only.

Commit

Permalink
A new CLI tool to generate folder unixfs ipld car
Browse files Browse the repository at this point in the history
  • Loading branch information
xinaxu committed Mar 8, 2023
1 parent cbf2cdf commit e695adf
Show file tree
Hide file tree
Showing 14 changed files with 590 additions and 255 deletions.
135 changes: 135 additions & 0 deletions cmd/generate-ipld-car/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
package main

import (
"bufio"
"context"
"encoding/json"
"fmt"
commcid "github.com/filecoin-project/go-fil-commcid"
commp "github.com/filecoin-project/go-fil-commp-hashhash"
"github.com/google/uuid"
"github.com/pkg/errors"
"github.com/tech-greedy/generate-car/cmd/generate-ipld-car/util"
"github.com/urfave/cli/v2"
"io"
"os"
"path"
"path/filepath"
)

// Result is the summary that main marshals to JSON and prints on stdout after
// the car file has been generated.
type Result struct {
// DataCid is the string form of the root CID returned by util.GenerateIpldCar.
DataCid string
// PieceCid is the string form of the piece commitment (commp) CID.
PieceCid string
// PieceSize is the piece size reported by the commp calculator, or the
// value of the piece-size flag when that flag is greater than zero.
PieceSize uint64
}

// BufSize is the size, in bytes, of the buffered writer placed in front of the
// car file and the commp calculator. It evaluates to 4 MiB scaled by 127/128
// (4161536 bytes).
// NOTE(review): the 127/128 ratio presumably mirrors Filecoin Fr32 padding so
// that flushes line up with the commp calculator's chunks — confirm.
const BufSize = (4 << 20) / 128 * 127

// main wires up the generate-ipld-car command line application and runs it.
//
// The action reads an ndjson file list (from --input, or stdin when the value
// is "-"), writes a car file into --out-dir while feeding the same bytes to a
// commp calculator, renames the car file after its piece CID and prints a JSON
// encoded Result on stdout. If the action returns an error the process panics
// with it.
func main() {
	app := &cli.App{
		Name:  "generate-ipld-car",
		Usage: "generate ipld car archive from list of files and compute commp in the mean time. The generated car file only contains the file and folder information, not the actual data.",
		Flags: []cli.Flag{
			&cli.StringFlag{
				Name:    "input",
				Aliases: []string{"i"},
				Usage:   "This is a ndjson file containing the list of files to be included in the car archive. If not specified, use stdin instead.",
				Value:   "-",
			},
			&cli.Uint64Flag{
				Name:    "piece-size",
				Aliases: []string{"s"},
				Usage:   "Target piece size, default to minimum possible value",
				Value:   0,
			},
			&cli.StringFlag{
				Name:    "out-dir",
				Aliases: []string{"o"},
				Usage:   "Output directory to save the car file",
				Value:   ".",
			},
			&cli.StringFlag{
				Name:     "parent",
				Aliases:  []string{"p"},
				Usage:    "Parent path of the dataset",
				Required: true,
			},
		},
		Action: func(c *cli.Context) error {
			inputFile := c.String("input")
			pieceSizeInput := c.Uint64("piece-size")
			outDir := c.String("out-dir")
			parent := c.String("parent")

			var in *os.File
			if inputFile == "-" {
				in = os.Stdin
			} else {
				inFile, err := os.Open(inputFile)
				if err != nil {
					return errors.Wrap(err, "failed to open input file")
				}

				in = inFile
			}

			defer in.Close()

			// The car file is first written under a random name because its
			// final name (the piece CID) is only known once all bytes have
			// been hashed.
			outFilename := uuid.New().String() + ".car"
			outPath := filepath.Join(outDir, outFilename)
			carF, err := os.Create(outPath)
			if err != nil {
				return errors.Wrap(err, "failed to create car file")
			}

			// Until the car file has been fully written and closed, any early
			// return must release the file handle and discard the partial,
			// randomly named file instead of leaking both.
			written := false
			defer func() {
				if written {
					return
				}
				// Best effort: the error that triggered the cleanup is the
				// one reported to the caller.
				_ = carF.Close()
				_ = os.Remove(outPath)
			}()

			// Every byte written to the car file is also fed to the commp
			// calculator, so the piece commitment is computed in one pass.
			cp := new(commp.Calc)
			writer := bufio.NewWriterSize(io.MultiWriter(carF, cp), BufSize)
			cid, err := util.GenerateIpldCar(context.TODO(), in, parent, writer)
			if err != nil {
				return errors.Wrap(err, "failed to generate car file")
			}
			err = writer.Flush()
			if err != nil {
				return errors.Wrap(err, "failed to flush writer")
			}
			err = carF.Close()
			if err != nil {
				return errors.Wrap(err, "failed to close car file")
			}
			written = true

			rawCommP, pieceSize, err := cp.Digest()
			if err != nil {
				return errors.Wrap(err, "failed to compute commp")
			}
			// A non-zero piece-size flag asks for the commitment to be padded
			// up to that target size.
			if pieceSizeInput > 0 {
				rawCommP, err = commp.PadCommP(
					rawCommP,
					pieceSize,
					pieceSizeInput,
				)
				if err != nil {
					return errors.Wrap(err, "failed to pad commp")
				}
				pieceSize = pieceSizeInput
			}
			commCid, err := commcid.DataCommitmentV1ToCID(rawCommP)
			if err != nil {
				return errors.Wrap(err, "failed to convert commp to cid")
			}
			// NOTE(review): path.Join always joins with '/', whereas outPath
			// was built with filepath.Join; consider filepath.Join here too
			// (and dropping the "path" import) for non-slash platforms.
			err = os.Rename(outPath, path.Join(outDir, commCid.String()+".car"))
			if err != nil {
				return errors.Wrap(err, "failed to rename car file")
			}
			output, err := json.Marshal(Result{
				DataCid:   cid.String(),
				PieceCid:  commCid.String(),
				PieceSize: pieceSize,
			})
			if err != nil {
				return errors.Wrap(err, "failed to marshal result")
			}
			fmt.Println(string(output))
			return nil
		},
	}
	err := app.Run(os.Args)
	if err != nil {
		panic(err)
	}
}
62 changes: 62 additions & 0 deletions cmd/generate-ipld-car/util/FakeFSNode.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
package util

import (
"github.com/ipfs/go-cid"
ipld "github.com/ipfs/go-ipld-format"
"github.com/pkg/errors"
)

// FakeFSNode is a placeholder node that carries only a size and a CID and no
// block data. Its methods mirror the ones getNode needs in order to hand it
// out as an ipld.Node.
type FakeFSNode struct {
// size is the value reported by Size.
size uint64
// cid is the value reported by Cid.
cid cid.Cid
}

// ErrEmptyNode is returned by the FakeFSNode methods that would need real node
// content (Resolve and ResolveLink).
var ErrEmptyNode error = errors.New("fake fs node")

// NewFakeFSNode returns a FakeFSNode that reports the given size and cid.
func NewFakeFSNode(size uint64, cid cid.Cid) FakeFSNode {
return FakeFSNode{size: size, cid: cid}
}

// RawData always returns nil because a FakeFSNode holds no block data.
func (f FakeFSNode) RawData() []byte {
return nil
}

// Cid returns the CID the node was created with.
func (f FakeFSNode) Cid() cid.Cid {
return f.cid
}

// String returns "FakeFSNode - " followed by the string form of the node's CID.
func (f FakeFSNode) String() string {
return "FakeFSNode - " + f.cid.String()
}

// Loggable always returns nil.
func (f FakeFSNode) Loggable() map[string]interface{} {
return nil
}

// Resolve always fails with ErrEmptyNode; the path argument is ignored.
func (f FakeFSNode) Resolve(path []string) (interface{}, []string, error) {
return nil, nil, ErrEmptyNode
}

// Tree always returns nil; both arguments are ignored.
func (f FakeFSNode) Tree(path string, depth int) []string {
return nil
}

// ResolveLink always fails with ErrEmptyNode; the path argument is ignored.
func (f FakeFSNode) ResolveLink(path []string) (*ipld.Link, []string, error) {
return nil, nil, ErrEmptyNode
}

// Copy returns a pointer to a new FakeFSNode with the same size and CID.
func (f FakeFSNode) Copy() ipld.Node {
return &FakeFSNode{size: f.size, cid: f.cid}
}

// Links always returns nil.
func (f FakeFSNode) Links() []*ipld.Link {
return nil
}

// Stat returns an empty ipld.NodeStat and a nil error.
func (f FakeFSNode) Stat() (*ipld.NodeStat, error) {
return &ipld.NodeStat{}, nil
}

// Size returns the size the node was created with and a nil error.
func (f FakeFSNode) Size() (uint64, error) {
return f.size, nil
}
193 changes: 193 additions & 0 deletions cmd/generate-ipld-car/util/generate-ipld.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,193 @@
package util

import (
"bufio"
"context"
"encoding/json"
"github.com/ipfs/go-blockservice"
"github.com/ipfs/go-cid"
"github.com/ipfs/go-datastore"
bstore "github.com/ipfs/go-ipfs-blockstore"
ipld "github.com/ipfs/go-ipld-format"
"github.com/ipfs/go-merkledag"
"github.com/ipfs/go-unixfs"
uio "github.com/ipfs/go-unixfs/io"
unixfs_pb "github.com/ipfs/go-unixfs/pb"
"github.com/ipld/go-car"
"github.com/pkg/errors"
"io"
"path/filepath"
"strings"
)

// FileInfo is one record of the ndjson input consumed by GenerateIpldCar. Each
// input line is unmarshalled into a FileInfo, and every record ends up as one
// chunk of the file entry identified by Path.
type FileInfo struct {
// Path is the file's path; it is made absolute and then taken relative to
// the parent directory to place the file in the tree.
Path string
// Size is used as the node size when the file consists of a single chunk.
Size uint64
// Start and End delimit the chunk; End - Start is used as the link size
// when the file consists of several chunks.
Start uint64
End uint64
// Cid is the textual CID of the chunk; it is parsed with cid.Parse.
Cid string
}

// FsType tells whether an FsEntry is a directory or a file.
type FsType int

const (
// Dir marks an FsEntry whose children live in SubEntries. It is the zero
// value of FsType.
Dir FsType = iota
// File marks an FsEntry whose content is described by Chunks.
File
)

// FsEntry is a node of the in-memory file tree that GenerateIpldCar builds
// from its input before the tree is converted to IPLD nodes by getNode.
type FsEntry struct {
// Type selects which of the two fields below is meaningful.
Type FsType
// Chunks lists the input records collected for a File entry, in the order
// they were read.
Chunks []FileInfo
// SubEntries maps a path segment to the child entry of a Dir entry.
SubEntries map[string]*FsEntry
}

// getNode converts entry, and for directories everything beneath it, into an
// IPLD node.
//
//   - A Dir entry becomes a unixfs directory whose children are built
//     recursively; the directory node is added to dagServ.
//   - A File entry with exactly one chunk becomes a FakeFSNode carrying that
//     chunk's Size and Cid. It is deliberately NOT added to dagServ because it
//     is a fake node.
//   - Any other File entry becomes a unixfs file node with one unnamed link per
//     chunk (link size = End - Start); that node is added to dagServ.
//
// An entry of any other type yields an "invalid entry type" error.
func getNode(ctx context.Context, entry *FsEntry, dagServ ipld.DAGService) (ipld.Node, error) {
	cidBuilder := merkledag.V1CidPrefix()

	if entry.Type == Dir {
		directory := uio.NewDirectory(dagServ)
		directory.SetCidBuilder(cidBuilder)
		for childName, child := range entry.SubEntries {
			childNode, err := getNode(ctx, child, dagServ)
			if err != nil {
				return nil, errors.Wrap(err, "failed to get node for sub entry")
			}
			if err := directory.AddChild(ctx, childName, childNode); err != nil {
				return nil, errors.Wrap(err, "failed to add child to directory")
			}
		}
		dirNode, err := directory.GetNode()
		if err != nil {
			return nil, errors.Wrap(err, "failed to get node from directory")
		}
		if err := dagServ.Add(ctx, dirNode); err != nil {
			return nil, errors.Wrap(err, "failed to add node to dag service")
		}
		return dirNode, nil
	}

	if entry.Type != File {
		return nil, errors.New("invalid entry type")
	}

	// Single chunk: the chunk itself stands in for the file.
	if len(entry.Chunks) == 1 {
		only := entry.Chunks[0]
		parsed, err := cid.Parse(only.Cid)
		if err != nil {
			return nil, errors.Wrap(err, "failed to parse cid")
		}
		fake := NewFakeFSNode(only.Size, parsed)
		return &fake, nil
	}

	// Several chunks: record every block size on the unixfs node first, since
	// its serialized bytes are needed to create the protobuf node.
	fsNode := unixfs.NewFSNode(unixfs_pb.Data_File)
	var chunkLinks []ipld.Link
	for i := range entry.Chunks {
		chunk := entry.Chunks[i]
		chunkSize := chunk.End - chunk.Start
		parsed, err := cid.Parse(chunk.Cid)
		if err != nil {
			return nil, errors.Wrap(err, "failed to parse cid")
		}
		chunkLinks = append(chunkLinks, ipld.Link{Name: "", Cid: parsed, Size: chunkSize})
		fsNode.AddBlockSize(chunkSize)
	}

	encoded, err := fsNode.GetBytes()
	if err != nil {
		return nil, errors.Wrap(err, "failed to get bytes from fs node")
	}

	fileNode := merkledag.NodeWithData(encoded)
	fileNode.SetCidBuilder(cidBuilder)
	for i := range chunkLinks {
		if err := fileNode.AddRawLink("", &chunkLinks[i]); err != nil {
			return nil, errors.Wrap(err, "failed to add link to node")
		}
	}
	if err := dagServ.Add(ctx, fileNode); err != nil {
		return nil, errors.Wrap(err, "failed to add node to dag service")
	}
	return fileNode, nil
}

// GenerateIpldCar reads newline-delimited JSON FileInfo records from input,
// arranges them into a directory tree rooted at parent, converts that tree
// into IPLD nodes and writes the resulting DAG to writer in car format.
//
// Parameters:
//   - ctx is passed to the DAG service and the car writer.
//   - input supplies one JSON encoded FileInfo per line. Records sharing the
//     same path are collected, in input order, as the chunks of one file.
//   - parent is the directory that becomes the root of the tree; every record
//     path is taken relative to it.
//   - writer receives the car file bytes.
//
// It returns the CID of the root directory node. On any failure it returns
// cid.Undef and an error: malformed JSON, a path that cannot be made absolute
// or relative to parent, a path that descends through an entry already
// recorded as a file, a failure while reading input, or a failure while
// building or writing the DAG.
func GenerateIpldCar(ctx context.Context, input io.Reader, parent string, writer io.Writer) (cid.Cid, error) {
	scanner := bufio.NewScanner(input)
	parentPath, err := filepath.Abs(parent)
	if err != nil {
		return cid.Undef, errors.Wrap(err, "failed to get absolute path of parent")
	}

	blockStore := bstore.NewBlockstore(datastore.NewMapDatastore())
	dagServ := merkledag.NewDAGService(blockservice.New(blockStore, nil))
	rootDir := FsEntry{
		Type:       Dir,
		SubEntries: make(map[string]*FsEntry),
	}
	// Fill up the tree with Type, Chunks and SubEntries
	for scanner.Scan() {
		line := scanner.Text()
		var finfo FileInfo
		err := json.Unmarshal([]byte(line), &finfo)
		if err != nil {
			return cid.Undef, errors.Wrap(err, "failed to unmarshal json")
		}

		fPath, err := filepath.Abs(finfo.Path)
		if err != nil {
			return cid.Undef, errors.Wrap(err, "failed to get absolute path of file")
		}

		relPath, err := filepath.Rel(parentPath, fPath)
		if err != nil {
			return cid.Undef, errors.Wrap(err, "failed to get relative path of file")
		}
		relSegments := strings.Split(relPath, string(filepath.Separator))

		// Walk down from the root, creating directories on the way; the last
		// segment is the file that receives this record as a chunk.
		pos := &rootDir
		for i, seg := range relSegments {
			last := i == len(relSegments)-1
			subEntry, ok := pos.SubEntries[seg]
			if !ok {
				if last {
					// Must be a file
					subEntry = &FsEntry{
						Type:   File,
						Chunks: []FileInfo{finfo},
					}
				} else {
					// Must be a directory
					subEntry = &FsEntry{
						Type:       Dir,
						SubEntries: make(map[string]*FsEntry),
					}
				}
				pos.SubEntries[seg] = subEntry
				pos = subEntry
				continue
			}
			if last {
				// Another chunk of a file that was seen before
				subEntry.Chunks = append(subEntry.Chunks, finfo)
				continue
			}
			// Descending into a file entry would later write to its nil
			// SubEntries map and panic, so reject the input instead.
			if subEntry.Type != Dir {
				return cid.Undef, errors.Errorf("path %s descends through %s which is already a file", relPath, seg)
			}
			pos = subEntry
		}
	}
	// Scan returns false on both EOF and failure; without this check a read
	// error or an over-long line would silently truncate the file list.
	if err := scanner.Err(); err != nil {
		return cid.Undef, errors.Wrap(err, "failed to read input")
	}

	// Now iterate over the tree and create the IPLD nodes
	rootNode, err := getNode(ctx, &rootDir, dagServ)
	if err != nil {
		return cid.Undef, errors.Wrap(err, "failed to get root node")
	}
	// Single-chunk files are fake nodes that were never added to the DAG
	// service, so missing blocks are ignored while writing the car.
	err = car.WriteCar(ctx, dagServ, []cid.Cid{rootNode.Cid()}, writer, merkledag.IgnoreMissing())
	if err != nil {
		return cid.Undef, errors.Wrap(err, "failed to write car file")
	}
	return rootNode.Cid(), nil
}
Loading

0 comments on commit e695adf

Please sign in to comment.