Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add list and filter commands #227

Merged
merged 4 commits into from
Sep 10, 2021
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 22 additions & 4 deletions v2/cmd/car/car.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,28 @@ func main() {
},
},
{
Name: "split",
Aliases: []string{"s"},
Usage: "Split an index to a detached file",
Action: SplitCar,
Name: "detach-index",
Usage: "Detach an index to a detached file",
Action: DetachCar,
},
{
Name: "list",
Aliases: []string{"l"},
Usage: "List the CIDs in a car",
Action: ListCar,
},
{
Name: "filter",
Aliases: []string{"f"},
Usage: "Filter the CIDs in a car",
Action: FilterCar,
Flags: []cli.Flag{
&cli.StringFlag{
Name: "cid-file",
Usage: "A file to read CIDs from",
TakesFile: true,
},
},
},
},
}
Expand Down
4 changes: 2 additions & 2 deletions v2/cmd/car/split.go → v2/cmd/car/detach.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ import (
"github.com/urfave/cli/v2"
)

// SplitCar is a command to output the index part of a car.
func SplitCar(c *cli.Context) error {
// DetachCar is a command to output the index part of a car.
func DetachCar(c *cli.Context) error {
r, err := carv2.OpenReader(c.Args().Get(0))
if err != nil {
return err
Expand Down
100 changes: 100 additions & 0 deletions v2/cmd/car/filter.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
package main

import (
"bufio"
"fmt"
"io"
"os"
"strings"

"github.com/ipfs/go-cid"
carv2 "github.com/ipld/go-car/v2"
"github.com/ipld/go-car/v2/blockstore"
icarv1 "github.com/ipld/go-car/v2/internal/carv1"
"github.com/urfave/cli/v2"
)

// FilterCar is a command to select a subset of a car by CID.
func FilterCar(c *cli.Context) error {
r, err := carv2.OpenReader(c.Args().Get(0))
if err != nil {
return err
}
defer r.Close()

if c.Args().Len() < 2 {
return fmt.Errorf("an output filename must be provided")
}
roots, err := r.Roots()
if err != nil {
return err
}
bs, err := blockstore.OpenReadWrite(c.Args().Get(1), roots)
if err != nil {
return err
}

// Get the set of CIDs from stdin.
inStream := os.Stdin
if c.IsSet("cidFile") {
inStream, err = os.Open(c.String("cidFile"))
if err != nil {
return err
}
defer inStream.Close()
}
cidList, err := parseCIDS(inStream)
if err != nil {
return err
}
fmt.Printf("filtering to %d cids\n", len(cidList))

cidMap := make(map[cid.Cid]struct{})
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

consider making parseCIDs build a map directly

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

also I wonder if you should at least print a warning on duplicate CIDs

for _, e := range cidList {
cidMap[e] = struct{}{}
}

rd, err := icarv1.NewCarReader(r.DataReader())
if err != nil {
return err
}

for {
blk, err := rd.Next()
if err != nil {
if err == io.EOF {
break
}
return err
}
if _, ok := cidMap[blk.Cid()]; ok {
if err := bs.Put(blk); err != nil {
return err
}
}
}
return bs.Finalize()
}

// parseCIDS reads CIDs from r, one per line, and returns them in input order.
//
// Leading and trailing whitespace on each line is ignored and blank lines are
// skipped. The first line that is not a valid CID aborts parsing and is
// reported together with its 1-based line number. Duplicates are preserved.
func parseCIDS(r io.Reader) ([]cid.Cid, error) {
	cids := make([]cid.Cid, 0)
	// A Scanner always yields whole lines; bufio.Reader.ReadLine may return a
	// long line in fragments, which would each be parsed as a separate CID.
	sc := bufio.NewScanner(r)
	for lineNo := 1; sc.Scan(); lineNo++ {
		text := strings.TrimSpace(sc.Text())
		if text == "" {
			continue
		}
		c, err := cid.Parse(text)
		if err != nil {
			return nil, fmt.Errorf("parsing cid on line %d: %w", lineNo, err)
		}
		cids = append(cids, c)
	}
	// Scan stops on both EOF and real read errors; only the latter is non-nil.
	if err := sc.Err(); err != nil {
		return nil, err
	}
	return cids, nil
}
47 changes: 47 additions & 0 deletions v2/cmd/car/list.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
package main

import (
"fmt"
"io"
"os"

carv2 "github.com/ipld/go-car/v2"
icarv1 "github.com/ipld/go-car/v2/internal/carv1"
"github.com/urfave/cli/v2"
)

// ListCar is a command to output the cids in a car.
func ListCar(c *cli.Context) error {
r, err := carv2.OpenReader(c.Args().Get(0))
if err != nil {
return err
}
defer r.Close()

outStream := os.Stdout
if c.Args().Len() >= 2 {
outStream, err = os.Create(c.Args().Get(1))
if err != nil {
return err
}
}
defer outStream.Close()

rd, err := icarv1.NewCarReader(r.DataReader())
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

use https://pkg.go.dev/github.com/ipld/go-car/v2@master#BlockReader? it was written precisely for this :) and it even supports both carv1 and carv2.

if err != nil {
return err
}

for {
blk, err := rd.Next()
if err != nil {
if err == io.EOF {
break
}
return err
}
outStream.WriteString(fmt.Sprintf("%s\n", blk.Cid()))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
outStream.WriteString(fmt.Sprintf("%s\n", blk.Cid()))
fmt.Fprintf(outStream, "%s\n", blk.Cid())

}

return err
}