From 9429fe3de5ec3ad8b139fe5460670cadfa1689ee Mon Sep 17 00:00:00 2001 From: Will Scott Date: Fri, 10 Sep 2021 01:03:42 -0700 Subject: [PATCH 1/4] better name for detaching index command --- v2/cmd/car/car.go | 7 +++---- v2/cmd/car/{split.go => detach.go} | 4 ++-- 2 files changed, 5 insertions(+), 6 deletions(-) rename v2/cmd/car/{split.go => detach.go} (83%) diff --git a/v2/cmd/car/car.go b/v2/cmd/car/car.go index 9f481cb4..8faca9ab 100644 --- a/v2/cmd/car/car.go +++ b/v2/cmd/car/car.go @@ -28,10 +28,9 @@ func main() { }, }, { - Name: "split", - Aliases: []string{"s"}, - Usage: "Split an index to a detached file", - Action: SplitCar, + Name: "detach-index", + Usage: "Detach an index to a detached file", + Action: DetachCar, }, }, } diff --git a/v2/cmd/car/split.go b/v2/cmd/car/detach.go similarity index 83% rename from v2/cmd/car/split.go rename to v2/cmd/car/detach.go index 7733e19c..276d73b4 100644 --- a/v2/cmd/car/split.go +++ b/v2/cmd/car/detach.go @@ -9,8 +9,8 @@ import ( "github.com/urfave/cli/v2" ) -// SplitCar is a command to output the index part of a car. -func SplitCar(c *cli.Context) error { +// DetachCar is a command to output the index part of a car. +func DetachCar(c *cli.Context) error { r, err := carv2.OpenReader(c.Args().Get(0)) if err != nil { return err From 4313e943b4965ad081acb3ec981f121cc7f3ff8b Mon Sep 17 00:00:00 2001 From: Will Scott Date: Fri, 10 Sep 2021 01:45:59 -0700 Subject: [PATCH 2/4] add list and filter commands --- v2/cmd/car/car.go | 19 ++++++++ v2/cmd/car/filter.go | 100 +++++++++++++++++++++++++++++++++++++++++++ v2/cmd/car/list.go | 47 ++++++++++++++++++++ 3 files changed, 166 insertions(+) create mode 100644 v2/cmd/car/filter.go create mode 100644 v2/cmd/car/list.go diff --git a/v2/cmd/car/car.go b/v2/cmd/car/car.go index 8faca9ab..3150dc05 100644 --- a/v2/cmd/car/car.go +++ b/v2/cmd/car/car.go @@ -32,6 +32,25 @@ func main() { Usage: "Detach an index to a detached file", Action: DetachCar, }, + { + Name: "list", + Aliases: []string{"l"}, + Usage: "List the CIDs in a car", + Action: ListCar, + }, + { + Name: "filter", + Aliases: []string{"f"}, + Usage: "Filter the CIDs in a car", + Action: FilterCar, + Flags: []cli.Flag{ + &cli.StringFlag{ + Name: "cidFile", + Usage: "A file to read CIDs from", + TakesFile: true, + }, + }, + }, }, } diff --git a/v2/cmd/car/filter.go b/v2/cmd/car/filter.go new file mode 100644 index 00000000..d1483bf9 --- /dev/null +++ b/v2/cmd/car/filter.go @@ -0,0 +1,100 @@ +package main + +import ( + "bufio" + "fmt" + "io" + "os" + "strings" + + "github.com/ipfs/go-cid" + carv2 "github.com/ipld/go-car/v2" + "github.com/ipld/go-car/v2/blockstore" + icarv1 "github.com/ipld/go-car/v2/internal/carv1" + "github.com/urfave/cli/v2" +) + +// FilterCar is a command to select a subset of a car by CID. +func FilterCar(c *cli.Context) error { + r, err := carv2.OpenReader(c.Args().Get(0)) + if err != nil { + return err + } + defer r.Close() + + if c.Args().Len() < 2 { + return fmt.Errorf("an output filename must be provided") + } + roots, err := r.Roots() + if err != nil { + return err + } + bs, err := blockstore.OpenReadWrite(c.Args().Get(1), roots) + if err != nil { + return err + } + + // Get the set of CIDs from stdin. + inStream := os.Stdin + if c.IsSet("cidFile") { + inStream, err = os.Open(c.String("cidFile")) + if err != nil { + return err + } + defer inStream.Close() + } + cidList, err := parseCIDS(inStream) + if err != nil { + return err + } + fmt.Printf("filtering to %d cids\n", len(cidList)) + + cidMap := make(map[cid.Cid]struct{}) + for _, e := range cidList { + cidMap[e] = struct{}{} + } + + rd, err := icarv1.NewCarReader(r.DataReader()) + if err != nil { + return err + } + + for { + blk, err := rd.Next() + if err != nil { + if err == io.EOF { + break + } + return err + } + if _, ok := cidMap[blk.Cid()]; ok { + if err := bs.Put(blk); err != nil { + return err + } + } + } + return bs.Finalize() +} + +func parseCIDS(r io.Reader) ([]cid.Cid, error) { + cb := make([]cid.Cid, 0) + br := bufio.NewReader(r) + for { + line, _, err := br.ReadLine() + if err != nil { + if err == io.EOF { + return cb, nil + } + return nil, err + } + trimLine := strings.TrimSpace(string(line)) + if len(trimLine) == 0 { + continue + } + c, err := cid.Parse(trimLine) + if err != nil { + return nil, err + } + cb = append(cb, c) + } +} diff --git a/v2/cmd/car/list.go b/v2/cmd/car/list.go new file mode 100644 index 00000000..e6f9df3c --- /dev/null +++ b/v2/cmd/car/list.go @@ -0,0 +1,47 @@ +package main + +import ( + "fmt" + "io" + "os" + + carv2 "github.com/ipld/go-car/v2" + icarv1 "github.com/ipld/go-car/v2/internal/carv1" + "github.com/urfave/cli/v2" +) + +// ListCar is a command to output the cids in a car. +func ListCar(c *cli.Context) error { + r, err := carv2.OpenReader(c.Args().Get(0)) + if err != nil { + return err + } + defer r.Close() + + outStream := os.Stdout + if c.Args().Len() >= 2 { + outStream, err = os.Create(c.Args().Get(1)) + if err != nil { + return err + } + } + defer outStream.Close() + + rd, err := icarv1.NewCarReader(r.DataReader()) + if err != nil { + return err + } + + for { + blk, err := rd.Next() + if err != nil { + if err == io.EOF { + break + } + return err + } + outStream.WriteString(fmt.Sprintf("%s\n", blk.Cid())) + } + + return err +} From b0fcc695d1058fd3cb39e26218c8864b10bbb92a Mon Sep 17 00:00:00 2001 From: Will Scott Date: Fri, 10 Sep 2021 01:52:25 -0700 Subject: [PATCH 3/4] better flag name --- v2/cmd/car/car.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/v2/cmd/car/car.go b/v2/cmd/car/car.go index 3150dc05..df962938 100644 --- a/v2/cmd/car/car.go +++ b/v2/cmd/car/car.go @@ -45,7 +45,7 @@ func main() { Action: FilterCar, Flags: []cli.Flag{ &cli.StringFlag{ - Name: "cidFile", + Name: "cid-file", Usage: "A file to read CIDs from", TakesFile: true, }, From 37691529a747fc4fba258a7fafb7a65366c7100d Mon Sep 17 00:00:00 2001 From: Will Scott Date: Fri, 10 Sep 2021 02:11:00 -0700 Subject: [PATCH 4/4] reviews --- v2/cmd/car/filter.go | 20 +++++++++----------- v2/cmd/car/list.go | 16 +++++++++++----- 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/v2/cmd/car/filter.go b/v2/cmd/car/filter.go index d1483bf9..cb58b4c1 100644 --- a/v2/cmd/car/filter.go +++ b/v2/cmd/car/filter.go @@ -43,16 +43,11 @@ func FilterCar(c *cli.Context) error { } defer inStream.Close() } - cidList, err := parseCIDS(inStream) + cidMap, err := parseCIDS(inStream) if err != nil { return err } - fmt.Printf("filtering to %d cids\n", len(cidList)) - - cidMap := make(map[cid.Cid]struct{}) - for _, e := range cidList { - cidMap[e] = struct{}{} - } + fmt.Printf("filtering to %d cids\n", len(cidMap)) rd, err := icarv1.NewCarReader(r.DataReader()) if err != nil { @@ -76,14 +71,14 @@ func FilterCar(c *cli.Context) error { return bs.Finalize() } -func parseCIDS(r io.Reader) ([]cid.Cid, error) { - cb := make([]cid.Cid, 0) +func parseCIDS(r io.Reader) (map[cid.Cid]struct{}, error) { + cids := make(map[cid.Cid]struct{}) br := bufio.NewReader(r) for { line, _, err := br.ReadLine() if err != nil { if err == io.EOF { - return cb, nil + return cids, nil } return nil, err } @@ -95,6 +90,9 @@ func parseCIDS(r io.Reader) ([]cid.Cid, error) { if err != nil { return nil, err } - cb = append(cb, c) + if _, ok := cids[c]; ok { + fmt.Fprintf(os.Stderr, "duplicate cid: %s\n", c) + } + cids[c] = struct{}{} } } diff --git a/v2/cmd/car/list.go b/v2/cmd/car/list.go index e6f9df3c..e9cf1f7e 100644 --- a/v2/cmd/car/list.go +++ b/v2/cmd/car/list.go @@ -6,17 +6,24 @@ import ( "os" carv2 "github.com/ipld/go-car/v2" - icarv1 "github.com/ipld/go-car/v2/internal/carv1" "github.com/urfave/cli/v2" ) // ListCar is a command to output the cids in a car. func ListCar(c *cli.Context) error { - r, err := carv2.OpenReader(c.Args().Get(0)) + inStream := os.Stdin + var err error + if c.Args().Len() >= 1 { + inStream, err = os.Open(c.Args().First()) + if err != nil { + return err + } + defer inStream.Close() + } + rd, err := carv2.NewBlockReader(inStream) if err != nil { return err } - defer r.Close() outStream := os.Stdout if c.Args().Len() >= 2 { @@ -27,7 +34,6 @@ func ListCar(c *cli.Context) error { } defer outStream.Close() - rd, err := icarv1.NewCarReader(r.DataReader()) if err != nil { return err } @@ -40,7 +46,7 @@ func ListCar(c *cli.Context) error { } return err } - outStream.WriteString(fmt.Sprintf("%s\n", blk.Cid())) + fmt.Fprintf(outStream, "%s\n", blk.Cid()) } return err