From e097175caaa6e2bfe3e08188c72c09a5b51b36d8 Mon Sep 17 00:00:00 2001
From: elsa
Date: Wed, 6 May 2020 14:20:04 +0200
Subject: [PATCH] specify which files to shard with -f option

---
 cmd/giashard/main.go | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/cmd/giashard/main.go b/cmd/giashard/main.go
index 8c6f788..5a9879e 100644
--- a/cmd/giashard/main.go
+++ b/cmd/giashard/main.go
@@ -2,6 +2,7 @@ package main
 
 import (
 	"errors"
+	"strings"
 	"flag"
 	"fmt"
 	"log"
@@ -12,11 +13,13 @@ import (
 var outdir string
 var shards uint
 var batchsize int64
+var fileslist string
 
 var schema = []string{"url", "mime", "plain_text"}
 
 func init() {
 	flag.StringVar(&outdir, "o", ".", "Output location")
+	flag.StringVar(&fileslist, "f", "plain_text,url,mime", "Files to shard, separated by commas")
 	flag.UintVar(&shards, "n", 8, "Number of shards (2^n)")
 	flag.Int64Var(&batchsize, "b", 100, "Batch size in MB")
 	flag.Usage = func() {
@@ -34,6 +37,7 @@ significant part of the hostname in a url and batch is approximately fixed size.
 func main() {
 	log.SetFlags(log.Ldate | log.Ltime | log.Lshortfile)
 	flag.Parse()
+	schema = strings.Split(fileslist, ",")
 
 	w, err := giashard.NewShard(outdir, shards, batchsize * 1024 * 1024, "url", append(schema, "source")...)
 	if err != nil {