Skip to content

Commit

Permalink
Merge pull request #6 from paracrawl/extrafiles-for-giamerge
Browse files Browse the repository at this point in the history
Add same -f option to giamerge
  • Loading branch information
jelmervdl authored Feb 1, 2021
2 parents c00c93c + bf45c41 commit e87d049
Showing 1 changed file with 5 additions and 2 deletions.
7 changes: 5 additions & 2 deletions cmd/giamerge/giamerge.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,18 @@ import (
"os"
"path/filepath"
"strconv"
"strings"
"github.com/paracrawl/giashard"
)

// Package-level settings; init binds outdir, fileslist, shards and batchsize
// to command-line flags.

// outdir is the output location (-o flag, default "."); main creates it with os.MkdirAll.
var outdir string
// shards is the -n flag value, described by the flag as "Number of shards (2^n)" (default 8).
var shards uint
// batchsize is the batch size in MB (-b flag, default 100); main multiplies it by 1024*1024.
var batchsize int64

// schema is a fixed list of the names "url", "mime", "plain_text", "source".
// NOTE(review): main declares a local `schema` via strings.Split(fileslist, ","),
// which would shadow this variable inside main — confirm whether this
// package-level value is still used anywhere.
var schema = []string{"url", "mime", "plain_text", "source"}
// fileslist holds the raw -f flag value: the files to shard, separated by commas
// (default "plain_text,url,mime,source").
var fileslist string

func init() {
flag.StringVar(&outdir, "o", ".", "Output location")
flag.StringVar(&fileslist, "f", "plain_text,url,mime,source", "Files to shard, separated by commas")
flag.UintVar(&shards, "n", 8, "Number of shards (2^n)")
flag.Int64Var(&batchsize, "b", 100, "Batch size in MB")
flag.Usage = func() {
Expand All @@ -32,6 +33,8 @@ func main() {
log.SetFlags(log.Ldate | log.Ltime | log.Lshortfile)
flag.Parse()

schema := strings.Split(fileslist, ",")

maxsize := batchsize * 1024 * 1024

err := os.MkdirAll(outdir, os.ModePerm)
Expand Down

0 comments on commit e87d049

Please sign in to comment.