dgraph-io · gitlw · Jul 22, 2019 · Jun 28, 2019 · Jun 29, 2019 · Jul 17, 2019
diff --git a/dgraph/cmd/bulk/count_index.go b/dgraph/cmd/bulk/count_index.go
@@ -19,7 +19,6 @@ package bulk
 import (
 	"bytes"
 	"sort"
-	"sync"
 	"sync/atomic"
 
 	"github.com/dgraph-io/badger"
@@ -41,7 +40,6 @@ type countIndexer struct {
 	writer *badger.StreamWriter
 	cur    current
 	counts map[int][]uint64
-	wg     sync.WaitGroup
 }
 
 // addUid adds the uid from rawKey to a count index if a count index is
@@ -59,8 +57,7 @@ func (c *countIndexer) addUid(rawKey []byte, count int) {
 
 	if !sameIndexKey {
 		if len(c.counts) > 0 {
-			c.wg.Add(1)
-			go c.writeIndex(c.cur.pred, c.cur.rev, c.counts)
+			c.writeIndex(c.cur.pred, c.cur.rev, c.counts)
 		}
 		if len(c.counts) > 0 || c.counts == nil {
 			c.counts = make(map[int][]uint64)
@@ -75,8 +72,6 @@ func (c *countIndexer) addUid(rawKey []byte, count int) {
 }
 
 func (c *countIndexer) writeIndex(pred string, rev bool, counts map[int][]uint64) {
-	defer c.wg.Done()
-
 	streamId := atomic.AddUint32(&c.streamId, 1)
 	list := &bpb.KVList{}
 	for count, uids := range counts {
@@ -101,7 +96,3 @@ func (c *countIndexer) writeIndex(pred string, rev bool, counts map[int][]uint64
 		x.Check(err)
 	}
 }
-
-func (c *countIndexer) wait() {
-	c.wg.Wait()
-}
diff --git a/dgraph/cmd/bulk/reduce.go b/dgraph/cmd/bulk/reduce.go
@@ -70,7 +70,6 @@ func (r *reducer) run() error {
 
 			ci := &countIndexer{reducer: r, writer: writer}
 			r.reduce(mapItrs, ci)
-			ci.wait()
 
 			if err := writer.Flush(); err != nil {
 				x.Check(err)
@@ -135,14 +134,14 @@ func newMapIterator(filename string) *mapIterator {
 	return &mapIterator{fd: fd, reader: bufio.NewReaderSize(fd, 16<<10)}
 }
 
+// encodeAndWrite converts the given batch into a KVList and then
+// sends the list to the stream writer.
+// While iterating over the kvs in the batch, it also sets each kv's stream id
+// to the previously recorded stream id for the kv's attribute (predicate),
+// or to a newly assigned stream id if the attribute has not been recorded yet.
 func (r *reducer) encodeAndWrite(
-	writer *badger.StreamWriter, entryCh chan []*pb.MapEntry, closer *y.Closer) {
-	defer closer.Done()
+	writer *badger.StreamWriter, batch []*pb.MapEntry, preds map[string]uint32) {
 
-	var listSize int
-	list := &bpb.KVList{}
-
-	preds := make(map[string]uint32)
 	setStreamId := func(kv *bpb.KV) {
 		pk := x.Parse(kv.Key)
 		x.AssertTrue(len(pk.Attr) > 0)
@@ -160,30 +159,15 @@ func (r *reducer) encodeAndWrite(
 		kv.StreamId = streamId
 	}
 
-	for batch := range entryCh {
-		listSize += r.toList(batch, list)
-		if listSize > 4<<20 {
-			for _, kv := range list.Kv {
-				setStreamId(kv)
-			}
-			x.Check(writer.Write(list))
-			list = &bpb.KVList{}
-			listSize = 0
-		}
-	}
-	if len(list.Kv) > 0 {
-		for _, kv := range list.Kv {
-			setStreamId(kv)
-		}
-		x.Check(writer.Write(list))
+	list := &bpb.KVList{}
+	r.toList(batch, list)
+	for _, kv := range list.Kv {
+		setStreamId(kv)
 	}
+	x.Check(writer.Write(list))
 }
 
 func (r *reducer) reduce(mapItrs []*mapIterator, ci *countIndexer) {
-	entryCh := make(chan []*pb.MapEntry, 100)
-	closer := y.NewCloser(1)
-	defer closer.SignalAndWait()
-
 	var ph postingHeap
 	for _, itr := range mapItrs {
 		me := itr.Next()
@@ -195,14 +179,14 @@ func (r *reducer) reduce(mapItrs []*mapIterator, ci *countIndexer) {
 	}
 
 	writer := ci.writer
-	go r.encodeAndWrite(writer, entryCh, closer)
 
 	const batchSize = 10000
 	const batchAlloc = batchSize * 11 / 10
 	batch := make([]*pb.MapEntry, 0, batchAlloc)
 	var prevKey []byte
 	var plistLen int
 
+	preds := make(map[string]uint32)
 	for len(ph.nodes) > 0 {
 		node0 := &ph.nodes[0]
 		me := node0.mapEntry
@@ -223,20 +207,19 @@ func (r *reducer) reduce(mapItrs []*mapIterator, ci *countIndexer) {
 		}
 
 		if len(batch) >= batchSize && keyChanged {
-			entryCh <- batch
+			r.encodeAndWrite(writer, batch, preds)
 			batch = make([]*pb.MapEntry, 0, batchAlloc)
 		}
 		prevKey = me.Key
 		batch = append(batch, me)
 		plistLen++
 	}
 	if len(batch) > 0 {
-		entryCh <- batch
+		r.encodeAndWrite(writer, batch, preds)
 	}
 	if plistLen > 0 {
 		ci.addUid(prevKey, plistLen)
 	}
-	close(entryCh)
 }
 
 type heapNode struct {

diff --git a/worker/export.go b/worker/export.go
@@ -526,6 +526,9 @@ func export(ctx context.Context, in *pb.ExportRequest) error {
 			return toType(pk.Attr, update)
 
 		case pk.IsData():
+			// Define a local error variable to avoid concurrent writes
+			// to the err variable declared in the enclosing export function.
+			var err error
 			e.pl, err = posting.ReadPostingList(key, itr)
 			if err != nil {
 				return nil, err