Reuse slice of record
This is expected to avoid about 3.5% of alloc_objects:
alloc_objects:
  Total:   773496750  773873722 (flat, cum)  7.18%
    177            .          .           	parser.fieldIndexes = parser.fieldIndexes[:0]
    178            .          .
    179            .          .           	isEmptyLine := true
...
    225    386621314  386621314           	str := string(parser.recordBuffer) // Convert to string once to batch allocations
    226    386875436  386875436           	dst := make([]string, len(parser.fieldIndexes))
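
The pattern behind the diff below is a common Go allocation trick: the caller passes the previous record's []string back into readRecord, which only calls make when that slice's capacity is too small, and ReadRow keeps the returned slice in parser.lastRecord for the next row. Here is a minimal, runnable sketch of that pattern; the recordReader type and main function are illustrative stand-ins, not the actual CSVParser API.

// sketch.go — slice-reuse pattern; recordReader is a hypothetical stand-in for CSVParser.
package main

import "fmt"

type recordReader struct {
	recordBuffer []byte   // all field bytes of the current record, concatenated
	fieldIndexes []int    // the i'th field ends at offset fieldIndexes[i]
	lastRecord   []string // slice reused across calls to readRecord
}

// readRecord returns the fields of the current record. It reuses dst's
// backing array when its capacity is large enough, so the per-record
// []string allocation disappears once the slice has grown to a stable size.
func (r *recordReader) readRecord(dst []string) []string {
	str := string(r.recordBuffer) // one string allocation backs every field
	if cap(dst) < len(r.fieldIndexes) {
		dst = make([]string, len(r.fieldIndexes))
	}
	dst = dst[:len(r.fieldIndexes)]
	preIdx := 0
	for i, idx := range r.fieldIndexes {
		dst[i] = str[preIdx:idx]
		preIdx = idx
	}
	return dst
}

func main() {
	r := &recordReader{
		recordBuffer: []byte("foobarbaz"),
		fieldIndexes: []int{3, 6, 9}, // fields: "foo", "bar", "baz"
	}
	// The caller hands the previous record back in, the way ReadRow does
	// with parser.lastRecord in the diff below.
	r.lastRecord = r.readRecord(r.lastRecord)
	fmt.Println(r.lastRecord) // [foo bar baz]
}

The trade-off of this design is that the returned slice is overwritten by the next call, so a caller that wants to keep the previous row's fields must copy them out first.
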
july2993 committed Mar 12, 2020
1 parent 74f35b8 commit 6f21d82
Showing 1 changed file with 12 additions and 5 deletions.
17 changes: 12 additions & 5 deletions lightning/mydump/csv_parser.go
@@ -17,12 +17,12 @@ import (
"bytes"
"io"
"strings"
"sync"

"github.com/pingcap/errors"
"github.com/pingcap/tidb-lightning/lightning/config"
"github.com/pingcap/tidb-lightning/lightning/worker"
"github.com/pingcap/tidb/types"
"sync"
)

var (
@@ -51,6 +51,8 @@ type CSVParser struct {
	// fieldIndexes is an index of fields inside recordBuffer.
	// The i'th field ends at offset fieldIndexes[i] in recordBuffer.
	fieldIndexes []int
+
+	lastRecord []string
}

const estColCnt = 10
@@ -172,7 +174,7 @@ func (parser *CSVParser) readUntil(chars string) ([]byte, byte, error) {
	}
}

-func (parser *CSVParser) readRecord() ([]string, error) {
+func (parser *CSVParser) readRecord(dst []string) ([]string, error) {
	parser.recordBuffer = parser.recordBuffer[:0]
	parser.fieldIndexes = parser.fieldIndexes[:0]

@@ -223,7 +225,11 @@ outside:
	// Create a single string and create slices out of it.
	// This pins the memory of the fields together, but allocates once.
	str := string(parser.recordBuffer) // Convert to string once to batch allocations
-	dst := make([]string, len(parser.fieldIndexes))
+	dst = dst[:0]
+	if cap(dst) < len(parser.fieldIndexes) {
+		dst = make([]string, len(parser.fieldIndexes))
+	}
+	dst = dst[:len(parser.fieldIndexes)]
	var preIdx int
	for i, idx := range parser.fieldIndexes {
		dst[i] = str[preIdx:idx]
@@ -324,7 +330,7 @@ func (parser *CSVParser) ReadRow() error {

	// skip the header first
	if parser.pos == 0 && parser.cfg.Header {
-		columns, err := parser.readRecord()
+		columns, err := parser.readRecord(nil)
		if err != nil {
			return errors.Trace(err)
		}
@@ -335,10 +341,11 @@
		}
	}

-	records, err := parser.readRecord()
+	records, err := parser.readRecord(parser.lastRecord)
	if err != nil {
		return errors.Trace(err)
	}
+	parser.lastRecord = records
	// remove trailing empty values
	if parser.cfg.TrimLastSep {
		i := len(records)
