Reuse slice of record
This is expected to avoid about 3.5% of alloc_objects:
alloc_objects:
  Total:   773496750  773873722 (flat, cum)  7.18%
    177            .          .           	parser.fieldIndexes = parser.fieldIndexes[:0]
    178            .          .
    179            .          .           	isEmptyLine := true
...
    225    386621314  386621314           	str := string(parser.recordBuffer) // Convert to string once to batch allocations
    226    386875436  386875436           	dst := make([]string, len(parser.fieldIndexes))
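
The pattern behind the diff below is a common Go allocation trick: the caller passes the previous record's []string back into readRecord, which only calls make when that slice's capacity is too small, and ReadRow keeps the returned slice in parser.lastRecord for the next row. Here is a minimal, runnable sketch of that pattern; the recordReader type and main function are illustrative stand-ins, not the actual CSVParser API.

// sketch.go — slice-reuse pattern; recordReader is a hypothetical stand-in for CSVParser.
package main

import "fmt"

type recordReader struct {
	recordBuffer []byte   // all field bytes of the current record, concatenated
	fieldIndexes []int    // the i'th field ends at offset fieldIndexes[i]
	lastRecord   []string // slice reused across calls to readRecord
}

// readRecord returns the fields of the current record. It reuses dst's
// backing array when its capacity is large enough, so the per-record
// []string allocation disappears once the slice has grown to a stable size.
func (r *recordReader) readRecord(dst []string) []string {
	str := string(r.recordBuffer) // one string allocation backs every field
	if cap(dst) < len(r.fieldIndexes) {
		dst = make([]string, len(r.fieldIndexes))
	}
	dst = dst[:len(r.fieldIndexes)]
	preIdx := 0
	for i, idx := range r.fieldIndexes {
		dst[i] = str[preIdx:idx]
		preIdx = idx
	}
	return dst
}

func main() {
	r := &recordReader{
		recordBuffer: []byte("foobarbaz"),
		fieldIndexes: []int{3, 6, 9}, // fields: "foo", "bar", "baz"
	}
	// The caller hands the previous record back in, the way ReadRow does
	// with parser.lastRecord in the diff below.
	r.lastRecord = r.readRecord(r.lastRecord)
	fmt.Println(r.lastRecord) // [foo bar baz]
}

The trade-off of this design is that the returned slice is overwritten by the next call, so a caller that wants to keep the previous row's fields must copy them out first.
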
july2993 committed Mar 12, 2020
1 parent 74f35b8 commit 6f21d82
Showing 1 changed file with 12 additions and 5 deletions.
17 changes: 12 additions & 5 deletions lightning/mydump/csv_parser.go
@@ -17,12 +17,12 @@ import (
"bytes"
"io"
"strings"
"sync"

"github.com/pingcap/errors"
"github.com/pingcap/tidb-lightning/lightning/config"
"github.com/pingcap/tidb-lightning/lightning/worker"
"github.com/pingcap/tidb/types"
"sync"
)

var (
@@ -51,6 +51,8 @@ type CSVParser struct {
	// fieldIndexes is an index of fields inside recordBuffer.
	// The i'th field ends at offset fieldIndexes[i] in recordBuffer.
	fieldIndexes []int
+
+	lastRecord []string
}

const estColCnt = 10
@@ -172,7 +174,7 @@ func (parser *CSVParser) readUntil(chars string) ([]byte, byte, error) {
	}
}

-func (parser *CSVParser) readRecord() ([]string, error) {
+func (parser *CSVParser) readRecord(dst []string) ([]string, error) {
	parser.recordBuffer = parser.recordBuffer[:0]
	parser.fieldIndexes = parser.fieldIndexes[:0]

@@ -223,7 +225,11 @@ outside:
	// Create a single string and create slices out of it.
	// This pins the memory of the fields together, but allocates once.
	str := string(parser.recordBuffer) // Convert to string once to batch allocations
-	dst := make([]string, len(parser.fieldIndexes))
+	dst = dst[:0]
+	if cap(dst) < len(parser.fieldIndexes) {
+		dst = make([]string, len(parser.fieldIndexes))
+	}
+	dst = dst[:len(parser.fieldIndexes)]
	var preIdx int
	for i, idx := range parser.fieldIndexes {
		dst[i] = str[preIdx:idx]
@@ -324,7 +330,7 @@ func (parser *CSVParser) ReadRow() error {

	// skip the header first
	if parser.pos == 0 && parser.cfg.Header {
-		columns, err := parser.readRecord()
+		columns, err := parser.readRecord(nil)
		if err != nil {
			return errors.Trace(err)
		}
@@ -335,10 +341,11 @@
		}
	}

-	records, err := parser.readRecord()
+	records, err := parser.readRecord(parser.lastRecord)
	if err != nil {
		return errors.Trace(err)
	}
+	parser.lastRecord = records
	// remove trailing empty values
	if parser.cfg.TrimLastSep {
		i := len(records)
