importccl: compute expected number of data columns only once
This commit moves the computation of the expected number of data
columns to the creation of the input converter, rather than
recalculating it on every row. The memory footprint of loading all of
the visible columns for every row was noticeable.

Release note (performance improvement): CSV imports should now be
slightly faster.
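
A minimal sketch of the pattern applied here, with hypothetical names and simplified types (not the commit's actual code): the expected column count is derived once when the reader is constructed and cached on the struct, so the per-row hot path no longer rebuilds the visible-column list.

	package importsketch

	// reader stands in for csvInputReader; sketch only.
	type reader struct {
		numExpectedDataCols int // computed once at construction
	}

	// newReader derives the expected column count up front: the explicit
	// target columns if any were given, otherwise one value per visible column.
	func newReader(targetCols, visibleCols []string) *reader {
		n := len(targetCols)
		if n == 0 {
			n = len(visibleCols)
		}
		return &reader{numExpectedDataCols: n}
	}

	// The per-row code now just reads the cached value.
	func (r *reader) expectedCols() int { return r.numExpectedDataCols }
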
pbardea committed Oct 26, 2020
1 parent 124209e commit 6a9eaf3
Showing 2 changed files with 24 additions and 22 deletions.
4 changes: 1 addition & 3 deletions pkg/ccl/importccl/import_processor.go
@@ -156,9 +156,7 @@ func makeInputConverter(
 	if len(singleTableTargetCols) == 0 && !formatHasNamedColumns(spec.Format.Format) {
 		for _, col := range singleTable.VisibleColumns() {
 			if col.IsComputed() {
-				// TODO(before merge): Replace this issue number with one that
-				// summarizes the above comment in a ticket.
-				return nil, unimplemented.NewWithIssueDetail(42846, "import.computed",
+				return nil, unimplemented.NewWithIssueDetail(56002, "import.computed",
 					"to use computed columns, use IMPORT INTO")
 			}
 		}
42 changes: 23 additions & 19 deletions pkg/ccl/importccl/read_import_csv.go
@@ -26,7 +26,9 @@ import (
 
 type csvInputReader struct {
 	importCtx *parallelImportContext
-	opts      roachpb.CSVOptions
+	// The number of columns that we expect in the CSV data file.
+	numExpectedDataCols int
+	opts                roachpb.CSVOptions
 }
 
 var _ inputConverter = &csvInputReader{}
@@ -40,6 +42,10 @@ func newCSVInputReader(
 	targetCols tree.NameList,
 	evalCtx *tree.EvalContext,
 ) *csvInputReader {
+	numExpectedDataCols := len(targetCols)
+	if numExpectedDataCols == 0 {
+		numExpectedDataCols = len(tableDesc.VisibleColumns())
+	}
 	return &csvInputReader{
 		importCtx: &parallelImportContext{
 			walltime: walltime,
@@ -49,7 +55,8 @@
 			targetCols: targetCols,
 			kvCh:       kvCh,
 		},
-		opts: opts,
+		numExpectedDataCols: numExpectedDataCols,
+		opts:                opts,
 	}
 }

@@ -86,14 +93,14 @@ func (c *csvInputReader) readFile(
 }
 
 type csvRowProducer struct {
-	importCtx       *parallelImportContext
-	opts            *roachpb.CSVOptions
-	csv             *csv.Reader
-	rowNum          int64
-	err             error
-	record          []string
-	progress        func() float32
-	expectedColumns tree.NameList
+	importCtx          *parallelImportContext
+	opts               *roachpb.CSVOptions
+	csv                *csv.Reader
+	rowNum             int64
+	err                error
+	record             []string
+	progress           func() float32
+	numExpectedColumns int
 }
 
 var _ importRowProducer = &csvRowProducer{}
@@ -132,10 +139,7 @@ func strRecord(record []string, sep rune) string {
 // Row() implements importRowProducer interface.
 func (p *csvRowProducer) Row() (interface{}, error) {
 	p.rowNum++
-	expectedColsLen := len(p.expectedColumns)
-	if expectedColsLen == 0 {
-		expectedColsLen = len(p.importCtx.tableDesc.VisibleColumns())
-	}
+	expectedColsLen := p.numExpectedColumns
 
 	if len(p.record) == expectedColsLen {
 		// Expected number of columns.
@@ -207,11 +211,11 @@ func newCSVPipeline(c *csvInputReader, input *fileReader) (*csvRowProducer, *csv
 	cr.Comment = c.opts.Comment
 
 	producer := &csvRowProducer{
-		importCtx:       c.importCtx,
-		opts:            &c.opts,
-		csv:             cr,
-		progress:        func() float32 { return input.ReadFraction() },
-		expectedColumns: c.importCtx.targetCols,
+		importCtx:          c.importCtx,
+		opts:               &c.opts,
+		csv:                cr,
+		progress:           func() float32 { return input.ReadFraction() },
+		numExpectedColumns: c.numExpectedDataCols,
 	}
 	consumer := &csvRowConsumer{
 		importCtx: c.importCtx,
