From 124209eeafcc226068b5fc7e5fdc3426a1890341 Mon Sep 17 00:00:00 2001 From: Paul Bardea Date: Wed, 21 Oct 2020 23:36:23 -0400 Subject: [PATCH] importccl: lift computed column check to input converter The current implementation of checking for validating the number of rows for computed columns for in non-IMPORT INTO backups is inefficient. This commit moves the check from being performed on every row to only being performed once per import. Release note: None --- pkg/ccl/importccl/import_processor.go | 18 ++++++++++++++++++ pkg/ccl/importccl/import_stmt_test.go | 12 +++++------- pkg/ccl/importccl/read_import_base.go | 12 ++++++++++++ pkg/ccl/importccl/read_import_csv.go | 9 --------- 4 files changed, 35 insertions(+), 16 deletions(-) diff --git a/pkg/ccl/importccl/import_processor.go b/pkg/ccl/importccl/import_processor.go index d8150c05ff0e..e9f24ca8ee19 100644 --- a/pkg/ccl/importccl/import_processor.go +++ b/pkg/ccl/importccl/import_processor.go @@ -145,6 +145,24 @@ func makeInputConverter( return nil, unimplemented.NewWithIssue(50225, "cannot import into table with partial indexes") } } + + // If we're using a format like CSV where data columns are not "named", and + // therefore cannot be mapped to schema columns, then require the user to + // use IMPORT INTO. + // + // We could potentially do something smarter here and check that only a + // suffix of the columns are computed, and then expect the data file to have + // #(visible columns) - #(computed columns). + if len(singleTableTargetCols) == 0 && !formatHasNamedColumns(spec.Format.Format) { + for _, col := range singleTable.VisibleColumns() { + if col.IsComputed() { + // TODO(before merge): Replace this issue number with one that + // summarizes the above comment in a ticket. + return nil, unimplemented.NewWithIssueDetail(42846, "import.computed", + "to use computed columns, use IMPORT INTO") + } + } + } } switch spec.Format.Format { diff --git a/pkg/ccl/importccl/import_stmt_test.go b/pkg/ccl/importccl/import_stmt_test.go index eb6596f7031f..117bd13235b2 100644 --- a/pkg/ccl/importccl/import_stmt_test.go +++ b/pkg/ccl/importccl/import_stmt_test.go @@ -3747,8 +3747,8 @@ func TestImportComputed(t *testing.T) { } avroData := createAvroData(t, "t", avroField, avroRows) pgdumpData := ` -CREATE TABLE users (a INT, b INT, c INT AS (a + b) STORED); -INSERT INTO users (a, b) VALUES (1, 2), (3, 4); +CREATE TABLE users (a INT, b INT, c INT AS (a + b) STORED); +INSERT INTO users (a, b) VALUES (1, 2), (3, 4); ` defer srv.Close() tests := []struct { @@ -3812,18 +3812,16 @@ INSERT INTO users (a, b) VALUES (1, 2), (3, 4); name: "import-table-csv", data: "35,23\n67,10", create: "a INT, c INT AS (a + b) STORED, b INT", - targetCols: "a, b", format: "CSV", - expectedError: "requires targeted column specification", + expectedError: "to use computed columns, use IMPORT INTO", }, { into: false, name: "import-table-avro", data: avroData, - create: "a INT, b INT, c INT AS (a + b) STORED", - targetCols: "a, b", + create: "a INT, c INT AS (a + b) STORED, b INT", format: "AVRO", - expectedResults: [][]string{{"1", "2", "3"}, {"3", "4", "7"}}, + expectedResults: [][]string{{"1", "3", "2"}, {"3", "7", "4"}}, }, { into: false, diff --git a/pkg/ccl/importccl/read_import_base.go b/pkg/ccl/importccl/read_import_base.go index 86f2fe602a74..488a9a61fbb8 100644 --- a/pkg/ccl/importccl/read_import_base.go +++ b/pkg/ccl/importccl/read_import_base.go @@ -345,6 +345,18 @@ type inputConverter interface { format roachpb.IOFileFormat, makeExternalStorage cloud.ExternalStorageFactory, user string) error } +// formatHasNamedColumns returns true if the data in the input files can be +// mapped specifically to a particular data column. +func formatHasNamedColumns(format roachpb.IOFileFormat_FileFormat) bool { + switch format { + case roachpb.IOFileFormat_Avro, + roachpb.IOFileFormat_Mysqldump, + roachpb.IOFileFormat_PgDump: + return true + } + return false +} + func isMultiTableFormat(format roachpb.IOFileFormat_FileFormat) bool { switch format { case roachpb.IOFileFormat_Mysqldump, diff --git a/pkg/ccl/importccl/read_import_csv.go b/pkg/ccl/importccl/read_import_csv.go index d3517ad57f67..d1fbf7486a26 100644 --- a/pkg/ccl/importccl/read_import_csv.go +++ b/pkg/ccl/importccl/read_import_csv.go @@ -134,15 +134,6 @@ func (p *csvRowProducer) Row() (interface{}, error) { p.rowNum++ expectedColsLen := len(p.expectedColumns) if expectedColsLen == 0 { - // TODO(anzoteh96): this should really be only checked once per import instead of every row. - for _, col := range p.importCtx.tableDesc.VisibleColumns() { - if col.IsComputed() { - return nil, - errors.Newf( - "IMPORT CSV with computed column %q requires targeted column specification", - col.Name) - } - } expectedColsLen = len(p.importCtx.tableDesc.VisibleColumns()) }