2 changes: 2 additions & 0 deletions docs/generated/settings/settings.html
@@ -55,6 +55,8 @@
<tr><td><code>kv.transaction.write_pipelining_max_batch_size</code></td><td>integer</td><td><code>128</code></td><td>if non-zero, defines the maximum size batch that will be pipelined through Raft consensus</td></tr>
<tr><td><code>kv.transaction.write_pipelining_max_outstanding_size</code></td><td>byte size</td><td><code>256 KiB</code></td><td>maximum number of bytes used to track in-flight pipelined writes before disabling pipelining</td></tr>
<tr><td><code>rocksdb.min_wal_sync_interval</code></td><td>duration</td><td><code>0s</code></td><td>minimum duration between syncs of the RocksDB WAL</td></tr>
<tr><td><code>schemachanger.backfiller.buffer_size</code></td><td>byte size</td><td><code>196 MiB</code></td><td>amount to buffer in memory during backfills</td></tr>
<tr><td><code>schemachanger.backfiller.max_sst_size</code></td><td>byte size</td><td><code>16 MiB</code></td><td>target size for ingested files during backfills</td></tr>
<tr><td><code>schemachanger.bulk_index_backfill.batch_size</code></td><td>integer</td><td><code>5000000</code></td><td>number of rows to process at a time during bulk index backfill</td></tr>
<tr><td><code>schemachanger.bulk_index_backfill.enabled</code></td><td>boolean</td><td><code>true</code></td><td>backfill indexes in bulk via addsstable</td></tr>
<tr><td><code>schemachanger.lease.duration</code></td><td>duration</td><td><code>5m0s</code></td><td>the duration of a schema change lease</td></tr>
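The two new backfiller settings work together: schemachanger.backfiller.buffer_size bounds how much KV data a backfill processor buffers in memory, and schemachanger.backfiller.max_sst_size is the target size of each file ingested when that buffer is flushed. The snippet below is only an illustration of the implied ratio at the registered defaults (196 MiB and 16 MiB, matching the Go constants added later in this diff); how the adder actually splits a flush into files is an assumption here.

// Illustration only: relating the two defaults above. Assuming a full buffer
// flush is split into files of at most max_sst_size, one flush of a 196 MiB
// buffer produces roughly 13 ingested files.
package main

import "fmt"

func main() {
	const bufferSize, maxSSTSize = 196 << 20, 16 << 20    // 196 MiB, 16 MiB
	files := (bufferSize + maxSSTSize - 1) / maxSSTSize   // ceiling division
	fmt.Println(files)                                    // 13
}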
25 changes: 13 additions & 12 deletions pkg/ccl/importccl/read_import_proc.go
@@ -9,14 +9,12 @@
package importccl

import (
"bytes"
"compress/bzip2"
"compress/gzip"
"context"
"io"
"io/ioutil"
"math/rand"
"sort"
"strings"

"github.com/cockroachdb/cockroach/pkg/ccl/storageccl"
Expand Down Expand Up @@ -500,11 +498,12 @@ func (cp *readImportDataProcessor) doRun(ctx context.Context) error {
defer tracing.FinishSpan(span)

writeTS := hlc.Timestamp{WallTime: cp.spec.WalltimeNanos}
adder, err := cp.flowCtx.BulkAdder(ctx, cp.flowCtx.ClientDB, 32<<20 /* flush at 32mb */, writeTS)
const bufferSize, flushSize = 64 << 20, 16 << 20
adder, err := cp.flowCtx.BulkAdder(ctx, cp.flowCtx.ClientDB, bufferSize, flushSize, writeTS)
if err != nil {
return err
}
defer adder.Close()
defer adder.Close(ctx)

// Drain the kvCh using the BulkAdder until it closes.
if err := ingestKvs(ctx, adder, kvCh); err != nil {
Expand Down Expand Up @@ -655,20 +654,15 @@ func ingestKvs(ctx context.Context, adder storagebase.BulkAdder, kvCh <-chan kvB
if len(buf) == 0 {
return nil
}
sort.Sort(buf)
for i := range buf {
if err := adder.Add(ctx, buf[i].Key, buf[i].Value.RawBytes); err != nil {
if i > 0 && bytes.Equal(buf[i].Key, buf[i-1].Key) {
return pgerror.Wrapf(err, pgerror.CodeDataExceptionError,
errSSTCreationMaybeDuplicateTemplate, buf[i].Key)
if _, ok := err.(storagebase.DuplicateKeyError); ok {
return pgerror.Wrap(err, pgerror.CodeDataExceptionError, "")
}
return err
}
}
if err := adder.Flush(ctx); err != nil {
return err
}
return adder.Reset()
return nil
}

for kvBatch := range kvCh {
Expand Down Expand Up @@ -702,6 +696,13 @@ func ingestKvs(ctx context.Context, adder storagebase.BulkAdder, kvCh <-chan kvB
return err
}
}

if err := adder.Flush(ctx); err != nil {
if err, ok := err.(storagebase.DuplicateKeyError); ok {
return pgerror.Wrap(err, pgerror.CodeDataExceptionError, "")
}
return err
}
return nil
}

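The import path above now leans entirely on the BulkAdder for ordering, duplicate detection, and flushing: the per-batch sort, flush, and reset are gone, and duplicate keys surface as a typed error. The definition of that interface is not part of this diff; the sketch below is a hedged reconstruction inferred from the call sites (Add, Flush, Close, SkipLocalDuplicates, and the DuplicateKeyError assertions), not the actual code in pkg/storage/storagebase.

// Reconstruction for review purposes only; see pkg/storage/storagebase and
// pkg/storage/bulk for the real definitions.
package storagebase

import (
	"context"
	"fmt"

	"github.com/cockroachdb/cockroach/pkg/roachpb"
)

// BulkAdder buffers KVs in memory and ingests them (via AddSSTable) when the
// buffer fills or Flush is called; callers no longer pre-sort their batches.
type BulkAdder interface {
	// Add buffers a KV pair for ingestion.
	Add(ctx context.Context, key roachpb.Key, value []byte) error
	// Flush ingests everything currently buffered.
	Flush(ctx context.Context) error
	// Close releases the adder's resources; note it now takes a context.
	Close(ctx context.Context)
	// SkipLocalDuplicates, when enabled, drops duplicate KVs found in the
	// buffer instead of returning DuplicateKeyError.
	SkipLocalDuplicates(bool)
}

// DuplicateKeyError is returned by Add or Flush when the same key is added
// more than once and duplicates are not being skipped.
type DuplicateKeyError struct {
	Key   roachpb.Key
	Value []byte
}

func (d DuplicateKeyError) Error() string {
	return fmt.Sprintf("duplicate key: %s", d.Key)
}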
4 changes: 2 additions & 2 deletions pkg/server/server.go
@@ -525,8 +525,8 @@ func NewServer(cfg Config, stopper *stop.Stopper) (*Server, error) {
ClusterID: &s.rpcContext.ClusterID,

TempStorage: tempEngine,
BulkAdder: func(ctx context.Context, db *client.DB, size int64, ts hlc.Timestamp) (storagebase.BulkAdder, error) {
return bulk.MakeFixedTimestampSSTBatcher(db, s.distSender.RangeDescriptorCache(), size, ts)
BulkAdder: func(ctx context.Context, db *client.DB, bufferSize, flushSize int64, ts hlc.Timestamp) (storagebase.BulkAdder, error) {
return bulk.MakeBulkAdder(db, s.distSender.RangeDescriptorCache(), bufferSize, flushSize, ts)
},
DiskMonitor: s.cfg.TempStorageConfig.Mon,

21 changes: 19 additions & 2 deletions pkg/sql/distsqlrun/backfiller.go
@@ -35,6 +35,12 @@ import (
)

type chunkBackfiller interface {
// prepare must be called before runChunk.
prepare(ctx context.Context) error

// close should always be called if prepare() was called, to release the
// backfiller's resources.
close(ctx context.Context)

// runChunk returns the next-key and an error. next-key is nil
// once the backfill is complete.
runChunk(
@@ -44,12 +50,15 @@ type chunkBackfiller interface {
chunkSize int64,
readAsOf hlc.Timestamp,
) (roachpb.Key, error)

// flush must be called after the last chunk to finish buffered work.
flush(ctx context.Context) error
}

// backfiller is a processor that implements a distributed backfill of
// an entity, like indexes or columns, during a schema change.
type backfiller struct {
chunkBackfiller
chunks chunkBackfiller
// name is the name of the kind of entity this backfiller processes.
name string
// mutationFilter returns true if the mutation should be processed by the
@@ -116,6 +125,11 @@ func (b *backfiller) mainLoop(ctx context.Context) error {
// Backfill the mutations for all the rows.
chunkSize := b.spec.ChunkSize
start := timeutil.Now()

if err := b.chunks.prepare(ctx); err != nil {
return err
}

var resume roachpb.Span
sp := work
var nChunks, row = 0, int64(0)
@@ -125,7 +139,7 @@ func (b *backfiller) mainLoop(ctx context.Context) error {
b.name, desc.ID, mutationID, row, sp)
}
var err error
sp.Key, err = b.runChunk(ctx, mutations, sp, chunkSize, b.spec.ReadAsOf)
sp.Key, err = b.chunks.runChunk(ctx, mutations, sp, chunkSize, b.spec.ReadAsOf)
if err != nil {
return err
}
@@ -134,6 +148,9 @@ func (b *backfiller) mainLoop(ctx context.Context) error {
break
}
}
if err := b.chunks.flush(ctx); err != nil {
return err
}
log.VEventf(ctx, 2, "processed %d rows in %d chunks", row, nChunks)
return WriteResumeSpan(ctx,
b.flowCtx.ClientDB,
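With prepare, flush, and close added to the interface, mainLoop now drives a buffer-aware lifecycle. The helper below is a hypothetical, condensed in-package sketch of that call order (error handling, progress logging, and resume-span bookkeeping omitted); it summarizes the code above rather than replacing it, and the deferred close is a simplification of where close is actually invoked.

package distsqlrun

import (
	"context"

	"github.com/cockroachdb/cockroach/pkg/roachpb"
	"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
	"github.com/cockroachdb/cockroach/pkg/util/hlc"
)

// runChunksSketch condenses the lifecycle: prepare once, run chunks until the
// returned key is nil, flush buffered work, and always close after prepare.
func runChunksSketch(
	ctx context.Context,
	cb chunkBackfiller,
	sp roachpb.Span,
	mutations []sqlbase.DescriptorMutation,
	chunkSize int64,
	readAsOf hlc.Timestamp,
) error {
	if err := cb.prepare(ctx); err != nil {
		return err
	}
	defer cb.close(ctx)

	for sp.Key != nil {
		var err error
		sp.Key, err = cb.runChunk(ctx, mutations, sp, chunkSize, readAsOf)
		if err != nil {
			return err
		}
	}
	// flush pushes anything the chunk backfiller buffered (for the index
	// backfiller, the KVs held by its BulkAdder) before progress is recorded.
	return cb.flush(ctx)
}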
10 changes: 9 additions & 1 deletion pkg/sql/distsqlrun/columnbackfiller.go
@@ -61,7 +61,7 @@ func newColumnBackfiller(
spec: spec,
},
}
cb.backfiller.chunkBackfiller = cb
cb.backfiller.chunks = cb

if err := cb.ColumnBackfiller.Init(cb.flowCtx.NewEvalCtx(), cb.desc); err != nil {
return nil, err
@@ -70,6 +70,14 @@
return cb, nil
}

func (cb *columnBackfiller) close(ctx context.Context) {}
func (cb *columnBackfiller) prepare(ctx context.Context) error {
return nil
}
func (cb *columnBackfiller) flush(ctx context.Context) error {
return nil
}

// runChunk implements the chunkBackfiller interface.
func (cb *columnBackfiller) runChunk(
ctx context.Context,
98 changes: 60 additions & 38 deletions pkg/sql/distsqlrun/indexbackfiller.go
@@ -16,15 +16,16 @@ package distsqlrun

import (
"context"
"sort"

"github.com/cockroachdb/cockroach/pkg/internal/client"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/settings"
"github.com/cockroachdb/cockroach/pkg/sql/backfill"
"github.com/cockroachdb/cockroach/pkg/sql/distsqlpb"
"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror"
"github.com/cockroachdb/cockroach/pkg/sql/row"
"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
"github.com/cockroachdb/cockroach/pkg/storage/storagebase"
"github.com/cockroachdb/cockroach/pkg/util/hlc"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
@@ -37,12 +38,22 @@ type indexBackfiller struct {

backfill.IndexBackfiller

adder storagebase.BulkAdder

desc *sqlbase.ImmutableTableDescriptor
}

var _ Processor = &indexBackfiller{}
var _ chunkBackfiller = &indexBackfiller{}

var backfillerBufferSize = settings.RegisterByteSizeSetting(
"schemachanger.backfiller.buffer_size", "amount to buffer in memory during backfills", 196<<20,
)

var backillerSSTSize = settings.RegisterByteSizeSetting(
"schemachanger.backfiller.max_sst_size", "target size for ingested files during backfills", 16<<20,
)

func newIndexBackfiller(
flowCtx *FlowCtx,
processorID int32,
@@ -61,7 +72,7 @@ func newIndexBackfiller(
spec: spec,
},
}
ib.backfiller.chunkBackfiller = ib
ib.backfiller.chunks = ib

if err := ib.IndexBackfiller.Init(ib.desc); err != nil {
return nil, err
@@ -70,6 +81,44 @@
return ib, nil
}

func (ib *indexBackfiller) prepare(ctx context.Context) error {
bufferSize := backfillerBufferSize.Get(&ib.flowCtx.Settings.SV)
sstSize := backillerSSTSize.Get(&ib.flowCtx.Settings.SV)
adder, err := ib.flowCtx.BulkAdder(ctx, ib.flowCtx.ClientDB, bufferSize, sstSize, ib.spec.ReadAsOf)
if err != nil {
return err
}
ib.adder = adder
ib.adder.SkipLocalDuplicates(ib.ContainsInvertedIndex())
return nil
}

func (ib indexBackfiller) close(ctx context.Context) {
ib.adder.Close(ctx)
}

func (ib *indexBackfiller) flush(ctx context.Context) error {
return ib.wrapDupError(ctx, ib.adder.Flush(ctx))
}

func (ib *indexBackfiller) wrapDupError(ctx context.Context, orig error) error {
if orig == nil {
return nil
}
typed, ok := orig.(storagebase.DuplicateKeyError)
if !ok {
return orig
}

desc, err := ib.desc.MakeFirstMutationPublic()
if err != nil {
return err
}
immutable := sqlbase.NewImmutableTableDescriptor(*desc.TableDesc())
v := &roachpb.Value{RawBytes: typed.Value}
return row.NewUniquenessConstraintViolationError(ctx, immutable, typed.Key, v)
}

func (ib *indexBackfiller) runChunk(
tctx context.Context,
mutations []sqlbase.DescriptorMutation,
@@ -129,50 +178,23 @@ func (ib *indexBackfiller) runChunk(
enabled := backfill.BulkWriteIndex.Get(&ib.flowCtx.Settings.SV)
if enabled {
start := timeutil.Now()
sort.Slice(entries, func(i, j int) bool {
return entries[i].Key.Compare(entries[j].Key) < 0
})
sortTime := timeutil.Now().Sub(start)

start = timeutil.Now()
adder, err := ib.flowCtx.BulkAdder(ctx, ib.flowCtx.ClientDB, 32<<20, readAsOf)
if err != nil {
return nil, err
for _, i := range entries {
if err := ib.adder.Add(ctx, i.Key, i.Value.RawBytes); err != nil {
return nil, ib.wrapDupError(ctx, err)
}
}
defer adder.Close()
containsInvertedIndex := ib.ContainsInvertedIndex()
for i := range entries {
if err := adder.Add(ctx, entries[i].Key, entries[i].Value.RawBytes); err != nil {
// Detect a duplicate within the SST being constructed. This is an
// insufficient but useful method for unique constraint enforcement
// and the index has to be validated after construction.
if i > 0 && entries[i-1].Key.Equal(entries[i].Key) {
if containsInvertedIndex {
// Depend on post index backfill validation to catch any errors.
continue
}
desc, err := ib.desc.MakeFirstMutationPublic()
immutable := sqlbase.NewImmutableTableDescriptor(*desc.TableDesc())
if err != nil {
return nil, err
}
entry := entries[i]
return nil, row.NewUniquenessConstraintViolationError(
ctx, immutable, entry.Key, &entry.Value)
}
return nil, err
if ib.flowCtx.testingKnobs.RunAfterBackfillChunk != nil {
if err := ib.adder.Flush(ctx); err != nil {
return nil, ib.wrapDupError(ctx, err)
}
}
addTime := timeutil.Now().Sub(start)

if err := adder.Flush(ctx); err != nil {
return nil, err
}

// Don't log perf stats in tests with small indexes.
if len(entries) > 1000 {
log.Infof(ctx, "index backfill stats: entries %d, prepare %+v, sort %+v, add-sst %+v",
len(entries), prepTime, sortTime, addTime)
log.Infof(ctx, "index backfill stats: entries %d, prepare %+v, add-sst %+v",
len(entries), prepTime, addTime)
}
return key, nil
}
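The net change to duplicate handling in the index backfiller: instead of comparing adjacent keys within one sorted chunk, duplicates are now caught across everything the adder has buffered. Inverted indexes, which can legitimately emit identical entries, opt out via SkipLocalDuplicates and rely on post-backfill validation (as the removed comment noted), while any remaining DuplicateKeyError is translated by wrapDupError into a uniqueness-constraint violation. The toy below only illustrates that decision; the exact-duplicate semantics of SkipLocalDuplicates are an assumption, and none of this is the adder's real implementation.

// Toy illustration only, not the adder's implementation.
package main

import "fmt"

type dupKeyError struct{ key string }

func (e dupKeyError) Error() string { return "duplicate key: " + e.key }

// add buffers key/value, dropping exact duplicates when skipDups is set
// (inverted indexes) and erroring otherwise (unique indexes).
func add(buf map[string]string, key, value string, skipDups bool) error {
	if prev, ok := buf[key]; ok {
		if skipDups && prev == value {
			return nil // benign duplicate, e.g. a repeated inverted-index entry
		}
		return dupKeyError{key} // surfaced as storagebase.DuplicateKeyError
	}
	buf[key] = value
	return nil
}

func main() {
	buf := map[string]string{}
	fmt.Println(add(buf, "k1", "v1", false)) // <nil>
	fmt.Println(add(buf, "k1", "v1", true))  // <nil> (duplicate skipped)
	fmt.Println(add(buf, "k1", "v2", false)) // duplicate key: k1
}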
4 changes: 4 additions & 0 deletions pkg/sql/schema_changer_test.go
@@ -1516,6 +1516,10 @@ func TestSchemaChangePurgeFailure(t *testing.T) {
}
return nil
},
// The backfiller flushes after every chunk if RunAfterBackfillChunk is
// non-nil, so this no-op fn lets us observe the partial backfill that
// would otherwise just be buffered.
RunAfterBackfillChunk: func() {},
},
// Disable backfill migrations, we still need the jobs table migration.
SQLMigrationManager: &sqlmigrations.MigrationManagerTestingKnobs{
2 changes: 1 addition & 1 deletion pkg/storage/batcheval/cmd_add_sstable.go
@@ -66,7 +66,7 @@ func EvalAddSSTable(
return result.Result{}, errors.Wrap(err, "computing existing stats")
}
ms.Subtract(existingStats)
if existingStats.KeyCount > 0 {
if log.V(2) {
log.Infof(ctx, "%s SST covers span containing %d existing keys: [%s, %s)", humanizeutil.IBytes(int64(len(args.Data))), existingStats.KeyCount, args.Key, args.EndKey)
}
