diff --git a/pkg/util/parquet/write_functions.go b/pkg/util/parquet/write_functions.go index 1c0d8c6bbcba..7c3818470844 100644 --- a/pkg/util/parquet/write_functions.go +++ b/pkg/util/parquet/write_functions.go @@ -143,31 +143,27 @@ var tupleFieldNonNilDefLevel = []int16{2} // and writing it to a file.ColumnChunkWriter. For tuples, there is a // file.ColumnChunkWriter per tuple field. type colWriter interface { - Write(d tree.Datum, w []file.ColumnChunkWriter, a *batchAlloc, fmtCtx *tree.FmtCtx) error + Write(d tree.Datum, w []file.ColumnChunkWriter, a *batchAlloc) error } type scalarWriter writeFn -func (w scalarWriter) Write( - d tree.Datum, cw []file.ColumnChunkWriter, a *batchAlloc, fmtCtx *tree.FmtCtx, -) error { +func (w scalarWriter) Write(d tree.Datum, cw []file.ColumnChunkWriter, a *batchAlloc) error { if len(cw) != 1 { return errors.AssertionFailedf("invalid number of column chunk writers in scalar writer: %d", len(cw)) } - return writeScalar(d, cw[0], a, fmtCtx, writeFn(w)) + return writeScalar(d, cw[0], a, writeFn(w)) } -func writeScalar( - d tree.Datum, w file.ColumnChunkWriter, a *batchAlloc, fmtCtx *tree.FmtCtx, wFn writeFn, -) error { +func writeScalar(d tree.Datum, w file.ColumnChunkWriter, a *batchAlloc, wFn writeFn) error { if d == tree.DNull { - if err := wFn(tree.DNull, w, a, fmtCtx, nilDefLevel, newEntryRepLevel); err != nil { + if err := wFn(tree.DNull, w, a, nilDefLevel, newEntryRepLevel); err != nil { return err } return nil } - if err := wFn(d, w, a, fmtCtx, nonNilDefLevel, newEntryRepLevel); err != nil { + if err := wFn(d, w, a, nonNilDefLevel, newEntryRepLevel); err != nil { return err } return nil @@ -175,27 +171,23 @@ func writeScalar( type arrayWriter writeFn -func (w arrayWriter) Write( - d tree.Datum, cw []file.ColumnChunkWriter, a *batchAlloc, fmtCtx *tree.FmtCtx, -) error { +func (w arrayWriter) Write(d tree.Datum, cw []file.ColumnChunkWriter, a *batchAlloc) error { if len(cw) != 1 { return errors.AssertionFailedf("invalid number of column chunk writers in array writer: %d", len(cw)) } - return writeArray(d, cw[0], a, fmtCtx, writeFn(w)) + return writeArray(d, cw[0], a, writeFn(w)) } -func writeArray( - d tree.Datum, w file.ColumnChunkWriter, a *batchAlloc, fmtCtx *tree.FmtCtx, wFn writeFn, -) error { +func writeArray(d tree.Datum, w file.ColumnChunkWriter, a *batchAlloc, wFn writeFn) error { if d == tree.DNull { - return wFn(tree.DNull, w, a, nil, nilArrayDefLevel, newEntryRepLevel) + return wFn(tree.DNull, w, a, nilArrayDefLevel, newEntryRepLevel) } di, ok := tree.AsDArray(d) if !ok { return pgerror.Newf(pgcode.DatatypeMismatch, "expected DArray, found %T", d) } if len(di.Array) == 0 { - return wFn(tree.DNull, w, a, nil, zeroLengthArrayDefLevel, newEntryRepLevel) + return wFn(tree.DNull, w, a, zeroLengthArrayDefLevel, newEntryRepLevel) } repLevel := newEntryRepLevel @@ -204,11 +196,11 @@ func writeArray( repLevel = arrayEntryRepLevel } if childDatum == tree.DNull { - if err := wFn(childDatum, w, a, fmtCtx, arrayEntryNilDefLevel, repLevel); err != nil { + if err := wFn(childDatum, w, a, arrayEntryNilDefLevel, repLevel); err != nil { return err } } else { - if err := wFn(childDatum, w, a, fmtCtx, arrayEntryNonNilDefLevel, repLevel); err != nil { + if err := wFn(childDatum, w, a, arrayEntryNonNilDefLevel, repLevel); err != nil { return err } } @@ -218,23 +210,19 @@ func writeArray( type tupleWriter []writeFn -func (tw tupleWriter) Write( - d tree.Datum, cw []file.ColumnChunkWriter, a *batchAlloc, fmtCtx *tree.FmtCtx, -) error { +func (tw tupleWriter) Write(d tree.Datum, cw []file.ColumnChunkWriter, a *batchAlloc) error { if len(cw) != len(tw) { return errors.AssertionFailedf( "invalid number of column chunk writers (%d) for tuple writer (%d)", len(cw), len(tw)) } - return writeTuple(d, cw, a, fmtCtx, tw) + return writeTuple(d, cw, a, tw) } -func writeTuple( - d tree.Datum, w []file.ColumnChunkWriter, a *batchAlloc, fmtCtx *tree.FmtCtx, wFns []writeFn, -) error { +func writeTuple(d tree.Datum, w []file.ColumnChunkWriter, a *batchAlloc, wFns []writeFn) error { if d == tree.DNull { for i, wFn := range wFns { - if err := wFn(tree.DNull, w[i], a, fmtCtx, nilTupleDefLevel, newEntryRepLevel); err != nil { + if err := wFn(tree.DNull, w[i], a, nilTupleDefLevel, newEntryRepLevel); err != nil { return err } } @@ -249,7 +237,7 @@ func writeTuple( if dt.D[i] == tree.DNull { defLevel = tupleFieldNilDefLevel } - if err := wFn(dt.D[i], w[i], a, fmtCtx, defLevel, newEntryRepLevel); err != nil { + if err := wFn(dt.D[i], w[i], a, defLevel, newEntryRepLevel); err != nil { return err } } @@ -260,25 +248,27 @@ func writeTuple( // A writeFn encodes a datum and writes it using the provided column chunk // writer. The caller is responsible for ensuring that the def levels and rep // levels are correct. -type writeFn func(d tree.Datum, w file.ColumnChunkWriter, a *batchAlloc, fmtCtx *tree.FmtCtx, +type writeFn func(d tree.Datum, w file.ColumnChunkWriter, a *batchAlloc, defLevels, repLevels []int16) error // formatDatum writes the datum into the parquet.ByteArray batch alloc using the -// tree.NodeFormatter interface. It is important that the fmtCtx remains open -// until after the bytes have been read from the batchAlloc, otherwise the byte -// slice may point to invalid data. -func formatDatum(d tree.Datum, a *batchAlloc, fmtCtx *tree.FmtCtx) { - fmtCtx.Reset() +// tree.NodeFormatter interface. +func formatDatum(d tree.Datum, a *batchAlloc) error { + // tree.NewFmtCtx uses an underlying pool, so we can assume there is no + // allocation here. + fmtCtx := tree.NewFmtCtx(tree.FmtExport) d.Format(fmtCtx) - a.byteArrayBatch[0] = fmtCtx.Bytes() + // Allocates a new string representation of the datum. + b, err := unsafeGetBytes(fmtCtx.CloseAndGetString()) + if err != nil { + return err + } + a.byteArrayBatch[0] = b + return nil } func writeInt32( - d tree.Datum, - w file.ColumnChunkWriter, - a *batchAlloc, - _ *tree.FmtCtx, - defLevels, repLevels []int16, + d tree.Datum, w file.ColumnChunkWriter, a *batchAlloc, defLevels, repLevels []int16, ) error { if d == tree.DNull { return writeBatch[int32](w, a.int32Batch[:], defLevels, repLevels) @@ -292,11 +282,7 @@ func writeInt32( } func writeInt64( - d tree.Datum, - w file.ColumnChunkWriter, - a *batchAlloc, - _ *tree.FmtCtx, - defLevels, repLevels []int16, + d tree.Datum, w file.ColumnChunkWriter, a *batchAlloc, defLevels, repLevels []int16, ) error { if d == tree.DNull { return writeBatch[int64](w, a.int64Batch[:], defLevels, repLevels) @@ -310,11 +296,7 @@ func writeInt64( } func writeBool( - d tree.Datum, - w file.ColumnChunkWriter, - a *batchAlloc, - _ *tree.FmtCtx, - defLevels, repLevels []int16, + d tree.Datum, w file.ColumnChunkWriter, a *batchAlloc, defLevels, repLevels []int16, ) error { if d == tree.DNull { return writeBatch[bool](w, a.boolBatch[:], defLevels, repLevels) @@ -328,11 +310,7 @@ func writeBool( } func writeString( - d tree.Datum, - w file.ColumnChunkWriter, - a *batchAlloc, - _ *tree.FmtCtx, - defLevels, repLevels []int16, + d tree.Datum, w file.ColumnChunkWriter, a *batchAlloc, defLevels, repLevels []int16, ) error { if d == tree.DNull { return writeBatch[parquet.ByteArray](w, a.byteArrayBatch[:], defLevels, repLevels) @@ -350,11 +328,15 @@ func writeString( return writeBatch[parquet.ByteArray](w, a.byteArrayBatch[:], defLevels, repLevels) } -// unsafeGetBytes returns []byte in the underlying string, -// without incurring copy. -// This unsafe mechanism is safe to use here because the returned bytes will -// be copied by the parquet library when writing a datum to a column chunk. -// See https://groups.google.com/g/golang-nuts/c/Zsfk-VMd_fU/m/O1ru4fO-BgAJ +// unsafeGetBytes returns []byte in the underlying string, without incurring +// copy. This is safe to use because neither the string nor byte array should be +// mutated concurrently. See +// https://groups.google.com/g/golang-nuts/c/Zsfk-VMd_fU/m/O1ru4fO-BgAJ +// +// There is no risk of the string being GC'd while the bytes are in use because +// unsafe.Pointer prevents the pointee from being garbage collected as long as +// the pointee is a Go struct. See +// https://groups.google.com/g/golang-nuts/c/yNis7bQG_rY/m/yaJFoSx1hgIJ // // TODO(jayant): once we upgrade to Go 1.20, we can replace this with a less unsafe // implementation. See https://www.sobyte.net/post/2022-09/string-byte-convertion/ @@ -378,11 +360,7 @@ func unsafeGetBytes(s string) ([]byte, error) { } func writeTimestamp( - d tree.Datum, - w file.ColumnChunkWriter, - a *batchAlloc, - fmtCtx *tree.FmtCtx, - defLevels, repLevels []int16, + d tree.Datum, w file.ColumnChunkWriter, a *batchAlloc, defLevels, repLevels []int16, ) error { if d == tree.DNull { return writeBatch[parquet.ByteArray](w, a.byteArrayBatch[:], defLevels, repLevels) @@ -393,16 +371,14 @@ func writeTimestamp( return pgerror.Newf(pgcode.DatatypeMismatch, "expected DTimestamp, found %T", d) } - formatDatum(d, a, fmtCtx) + if err := formatDatum(d, a); err != nil { + return err + } return writeBatch[parquet.ByteArray](w, a.byteArrayBatch[:], defLevels, repLevels) } func writeTimestampTZ( - d tree.Datum, - w file.ColumnChunkWriter, - a *batchAlloc, - fmtCtx *tree.FmtCtx, - defLevels, repLevels []int16, + d tree.Datum, w file.ColumnChunkWriter, a *batchAlloc, defLevels, repLevels []int16, ) error { if d == tree.DNull { return writeBatch[parquet.ByteArray](w, a.byteArrayBatch[:], defLevels, repLevels) @@ -413,16 +389,14 @@ func writeTimestampTZ( return pgerror.Newf(pgcode.DatatypeMismatch, "expected DTimestampTZ, found %T", d) } - formatDatum(d, a, fmtCtx) + if err := formatDatum(d, a); err != nil { + return err + } return writeBatch[parquet.ByteArray](w, a.byteArrayBatch[:], defLevels, repLevels) } func writeUUID( - d tree.Datum, - w file.ColumnChunkWriter, - a *batchAlloc, - _ *tree.FmtCtx, - defLevels, repLevels []int16, + d tree.Datum, w file.ColumnChunkWriter, a *batchAlloc, defLevels, repLevels []int16, ) error { if d == tree.DNull { return writeBatch[parquet.FixedLenByteArray](w, a.fixedLenByteArrayBatch[:], defLevels, repLevels) @@ -437,11 +411,7 @@ func writeUUID( } func writeDecimal( - d tree.Datum, - w file.ColumnChunkWriter, - a *batchAlloc, - fmtCtx *tree.FmtCtx, - defLevels, repLevels []int16, + d tree.Datum, w file.ColumnChunkWriter, a *batchAlloc, defLevels, repLevels []int16, ) error { if d == tree.DNull { return writeBatch[parquet.ByteArray](w, a.byteArrayBatch[:], defLevels, repLevels) @@ -450,16 +420,14 @@ func writeDecimal( if !ok { return pgerror.Newf(pgcode.DatatypeMismatch, "expected DDecimal, found %T", d) } - formatDatum(d, a, fmtCtx) + if err := formatDatum(d, a); err != nil { + return err + } return writeBatch[parquet.ByteArray](w, a.byteArrayBatch[:], defLevels, repLevels) } func writeINet( - d tree.Datum, - w file.ColumnChunkWriter, - a *batchAlloc, - fmtCtx *tree.FmtCtx, - defLevels, repLevels []int16, + d tree.Datum, w file.ColumnChunkWriter, a *batchAlloc, defLevels, repLevels []int16, ) error { if d == tree.DNull { return writeBatch[parquet.ByteArray](w, a.byteArrayBatch[:], defLevels, repLevels) @@ -469,16 +437,14 @@ func writeINet( return pgerror.Newf(pgcode.DatatypeMismatch, "expected DIPAddr, found %T", d) } - formatDatum(d, a, fmtCtx) + if err := formatDatum(d, a); err != nil { + return err + } return writeBatch[parquet.ByteArray](w, a.byteArrayBatch[:], defLevels, repLevels) } func writeJSON( - d tree.Datum, - w file.ColumnChunkWriter, - a *batchAlloc, - fmtCtx *tree.FmtCtx, - defLevels, repLevels []int16, + d tree.Datum, w file.ColumnChunkWriter, a *batchAlloc, defLevels, repLevels []int16, ) error { if d == tree.DNull { return writeBatch[parquet.ByteArray](w, a.byteArrayBatch[:], defLevels, repLevels) @@ -488,16 +454,14 @@ func writeJSON( return pgerror.Newf(pgcode.DatatypeMismatch, "expected DJSON, found %T", d) } - formatDatum(d, a, fmtCtx) + if err := formatDatum(d, a); err != nil { + return err + } return writeBatch[parquet.ByteArray](w, a.byteArrayBatch[:], defLevels, repLevels) } func writeBit( - d tree.Datum, - w file.ColumnChunkWriter, - a *batchAlloc, - fmtCtx *tree.FmtCtx, - defLevels, repLevels []int16, + d tree.Datum, w file.ColumnChunkWriter, a *batchAlloc, defLevels, repLevels []int16, ) error { if d == tree.DNull { return writeBatch[parquet.ByteArray](w, a.byteArrayBatch[:], defLevels, repLevels) @@ -507,16 +471,14 @@ func writeBit( return pgerror.Newf(pgcode.DatatypeMismatch, "expected DBitArray, found %T", d) } - formatDatum(d, a, fmtCtx) + if err := formatDatum(d, a); err != nil { + return err + } return writeBatch[parquet.ByteArray](w, a.byteArrayBatch[:], defLevels, repLevels) } func writeBytes( - d tree.Datum, - w file.ColumnChunkWriter, - a *batchAlloc, - _ *tree.FmtCtx, - defLevels, repLevels []int16, + d tree.Datum, w file.ColumnChunkWriter, a *batchAlloc, defLevels, repLevels []int16, ) error { if d == tree.DNull { return writeBatch[parquet.ByteArray](w, a.byteArrayBatch[:], defLevels, repLevels) @@ -535,11 +497,7 @@ func writeBytes( } func writeEnum( - d tree.Datum, - w file.ColumnChunkWriter, - a *batchAlloc, - _ *tree.FmtCtx, - defLevels, repLevels []int16, + d tree.Datum, w file.ColumnChunkWriter, a *batchAlloc, defLevels, repLevels []int16, ) error { if d == tree.DNull { return writeBatch[parquet.ByteArray](w, a.byteArrayBatch[:], defLevels, repLevels) @@ -558,11 +516,7 @@ func writeEnum( } func writeDate( - d tree.Datum, - w file.ColumnChunkWriter, - a *batchAlloc, - fmtCtx *tree.FmtCtx, - defLevels, repLevels []int16, + d tree.Datum, w file.ColumnChunkWriter, a *batchAlloc, defLevels, repLevels []int16, ) error { if d == tree.DNull { return writeBatch[parquet.ByteArray](w, a.byteArrayBatch[:], defLevels, repLevels) @@ -572,16 +526,14 @@ func writeDate( return pgerror.Newf(pgcode.DatatypeMismatch, "expected DDate, found %T", d) } - formatDatum(d, a, fmtCtx) + if err := formatDatum(d, a); err != nil { + return err + } return writeBatch[parquet.ByteArray](w, a.byteArrayBatch[:], defLevels, repLevels) } func writeBox2D( - d tree.Datum, - w file.ColumnChunkWriter, - a *batchAlloc, - fmtCtx *tree.FmtCtx, - defLevels, repLevels []int16, + d tree.Datum, w file.ColumnChunkWriter, a *batchAlloc, defLevels, repLevels []int16, ) error { if d == tree.DNull { return writeBatch[parquet.ByteArray](w, a.byteArrayBatch[:], defLevels, repLevels) @@ -590,16 +542,14 @@ func writeBox2D( if !ok { return pgerror.Newf(pgcode.DatatypeMismatch, "expected DBox2D, found %T", d) } - formatDatum(d, a, fmtCtx) + if err := formatDatum(d, a); err != nil { + return err + } return writeBatch[parquet.ByteArray](w, a.byteArrayBatch[:], defLevels, repLevels) } func writeGeography( - d tree.Datum, - w file.ColumnChunkWriter, - a *batchAlloc, - _ *tree.FmtCtx, - defLevels, repLevels []int16, + d tree.Datum, w file.ColumnChunkWriter, a *batchAlloc, defLevels, repLevels []int16, ) error { if d == tree.DNull { return writeBatch[parquet.ByteArray](w, a.byteArrayBatch[:], defLevels, repLevels) @@ -614,11 +564,7 @@ func writeGeography( } func writeGeometry( - d tree.Datum, - w file.ColumnChunkWriter, - a *batchAlloc, - _ *tree.FmtCtx, - defLevels, repLevels []int16, + d tree.Datum, w file.ColumnChunkWriter, a *batchAlloc, defLevels, repLevels []int16, ) error { if d == tree.DNull { return writeBatch[parquet.ByteArray](w, a.byteArrayBatch[:], defLevels, repLevels) @@ -632,11 +578,7 @@ func writeGeometry( } func writeInterval( - d tree.Datum, - w file.ColumnChunkWriter, - a *batchAlloc, - fmtCtx *tree.FmtCtx, - defLevels, repLevels []int16, + d tree.Datum, w file.ColumnChunkWriter, a *batchAlloc, defLevels, repLevels []int16, ) error { if d == tree.DNull { return writeBatch[parquet.ByteArray](w, a.byteArrayBatch[:], defLevels, repLevels) @@ -646,16 +588,14 @@ func writeInterval( return pgerror.Newf(pgcode.DatatypeMismatch, "expected DInterval, found %T", d) } - formatDatum(d, a, fmtCtx) + if err := formatDatum(d, a); err != nil { + return err + } return writeBatch[parquet.ByteArray](w, a.byteArrayBatch[:], defLevels, repLevels) } func writeTime( - d tree.Datum, - w file.ColumnChunkWriter, - a *batchAlloc, - _ *tree.FmtCtx, - defLevels, repLevels []int16, + d tree.Datum, w file.ColumnChunkWriter, a *batchAlloc, defLevels, repLevels []int16, ) error { if d == tree.DNull { return writeBatch[int64](w, a.int64Batch[:], defLevels, repLevels) @@ -669,11 +609,7 @@ func writeTime( } func writeTimeTZ( - d tree.Datum, - w file.ColumnChunkWriter, - a *batchAlloc, - fmtCtx *tree.FmtCtx, - defLevels, repLevels []int16, + d tree.Datum, w file.ColumnChunkWriter, a *batchAlloc, defLevels, repLevels []int16, ) error { if d == tree.DNull { return writeBatch[parquet.ByteArray](w, a.byteArrayBatch[:], defLevels, repLevels) @@ -682,16 +618,14 @@ func writeTimeTZ( if !ok { return pgerror.Newf(pgcode.DatatypeMismatch, "expected DTimeTZ, found %T", d) } - formatDatum(d, a, fmtCtx) + if err := formatDatum(d, a); err != nil { + return err + } return writeBatch[parquet.ByteArray](w, a.byteArrayBatch[:], defLevels, repLevels) } func writeFloat32( - d tree.Datum, - w file.ColumnChunkWriter, - a *batchAlloc, - _ *tree.FmtCtx, - defLevels, repLevels []int16, + d tree.Datum, w file.ColumnChunkWriter, a *batchAlloc, defLevels, repLevels []int16, ) error { if d == tree.DNull { return writeBatch[float32](w, a.float32Batch[:], defLevels, repLevels) @@ -705,11 +639,7 @@ func writeFloat32( } func writeFloat64( - d tree.Datum, - w file.ColumnChunkWriter, - a *batchAlloc, - _ *tree.FmtCtx, - defLevels, repLevels []int16, + d tree.Datum, w file.ColumnChunkWriter, a *batchAlloc, defLevels, repLevels []int16, ) error { if d == tree.DNull { return writeBatch[float64](w, a.float64Batch[:], defLevels, repLevels) @@ -723,11 +653,7 @@ func writeFloat64( } func writeOid( - d tree.Datum, - w file.ColumnChunkWriter, - a *batchAlloc, - _ *tree.FmtCtx, - defLevels, repLevels []int16, + d tree.Datum, w file.ColumnChunkWriter, a *batchAlloc, defLevels, repLevels []int16, ) error { if d == tree.DNull { return writeBatch[int32](w, a.int32Batch[:], defLevels, repLevels) @@ -741,11 +667,7 @@ func writeOid( } func writeCollatedString( - d tree.Datum, - w file.ColumnChunkWriter, - a *batchAlloc, - _ *tree.FmtCtx, - defLevels, repLevels []int16, + d tree.Datum, w file.ColumnChunkWriter, a *batchAlloc, defLevels, repLevels []int16, ) error { if d == tree.DNull { return writeBatch[parquet.ByteArray](w, a.byteArrayBatch[:], defLevels, repLevels) diff --git a/pkg/util/parquet/writer.go b/pkg/util/parquet/writer.go index 3baafadb799d..4120e9d4574b 100644 --- a/pkg/util/parquet/writer.go +++ b/pkg/util/parquet/writer.go @@ -206,18 +206,6 @@ func (w *Writer) setNewRowGroupWriter() error { return nil } -func (w *Writer) writeDatumToColChunk(d tree.Datum, datumColIdx int) (err error) { - // tree.NewFmtCtx uses an underlying pool, so we can assume there is no - // allocation here. - fmtCtx := tree.NewFmtCtx(tree.FmtExport) - defer fmtCtx.Close() - if err = w.sch.cols[datumColIdx].colWriter.Write(d, w.columnChunkWriterCache[datumColIdx], w.ba, fmtCtx); err != nil { - return err - } - - return nil -} - // AddRow writes the supplied datums. There is no guarantee // that they will be flushed to the sink after AddRow returns. func (w *Writer) AddRow(datums []tree.Datum) error { @@ -240,7 +228,7 @@ func (w *Writer) AddRow(datums []tree.Datum) error { } for datumColIdx, d := range datums { - if err := w.writeDatumToColChunk(d, datumColIdx); err != nil { + if err := w.sch.cols[datumColIdx].colWriter.Write(d, w.columnChunkWriterCache[datumColIdx], w.ba); err != nil { return err } } diff --git a/pkg/util/parquet/writer_bench_test.go b/pkg/util/parquet/writer_bench_test.go index 2f965d71f0f1..3e550681a341 100644 --- a/pkg/util/parquet/writer_bench_test.go +++ b/pkg/util/parquet/writer_bench_test.go @@ -38,7 +38,7 @@ func BenchmarkParquetWriter(b *testing.B) { func(rng *rand.Rand) *types.T { return benchmarkTypes[i] }, rng) - datums := makeRandDatums(1, sch, rng) + datums := makeRandDatums(1, sch, rng, false) schemaDef, err := NewSchema(sch.columnNames, sch.columnTypes) require.NoError(b, err) diff --git a/pkg/util/parquet/writer_test.go b/pkg/util/parquet/writer_test.go index 754bbe1398e4..bfcf7978cac6 100644 --- a/pkg/util/parquet/writer_test.go +++ b/pkg/util/parquet/writer_test.go @@ -87,12 +87,12 @@ func randTestingType(rng *rand.Rand) *types.T { return typ } -func makeRandDatums(numRows int, sch *colSchema, rng *rand.Rand) [][]tree.Datum { +func makeRandDatums(numRows int, sch *colSchema, rng *rand.Rand, nullsAllowed bool) [][]tree.Datum { datums := make([][]tree.Datum, numRows) for i := 0; i < numRows; i++ { datums[i] = make([]tree.Datum, len(sch.columnTypes)) for j := 0; j < len(sch.columnTypes); j++ { - datums[i][j] = randgen.RandDatum(rng, sch.columnTypes[j], true) + datums[i][j] = randgen.RandDatum(rng, sch.columnTypes[j], nullsAllowed) } } return datums @@ -119,7 +119,7 @@ func TestRandomDatums(t *testing.T) { maxRowGroupSize := int64(8) sch := makeRandSchema(numCols, randTestingType, rng) - datums := makeRandDatums(numRows, sch, rng) + datums := makeRandDatums(numRows, sch, rng, true) fileName := "TestRandomDatums.parquet" f, err := os.CreateTemp("", fileName)