Skip to content

Commit 60e36a4

Browse files
fix: list_columns.parquet testing (#378)
### Rationale for this change add list_columns.parquet testing. `rr.Next()` is returning true when we've already reached the end of the file. This leads to a panic later on ### What changes are included in this PR? - Test that proves list_columns.parquet is not working as expected. - Use known path for partquet_testing if var is not defined. This ensures test can be run as usual but still enables custom locations for parquet_test_data. It also guarantees failure if not run correctly. ### Are these changes tested? That is the goal of the PR ### Are there any user-facing changes? no?
1 parent 1bdd0ad commit 60e36a4

File tree

2 files changed

+62
-7
lines changed

2 files changed

+62
-7
lines changed

parquet/file/file_reader_test.go

Lines changed: 61 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -397,7 +397,8 @@ func TestDeltaLengthByteArrayPackingWithNulls(t *testing.T) {
397397
func TestRleBooleanEncodingFileRead(t *testing.T) {
398398
dir := os.Getenv("PARQUET_TEST_DATA")
399399
if dir == "" {
400-
t.Skip("no path supplied with PARQUET_TEST_DATA")
400+
dir = "../../parquet-testing/data"
401+
t.Log("PARQUET_TEST_DATA not set, using ../../parquet-testing/data")
401402
}
402403
assert.DirExists(t, dir)
403404

@@ -473,7 +474,8 @@ func (m *mockBadReader) ReadAt(p []byte, off int64) (n int, err error) {
473474
func TestBadReader(t *testing.T) {
474475
dir := os.Getenv("PARQUET_TEST_DATA")
475476
if dir == "" {
476-
t.Skip("no path supplied with PARQUET_TEST_DATA")
477+
dir = "../../parquet-testing/data"
478+
t.Log("PARQUET_TEST_DATA not set, using ../../parquet-testing/data")
477479
}
478480
require.DirExists(t, dir)
479481

@@ -505,7 +507,8 @@ func TestBadReader(t *testing.T) {
505507
func TestByteStreamSplitEncodingFileRead(t *testing.T) {
506508
dir := os.Getenv("PARQUET_TEST_DATA")
507509
if dir == "" {
508-
t.Skip("no path supplied with PARQUET_TEST_DATA")
510+
dir = "../../parquet-testing/data"
511+
t.Log("PARQUET_TEST_DATA not set, using ../../parquet-testing/data")
509512
}
510513
require.DirExists(t, dir)
511514

@@ -700,7 +703,8 @@ func TestDeltaBinaryPackedMultipleBatches(t *testing.T) {
700703
func TestLZ4RawFileRead(t *testing.T) {
701704
dir := os.Getenv("PARQUET_TEST_DATA")
702705
if dir == "" {
703-
t.Skip("no path supplied with PARQUET_TEST_DATA")
706+
dir = "../../parquet-testing/data"
707+
t.Log("PARQUET_TEST_DATA not set, using ../../parquet-testing/data")
704708
}
705709
require.DirExists(t, dir)
706710

@@ -783,7 +787,8 @@ func TestLZ4RawFileRead(t *testing.T) {
783787
func TestLZ4RawLargerFileRead(t *testing.T) {
784788
dir := os.Getenv("PARQUET_TEST_DATA")
785789
if dir == "" {
786-
t.Skip("no path supplied with PARQUET_TEST_DATA")
790+
dir = "../../parquet-testing/data"
791+
t.Log("PARQUET_TEST_DATA not set, using ../../parquet-testing/data")
787792
}
788793
require.DirExists(t, dir)
789794

@@ -825,7 +830,8 @@ func TestLZ4RawLargerFileRead(t *testing.T) {
825830
func TestDeltaByteArray(t *testing.T) {
826831
dir := os.Getenv("PARQUET_TEST_DATA")
827832
if dir == "" {
828-
t.Skip("no path supplied with PARQUET_TEST_DATA")
833+
dir = "../../parquet-testing/data"
834+
t.Log("PARQUET_TEST_DATA not set, using ../../parquet-testing/data")
829835
}
830836
require.DirExists(t, dir)
831837

@@ -872,3 +878,52 @@ func TestDeltaByteArray(t *testing.T) {
872878
}
873879
}
874880
}
881+
882+
func TestListColumns(t *testing.T) {
883+
dir := os.Getenv("PARQUET_TEST_DATA")
884+
if dir == "" {
885+
dir = "../../parquet-testing/data"
886+
t.Log("PARQUET_TEST_DATA not set, using ../../parquet-testing/data")
887+
}
888+
require.DirExists(t, dir)
889+
890+
records := [][]string{
891+
{"[1,2,3]", "[\"abc\",\"efg\",\"hij\"]"},
892+
{"[null,1]", ""},
893+
{"[4]", "[\"efg\",null,\"hij\",\"xyz\"]"},
894+
}
895+
896+
props := parquet.NewReaderProperties(memory.DefaultAllocator)
897+
fileReader, err := file.OpenParquetFile(path.Join(dir, "list_columns.parquet"),
898+
false, file.WithReadProps(props))
899+
require.NoError(t, err)
900+
defer fileReader.Close()
901+
902+
nrows := fileReader.MetaData().NumRows
903+
assert.Equal(t, nrows, int64(len(records)), "expected %d rows, got %d", len(records), nrows)
904+
905+
arrowReader, err := pqarrow.NewFileReader(
906+
fileReader,
907+
pqarrow.ArrowReadProperties{BatchSize: 1024},
908+
memory.DefaultAllocator,
909+
)
910+
require.NoError(t, err)
911+
912+
rr, err := arrowReader.GetRecordReader(context.Background(), nil, nil)
913+
require.NoError(t, err)
914+
defer rr.Release()
915+
916+
for rr.Next() {
917+
rec := rr.Record()
918+
for i := range int(rec.NumCols()) {
919+
vals := rec.Column(i)
920+
for j := range vals.Len() {
921+
if vals.IsNull(j) {
922+
require.Equal(t, records[j][i], "")
923+
continue
924+
}
925+
require.Equal(t, records[j][i], vals.ValueStr(j))
926+
}
927+
}
928+
}
929+
}

parquet/pqarrow/column_readers.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ func (lr *leafReader) IsOrHasRepeatedChild() bool { return false }
9393

9494
func (lr *leafReader) LoadBatch(nrecords int64) (err error) {
9595
lr.releaseOut()
96-
lr.recordRdr.ResetValues()
96+
lr.recordRdr.Reset()
9797

9898
if err := lr.recordRdr.Reserve(nrecords); err != nil {
9999
return err

0 commit comments

Comments
 (0)