Skip to content

Commit 91dff98

Browse files
authored
Parquet: Log corrupted parquet filenames to trace bad nodes that may have written them. (#13108)
1 parent 1911c94 commit 91dff98

File tree

1 file changed

+27
-11
lines changed

1 file changed

+27
-11
lines changed

parquet/src/main/java/org/apache/iceberg/parquet/ParquetReader.java

Lines changed: 27 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,10 @@
3232
import org.apache.parquet.ParquetReadOptions;
3333
import org.apache.parquet.column.page.PageReadStore;
3434
import org.apache.parquet.hadoop.ParquetFileReader;
35+
import org.apache.parquet.io.ParquetDecodingException;
3536
import org.apache.parquet.schema.MessageType;
37+
import org.slf4j.Logger;
38+
import org.slf4j.LoggerFactory;
3639

3740
public class ParquetReader<T> extends CloseableGroup implements CloseableIterable<T> {
3841
private final InputFile input;
@@ -94,6 +97,8 @@ public CloseableIterator<T> iterator() {
9497
}
9598

9699
private static class FileIterator<T> implements CloseableIterator<T> {
100+
private static final Logger LOG = LoggerFactory.getLogger(FileIterator.class);
101+
97102
private final ParquetFileReader reader;
98103
private final boolean[] shouldSkip;
99104
private final ParquetValueReader<T> model;
@@ -120,18 +125,29 @@ public boolean hasNext() {
120125

121126
@Override
122127
public T next() {
123-
if (valuesRead >= nextRowGroupStart) {
124-
advance();
125-
}
126-
127-
if (reuseContainers) {
128-
this.last = model.read(last);
129-
} else {
130-
this.last = model.read(null);
128+
try {
129+
if (valuesRead >= nextRowGroupStart) {
130+
advance();
131+
}
132+
133+
if (reuseContainers) {
134+
this.last = model.read(last);
135+
} else {
136+
this.last = model.read(null);
137+
}
138+
139+
valuesRead += 1;
140+
141+
return last;
142+
} catch (ParquetDecodingException e) {
143+
if (reader != null) {
144+
// Knowing the exact parquet file is essential for tracing bad nodes
145+
// that produced the corrupt file, parquet lib doesn't do this today.
146+
LOG.error("Error decoding Parquet file {}", reader.getFile(), e);
147+
}
148+
149+
throw e;
131150
}
132-
valuesRead += 1;
133-
134-
return last;
135151
}
136152

137153
private void advance() {

0 commit comments

Comments
 (0)