3232import org .apache .parquet .ParquetReadOptions ;
3333import org .apache .parquet .column .page .PageReadStore ;
3434import org .apache .parquet .hadoop .ParquetFileReader ;
35+ import org .apache .parquet .io .ParquetDecodingException ;
3536import org .apache .parquet .schema .MessageType ;
37+ import org .slf4j .Logger ;
38+ import org .slf4j .LoggerFactory ;
3639
3740public class ParquetReader <T > extends CloseableGroup implements CloseableIterable <T > {
3841 private final InputFile input ;
@@ -94,6 +97,8 @@ public CloseableIterator<T> iterator() {
9497 }
9598
9699 private static class FileIterator <T > implements CloseableIterator <T > {
100+ private static final Logger LOG = LoggerFactory .getLogger (FileIterator .class );
101+
97102 private final ParquetFileReader reader ;
98103 private final boolean [] shouldSkip ;
99104 private final ParquetValueReader <T > model ;
    @Override
    public T next() {
      try {
        // Lazily advance to the next row group once all values of the current
        // one have been consumed; nextRowGroupStart is the cumulative row count
        // up to and including the current group.
        if (valuesRead >= nextRowGroupStart) {
          advance();
        }

        // With reuseContainers enabled the previously returned record is handed
        // back to the value reader to be overwritten in place, avoiding a fresh
        // allocation per row; otherwise a new container is materialized.
        if (reuseContainers) {
          this.last = model.read(last);
        } else {
          this.last = model.read(null);
        }

        valuesRead += 1;

        return last;
      } catch (ParquetDecodingException e) {
        // NOTE(review): reader is presumably non-null after construction; the
        // guard keeps logging best-effort if that ever changes — confirm.
        if (reader != null) {
          // Knowing the exact parquet file is essential for tracing bad nodes
          // that produced the corrupt file, parquet lib doesn't do this today.
          LOG.error("Error decoding Parquet file {}", reader.getFile(), e);
        }

        // Rethrow unchanged so callers still see the original decoding failure
        // (with its cause chain intact); the log line only adds file context.
        throw e;
      }
    }
136152
137153 private void advance () {
0 commit comments