From e236d11327a1486702c43e11095a85302d1a0c8e Mon Sep 17 00:00:00 2001 From: Steven Wu Date: Wed, 30 Jun 2021 22:41:00 -0700 Subject: [PATCH 01/14] Iceberg source split and split reader --- .../iceberg/flink/FlinkConfigOptions.java | 6 + .../iceberg/flink/source/DataIterator.java | 43 ++- .../flink/source/FlinkSplitGenerator.java | 36 ++- .../apache/iceberg/flink/source/Position.java | 93 ++++++ .../iceberg/flink/source/ScanContext.java | 68 ++--- .../reader/ArrayPoolDataIteratorBatcher.java | 117 ++++++++ .../source/reader/DataIteratorBatcher.java | 33 +++ .../reader/DataIteratorReaderFunction.java | 47 +++ .../flink/source/reader/FileRecords.java | 101 +++++++ .../reader/IcebergSourceReaderMetrics.java | 60 ++++ .../reader/IcebergSourceSplitReader.java | 119 ++++++++ .../flink/source/reader/ReaderFunction.java | 33 +++ .../flink/source/reader/RecordFactory.java | 35 +++ .../reader/RecyclableArrayIterator.java | 85 ++++++ .../reader/RowDataIteratorReaderFunction.java | 58 ++++ .../source/reader/RowDataRecordFactory.java | 58 ++++ .../source/split/IcebergSourceSplit.java | 128 ++++++++ .../split/IcebergSourceSplitSerializer.java | 74 +++++ .../iceberg/flink/HadoopTableResource.java | 82 ++++++ .../source/reader/ReaderFunctionTestBase.java | 277 ++++++++++++++++++ .../reader/TestIcebergSourceSplitReader.java | 208 +++++++++++++ .../reader/TestRecyclableArrayIterator.java | 85 ++++++ .../TestRowDataIteratorReaderFunction.java | 67 +++++ .../flink/source/split/SplitHelpers.java | 96 ++++++ .../TestIcebergSourceSplitSerializer.java | 100 +++++++ 25 files changed, 2060 insertions(+), 49 deletions(-) create mode 100644 flink/src/main/java/org/apache/iceberg/flink/source/Position.java create mode 100644 flink/src/main/java/org/apache/iceberg/flink/source/reader/ArrayPoolDataIteratorBatcher.java create mode 100644 flink/src/main/java/org/apache/iceberg/flink/source/reader/DataIteratorBatcher.java create mode 100644 flink/src/main/java/org/apache/iceberg/flink/source/reader/DataIteratorReaderFunction.java create mode 100644 flink/src/main/java/org/apache/iceberg/flink/source/reader/FileRecords.java create mode 100644 flink/src/main/java/org/apache/iceberg/flink/source/reader/IcebergSourceReaderMetrics.java create mode 100644 flink/src/main/java/org/apache/iceberg/flink/source/reader/IcebergSourceSplitReader.java create mode 100644 flink/src/main/java/org/apache/iceberg/flink/source/reader/ReaderFunction.java create mode 100644 flink/src/main/java/org/apache/iceberg/flink/source/reader/RecordFactory.java create mode 100644 flink/src/main/java/org/apache/iceberg/flink/source/reader/RecyclableArrayIterator.java create mode 100644 flink/src/main/java/org/apache/iceberg/flink/source/reader/RowDataIteratorReaderFunction.java create mode 100644 flink/src/main/java/org/apache/iceberg/flink/source/reader/RowDataRecordFactory.java create mode 100644 flink/src/main/java/org/apache/iceberg/flink/source/split/IcebergSourceSplit.java create mode 100644 flink/src/main/java/org/apache/iceberg/flink/source/split/IcebergSourceSplitSerializer.java create mode 100644 flink/src/test/java/org/apache/iceberg/flink/HadoopTableResource.java create mode 100644 flink/src/test/java/org/apache/iceberg/flink/source/reader/ReaderFunctionTestBase.java create mode 100644 flink/src/test/java/org/apache/iceberg/flink/source/reader/TestIcebergSourceSplitReader.java create mode 100644 flink/src/test/java/org/apache/iceberg/flink/source/reader/TestRecyclableArrayIterator.java create mode 100644 
flink/src/test/java/org/apache/iceberg/flink/source/reader/TestRowDataIteratorReaderFunction.java create mode 100644 flink/src/test/java/org/apache/iceberg/flink/source/split/SplitHelpers.java create mode 100644 flink/src/test/java/org/apache/iceberg/flink/source/split/TestIcebergSourceSplitSerializer.java diff --git a/flink/src/main/java/org/apache/iceberg/flink/FlinkConfigOptions.java b/flink/src/main/java/org/apache/iceberg/flink/FlinkConfigOptions.java index 067abe8a6e41..d3fb0268fa4f 100644 --- a/flink/src/main/java/org/apache/iceberg/flink/FlinkConfigOptions.java +++ b/flink/src/main/java/org/apache/iceberg/flink/FlinkConfigOptions.java @@ -40,4 +40,10 @@ private FlinkConfigOptions() { .intType() .defaultValue(100) .withDescription("Sets max infer parallelism for source operator."); + + public static final ConfigOption SOURCE_READER_FETCH_BATCH_SIZE = ConfigOptions + .key("source.iceberg.reader.fetch-batch-size") + .intType() + .defaultValue(2048) + .withDescription("The target batch size for split reader fetch."); } diff --git a/flink/src/main/java/org/apache/iceberg/flink/source/DataIterator.java b/flink/src/main/java/org/apache/iceberg/flink/source/DataIterator.java index d470b0752304..67b719aba5cf 100644 --- a/flink/src/main/java/org/apache/iceberg/flink/source/DataIterator.java +++ b/flink/src/main/java/org/apache/iceberg/flink/source/DataIterator.java @@ -29,6 +29,7 @@ import org.apache.iceberg.encryption.InputFilesDecryptor; import org.apache.iceberg.io.CloseableIterator; import org.apache.iceberg.io.FileIO; +import org.apache.iceberg.relocated.com.google.common.base.Preconditions; /** * Flink data iterator that reads {@link CombinedScanTask} into a {@link CloseableIterator} @@ -37,20 +38,46 @@ */ @Internal public class DataIterator implements CloseableIterator { - private final FileScanTaskReader fileScanTaskReader; - private final InputFilesDecryptor inputFilesDecryptor; + private final CombinedScanTask combinedTask; + private Iterator tasks; private CloseableIterator currentIterator; + private Position position; public DataIterator(FileScanTaskReader fileScanTaskReader, CombinedScanTask task, FileIO io, EncryptionManager encryption) { this.fileScanTaskReader = fileScanTaskReader; - this.inputFilesDecryptor = new InputFilesDecryptor(task, io, encryption); + this.combinedTask = task; + this.tasks = task.files().iterator(); this.currentIterator = CloseableIterator.empty(); + // fileOffset starts at -1 because we started + // from an empty iterator that is not from the split files. 
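+ // advanceFile() bumps the offset to 0 once the iterator for the first file is opened.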
+ this.position = new Position(-1L, 0L); + } + + public void seek(Position startingPosition) { + // skip files + Preconditions.checkArgument(startingPosition.fileOffset() < combinedTask.files().size(), + "Checkpointed file offset is %d, while CombinedScanTask has %d files", + startingPosition.fileOffset(), combinedTask.files().size()); + for (long i = 0L; i < startingPosition.fileOffset(); ++i) { + tasks.next(); + } + updateCurrentIterator(); + // skip records within the file + for (long i = 0; i < startingPosition.recordOffset(); ++i) { + if (hasNext()) { + next(); + } else { + throw new IllegalStateException("Not enough records to skip: " + + startingPosition.recordOffset()); + } + } + this.position.update(startingPosition.fileOffset(), startingPosition.recordOffset()); } @Override @@ -62,9 +89,14 @@ public boolean hasNext() { @Override public T next() { updateCurrentIterator(); + position.advanceRecord(); return currentIterator.next(); } + public boolean isCurrentIteratorDone() { + return !currentIterator.hasNext(); + } + /** * Updates the current iterator field to ensure that the current Iterator * is not exhausted. @@ -74,6 +106,7 @@ private void updateCurrentIterator() { while (!currentIterator.hasNext() && tasks.hasNext()) { currentIterator.close(); currentIterator = openTaskIterator(tasks.next()); + position.advanceFile(); } } catch (IOException e) { throw new UncheckedIOException(e); @@ -90,4 +123,8 @@ public void close() throws IOException { currentIterator.close(); tasks = null; } + + public Position position() { + return position; + } } diff --git a/flink/src/main/java/org/apache/iceberg/flink/source/FlinkSplitGenerator.java b/flink/src/main/java/org/apache/iceberg/flink/source/FlinkSplitGenerator.java index f495e0909b7e..1ba396c187e9 100644 --- a/flink/src/main/java/org/apache/iceberg/flink/source/FlinkSplitGenerator.java +++ b/flink/src/main/java/org/apache/iceberg/flink/source/FlinkSplitGenerator.java @@ -27,23 +27,39 @@ import org.apache.iceberg.TableProperties; import org.apache.iceberg.TableScan; import org.apache.iceberg.expressions.Expression; +import org.apache.iceberg.flink.source.split.IcebergSourceSplit; import org.apache.iceberg.io.CloseableIterable; import org.apache.iceberg.relocated.com.google.common.collect.Lists; -class FlinkSplitGenerator { +public class FlinkSplitGenerator { private FlinkSplitGenerator() { } static FlinkInputSplit[] createInputSplits(Table table, ScanContext context) { - List tasks = tasks(table, context); - FlinkInputSplit[] splits = new FlinkInputSplit[tasks.size()]; - for (int i = 0; i < tasks.size(); i++) { - splits[i] = new FlinkInputSplit(i, tasks.get(i)); + try (CloseableIterable tasksIterable = planTasks(table, context)) { + List tasks = Lists.newArrayList(tasksIterable); + FlinkInputSplit[] splits = new FlinkInputSplit[tasks.size()]; + for (int i = 0; i < tasks.size(); i++) { + splits[i] = new FlinkInputSplit(i, tasks.get(i)); + } + return splits; + } catch (IOException e) { + throw new UncheckedIOException("Failed to process tasks iterable", e); + } + } + + public static List planIcebergSourceSplits( + Table table, ScanContext context) { + try (CloseableIterable tasksIterable = planTasks(table, context)) { + List splits = Lists.newArrayList(); + tasksIterable.forEach(task -> splits.add(IcebergSourceSplit.fromCombinedScanTask(task))); + return splits; + } catch (IOException e) { + throw new UncheckedIOException("Failed to process task iterable: ", e); } - return splits; } - private static List tasks(Table table, ScanContext context) 
{ + static CloseableIterable planTasks(Table table, ScanContext context) { TableScan scan = table .newScan() .caseSensitive(context.caseSensitive()) @@ -83,10 +99,6 @@ private static List tasks(Table table, ScanContext context) { } } - try (CloseableIterable tasksIterable = scan.planTasks()) { - return Lists.newArrayList(tasksIterable); - } catch (IOException e) { - throw new UncheckedIOException("Failed to close table scan: " + scan, e); - } + return scan.planTasks(); } } diff --git a/flink/src/main/java/org/apache/iceberg/flink/source/Position.java b/flink/src/main/java/org/apache/iceberg/flink/source/Position.java new file mode 100644 index 000000000000..9ac2c89e0972 --- /dev/null +++ b/flink/src/main/java/org/apache/iceberg/flink/source/Position.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.flink.source; + +import java.io.Serializable; +import java.util.Objects; +import org.apache.iceberg.CombinedScanTask; +import org.apache.iceberg.relocated.com.google.common.base.MoreObjects; + +/** + * A mutable class that defines the read position + *
    + *
+ * <ul>
+ *   <li>file offset in the list of files in a {@link CombinedScanTask}</li>
+ *   <li>record offset within a file</li>
+ * </ul>
+ */ +public class Position implements Serializable { + + private static final long serialVersionUID = 1L; + + private long fileOffset; + private long recordOffset; + + public Position(long fileOffset, long recordOffset) { + this.fileOffset = fileOffset; + this.recordOffset = recordOffset; + } + + void advanceFile() { + this.fileOffset += 1; + this.recordOffset = 0L; + } + + void advanceRecord() { + this.recordOffset += 1L; + } + + public void update(long newFileOffset, long newRecordOffset) { + this.fileOffset = newFileOffset; + this.recordOffset = newRecordOffset; + } + + public long fileOffset() { + return fileOffset; + } + + public long recordOffset() { + return recordOffset; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + final Position that = (Position) o; + return Objects.equals(fileOffset, that.fileOffset) && + Objects.equals(recordOffset, that.recordOffset); + } + + @Override + public int hashCode() { + return Objects.hash(fileOffset, recordOffset); + } + + @Override + public String toString() { + return MoreObjects.toStringHelper(this) + .add("fileOffset", fileOffset) + .add("recordOffset", recordOffset) + .toString(); + } +} diff --git a/flink/src/main/java/org/apache/iceberg/flink/source/ScanContext.java b/flink/src/main/java/org/apache/iceberg/flink/source/ScanContext.java index 2896efb39655..b0336d70f179 100644 --- a/flink/src/main/java/org/apache/iceberg/flink/source/ScanContext.java +++ b/flink/src/main/java/org/apache/iceberg/flink/source/ScanContext.java @@ -34,7 +34,7 @@ /** * Context object with optional arguments for a Flink Scan. */ -class ScanContext implements Serializable { +public class ScanContext implements Serializable { private static final long serialVersionUID = 1L; @@ -105,63 +105,63 @@ private ScanContext(boolean caseSensitive, Long snapshotId, Long startSnapshotId this.limit = limit; } - boolean caseSensitive() { + public boolean caseSensitive() { return caseSensitive; } - Long snapshotId() { + public Long snapshotId() { return snapshotId; } - Long startSnapshotId() { + public Long startSnapshotId() { return startSnapshotId; } - Long endSnapshotId() { + public Long endSnapshotId() { return endSnapshotId; } - Long asOfTimestamp() { + public Long asOfTimestamp() { return asOfTimestamp; } - Long splitSize() { + public Long splitSize() { return splitSize; } - Integer splitLookback() { + public Integer splitLookback() { return splitLookback; } - Long splitOpenFileCost() { + public Long splitOpenFileCost() { return splitOpenFileCost; } - boolean isStreaming() { + public boolean isStreaming() { return isStreaming; } - Duration monitorInterval() { + public Duration monitorInterval() { return monitorInterval; } - String nameMapping() { + public String nameMapping() { return nameMapping; } - Schema project() { + public Schema project() { return schema; } - List filters() { + public List filters() { return filters; } - long limit() { + public long limit() { return limit; } - ScanContext copyWithAppendsBetween(long newStartSnapshotId, long newEndSnapshotId) { + public ScanContext copyWithAppendsBetween(long newStartSnapshotId, long newEndSnapshotId) { return ScanContext.builder() .caseSensitive(caseSensitive) .useSnapshotId(null) @@ -180,7 +180,7 @@ ScanContext copyWithAppendsBetween(long newStartSnapshotId, long newEndSnapshotI .build(); } - ScanContext copyWithSnapshotId(long newSnapshotId) { + public ScanContext copyWithSnapshotId(long newSnapshotId) 
{ return ScanContext.builder() .caseSensitive(caseSensitive) .useSnapshotId(newSnapshotId) @@ -199,11 +199,11 @@ ScanContext copyWithSnapshotId(long newSnapshotId) { .build(); } - static Builder builder() { + public static Builder builder() { return new Builder(); } - static class Builder { + public static class Builder { private boolean caseSensitive = CASE_SENSITIVE.defaultValue(); private Long snapshotId = SNAPSHOT_ID.defaultValue(); private Long startSnapshotId = START_SNAPSHOT_ID.defaultValue(); @@ -222,77 +222,77 @@ static class Builder { private Builder() { } - Builder caseSensitive(boolean newCaseSensitive) { + public Builder caseSensitive(boolean newCaseSensitive) { this.caseSensitive = newCaseSensitive; return this; } - Builder useSnapshotId(Long newSnapshotId) { + public Builder useSnapshotId(Long newSnapshotId) { this.snapshotId = newSnapshotId; return this; } - Builder startSnapshotId(Long newStartSnapshotId) { + public Builder startSnapshotId(Long newStartSnapshotId) { this.startSnapshotId = newStartSnapshotId; return this; } - Builder endSnapshotId(Long newEndSnapshotId) { + public Builder endSnapshotId(Long newEndSnapshotId) { this.endSnapshotId = newEndSnapshotId; return this; } - Builder asOfTimestamp(Long newAsOfTimestamp) { + public Builder asOfTimestamp(Long newAsOfTimestamp) { this.asOfTimestamp = newAsOfTimestamp; return this; } - Builder splitSize(Long newSplitSize) { + public Builder splitSize(Long newSplitSize) { this.splitSize = newSplitSize; return this; } - Builder splitLookback(Integer newSplitLookback) { + public Builder splitLookback(Integer newSplitLookback) { this.splitLookback = newSplitLookback; return this; } - Builder splitOpenFileCost(Long newSplitOpenFileCost) { + public Builder splitOpenFileCost(Long newSplitOpenFileCost) { this.splitOpenFileCost = newSplitOpenFileCost; return this; } - Builder streaming(boolean streaming) { + public Builder streaming(boolean streaming) { this.isStreaming = streaming; return this; } - Builder monitorInterval(Duration newMonitorInterval) { + public Builder monitorInterval(Duration newMonitorInterval) { this.monitorInterval = newMonitorInterval; return this; } - Builder nameMapping(String newNameMapping) { + public Builder nameMapping(String newNameMapping) { this.nameMapping = newNameMapping; return this; } - Builder project(Schema newProjectedSchema) { + public Builder project(Schema newProjectedSchema) { this.projectedSchema = newProjectedSchema; return this; } - Builder filters(List newFilters) { + public Builder filters(List newFilters) { this.filters = newFilters; return this; } - Builder limit(long newLimit) { + public Builder limit(long newLimit) { this.limit = newLimit; return this; } - Builder fromProperties(Map properties) { + public Builder fromProperties(Map properties) { Configuration config = new Configuration(); properties.forEach(config::setString); diff --git a/flink/src/main/java/org/apache/iceberg/flink/source/reader/ArrayPoolDataIteratorBatcher.java b/flink/src/main/java/org/apache/iceberg/flink/source/reader/ArrayPoolDataIteratorBatcher.java new file mode 100644 index 000000000000..6b327898a8b1 --- /dev/null +++ b/flink/src/main/java/org/apache/iceberg/flink/source/reader/ArrayPoolDataIteratorBatcher.java @@ -0,0 +1,117 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.flink.source.reader; + +import java.io.IOException; +import java.io.UncheckedIOException; +import org.apache.flink.configuration.Configuration; +import org.apache.flink.connector.base.source.reader.RecordsWithSplitIds; +import org.apache.flink.connector.base.source.reader.SourceReaderOptions; +import org.apache.flink.connector.file.src.util.Pool; +import org.apache.flink.connector.file.src.util.RecordAndPosition; +import org.apache.iceberg.flink.FlinkConfigOptions; +import org.apache.iceberg.flink.source.DataIterator; +import org.apache.iceberg.flink.source.Position; +import org.apache.iceberg.io.CloseableIterator; + +class ArrayPoolDataIteratorBatcher implements DataIteratorBatcher { + + private final Configuration config; + private final RecordFactory recordFactory; + + ArrayPoolDataIteratorBatcher(Configuration config, RecordFactory recordFactory) { + this.config = config; + this.recordFactory = recordFactory; + } + + @Override + public CloseableIterator>> apply( + String splitId, DataIterator inputIterator) { + return new ArrayPoolBatchIterator(splitId, inputIterator); + } + + private class ArrayPoolBatchIterator implements CloseableIterator>> { + + private final String splitId; + private final DataIterator inputIterator; + private final int batchSize; + private final Pool pool; + + ArrayPoolBatchIterator(String splitId, DataIterator inputIterator) { + this.splitId = splitId; + this.inputIterator = inputIterator; + this.batchSize = config.getInteger(FlinkConfigOptions.SOURCE_READER_FETCH_BATCH_SIZE); + this.pool = createPoolOfBatches(config.getInteger(SourceReaderOptions.ELEMENT_QUEUE_CAPACITY)); + } + + @Override + public boolean hasNext() { + return inputIterator.hasNext(); + } + + @Override + public RecordsWithSplitIds> next() { + final T[] batch = getCachedEntry(); + int num = 0; + while (inputIterator.hasNext() && num < batchSize) { + T nextRecord = inputIterator.next(); + recordFactory.clone(nextRecord, batch[num]); + num++; + if (inputIterator.isCurrentIteratorDone()) { + // break early so that records in the ArrayResultIterator + // have the same fileOffset. 
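+ // A batch therefore never spans two files: RecyclableArrayIterator keeps a single file offset for all of its records.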
+ break; + } + } + if (num == 0) { + return null; + } else { + Position position = inputIterator.position(); + return FileRecords.forRecords(splitId, new RecyclableArrayIterator<>( + pool.recycler(), batch, num, position.fileOffset(), position.recordOffset() - num)); + } + } + + @Override + public void close() throws IOException { + if (inputIterator != null) { + inputIterator.close(); + } + } + + private Pool createPoolOfBatches(int numBatches) { + final Pool poolOfBatches = new Pool<>(numBatches); + for (int batchId = 0; batchId < numBatches; batchId++) { + T[] batch = recordFactory.createBatch(batchSize); + poolOfBatches.add(batch); + } + return poolOfBatches; + } + + private T[] getCachedEntry() { + try { + return pool.pollEntry(); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new UncheckedIOException(new IOException("Interrupted")); + } + } + } +} diff --git a/flink/src/main/java/org/apache/iceberg/flink/source/reader/DataIteratorBatcher.java b/flink/src/main/java/org/apache/iceberg/flink/source/reader/DataIteratorBatcher.java new file mode 100644 index 000000000000..a296517a1846 --- /dev/null +++ b/flink/src/main/java/org/apache/iceberg/flink/source/reader/DataIteratorBatcher.java @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.flink.source.reader; + +import java.io.Serializable; +import org.apache.flink.connector.base.source.reader.RecordsWithSplitIds; +import org.apache.flink.connector.file.src.util.RecordAndPosition; +import org.apache.iceberg.flink.source.DataIterator; +import org.apache.iceberg.io.CloseableIterator; + +@FunctionalInterface +public interface DataIteratorBatcher extends Serializable { + + CloseableIterator>> apply(String splitId, DataIterator inputIterator); + +} diff --git a/flink/src/main/java/org/apache/iceberg/flink/source/reader/DataIteratorReaderFunction.java b/flink/src/main/java/org/apache/iceberg/flink/source/reader/DataIteratorReaderFunction.java new file mode 100644 index 000000000000..e3e6bfdc0394 --- /dev/null +++ b/flink/src/main/java/org/apache/iceberg/flink/source/reader/DataIteratorReaderFunction.java @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.flink.source.reader; + +import org.apache.flink.connector.base.source.reader.RecordsWithSplitIds; +import org.apache.flink.connector.file.src.util.RecordAndPosition; +import org.apache.iceberg.flink.source.DataIterator; +import org.apache.iceberg.flink.source.split.IcebergSourceSplit; +import org.apache.iceberg.io.CloseableIterator; + +public abstract class DataIteratorReaderFunction implements ReaderFunction { + + private final DataIteratorBatcher batcher; + + DataIteratorReaderFunction(DataIteratorBatcher batcher) { + this.batcher = batcher; + } + + public abstract DataIterator createDataIterator(IcebergSourceSplit split); + + @Override + public CloseableIterator>> read(IcebergSourceSplit split) { + DataIterator inputIterator = createDataIterator(split); + if (split.position() != null) { + inputIterator.seek(split.position()); + } + return batcher.apply(split.splitId(), inputIterator); + } + +} diff --git a/flink/src/main/java/org/apache/iceberg/flink/source/reader/FileRecords.java b/flink/src/main/java/org/apache/iceberg/flink/source/reader/FileRecords.java new file mode 100644 index 000000000000..256660dd0a7a --- /dev/null +++ b/flink/src/main/java/org/apache/iceberg/flink/source/reader/FileRecords.java @@ -0,0 +1,101 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.iceberg.flink.source.reader; + +import java.io.IOException; +import java.util.Collections; +import java.util.Set; +import javax.annotation.Nullable; +import org.apache.flink.connector.base.source.reader.RecordsWithSplitIds; +import org.apache.flink.connector.file.src.util.RecordAndPosition; +import org.apache.iceberg.io.CloseableIterator; + +/** + * A batch of recrods for one split + */ +public class FileRecords implements RecordsWithSplitIds> { + + @Nullable + private final CloseableIterator> recordsForSplit; + private final Set finishedSplits; + + @Nullable + private String splitId; + @Nullable + private CloseableIterator> recordsForSplitCurrent; + + private FileRecords( + @Nullable String splitId, + @Nullable CloseableIterator> recordsForSplit, + Set finishedSplits) { + + this.splitId = splitId; + this.recordsForSplit = recordsForSplit; + this.finishedSplits = finishedSplits; + } + + @Nullable + @Override + public String nextSplit() { + // move the split one (from current value to null) + final String nextSplit = this.splitId; + this.splitId = null; + + // move the iterator, from null to value (if first move) or to null (if second move) + this.recordsForSplitCurrent = nextSplit != null ? this.recordsForSplit : null; + + return nextSplit; + } + + @Nullable + @Override + public RecordAndPosition nextRecordFromSplit() { + if (recordsForSplitCurrent != null) { + return recordsForSplitCurrent.next(); + } else { + throw new IllegalStateException(); + } + } + + @Override + public void recycle() { + if (recordsForSplit != null) { + try { + recordsForSplit.close(); + } catch (IOException e) { + throw new RuntimeException("Failed to close the record batch"); + } + } + } + + @Override + public Set finishedSplits() { + return finishedSplits; + } + + public static FileRecords forRecords( + final String splitId, final CloseableIterator> recordsForSplit) { + return new FileRecords<>(splitId, recordsForSplit, Collections.emptySet()); + } + + public static FileRecords finishedSplit(String splitId) { + return new FileRecords<>(null, null, Collections.singleton(splitId)); + } +} diff --git a/flink/src/main/java/org/apache/iceberg/flink/source/reader/IcebergSourceReaderMetrics.java b/flink/src/main/java/org/apache/iceberg/flink/source/reader/IcebergSourceReaderMetrics.java new file mode 100644 index 000000000000..a2aa7d518731 --- /dev/null +++ b/flink/src/main/java/org/apache/iceberg/flink/source/reader/IcebergSourceReaderMetrics.java @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.iceberg.flink.source.reader; + +import java.util.concurrent.atomic.AtomicLong; +import org.apache.flink.metrics.Counter; +import org.apache.flink.metrics.MetricGroup; + +public class IcebergSourceReaderMetrics { + + private final AtomicLong numRecordsOut; + private final AtomicLong assignedSplits; + private final AtomicLong finishedSplits; + private final Counter splitReaderFetches; + + public IcebergSourceReaderMetrics(MetricGroup metricGroup) { + final MetricGroup readerMetricGroup = metricGroup.addGroup("IcebergSourceReader"); + + this.numRecordsOut = new AtomicLong(); + this.assignedSplits = new AtomicLong(); + this.finishedSplits = new AtomicLong(); + readerMetricGroup.gauge("numRecordsOut", numRecordsOut::get); + readerMetricGroup.gauge("assignedSplits", assignedSplits::get); + readerMetricGroup.gauge("finishedSplits", finishedSplits::get); + this.splitReaderFetches = readerMetricGroup.counter("splitReaderFetches"); + } + + public void incrementNumRecordsOut(long delta) { + numRecordsOut.addAndGet(delta); + } + + public void incrementAssignedSplits(long delta) { + assignedSplits.addAndGet(delta); + } + + public void incrementFinishedSplits(long delta) { + finishedSplits.addAndGet(delta); + } + + public void recordSplitReaderFetches() { + splitReaderFetches.inc(); + } +} diff --git a/flink/src/main/java/org/apache/iceberg/flink/source/reader/IcebergSourceSplitReader.java b/flink/src/main/java/org/apache/iceberg/flink/source/reader/IcebergSourceSplitReader.java new file mode 100644 index 000000000000..9c68b6f6b4ac --- /dev/null +++ b/flink/src/main/java/org/apache/iceberg/flink/source/reader/IcebergSourceSplitReader.java @@ -0,0 +1,119 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.iceberg.flink.source.reader; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.util.ArrayDeque; +import java.util.Queue; +import javax.annotation.Nullable; +import org.apache.flink.api.connector.source.SourceReaderContext; +import org.apache.flink.connector.base.source.reader.RecordsWithSplitIds; +import org.apache.flink.connector.base.source.reader.splitreader.SplitReader; +import org.apache.flink.connector.base.source.reader.splitreader.SplitsChange; +import org.apache.flink.connector.file.src.util.RecordAndPosition; +import org.apache.iceberg.flink.source.split.IcebergSourceSplit; +import org.apache.iceberg.io.CloseableIterator; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +class IcebergSourceSplitReader implements SplitReader, IcebergSourceSplit> { + private static final Logger LOG = LoggerFactory.getLogger(IcebergSourceSplitReader.class); + + private final ReaderFunction readerFunction; + private final int indexOfSubtask; + private final IcebergSourceReaderMetrics metrics; + + private final Queue splits; + + @Nullable + private CloseableIterator>> currentReader; + @Nullable + private String currentSplitId; + + IcebergSourceSplitReader(ReaderFunction readerFunction, + SourceReaderContext context, + IcebergSourceReaderMetrics metrics) { + this.readerFunction = readerFunction; + this.indexOfSubtask = context.getIndexOfSubtask(); + this.metrics = metrics; + this.splits = new ArrayDeque<>(); + } + + @Override + public RecordsWithSplitIds> fetch() throws IOException { + metrics.recordSplitReaderFetches(); + checkSplitOrStartNext(); + if (currentReader.hasNext()) { + // Because Iterator#next() doesn't support checked exception, + // we need to wrap and unwrap the checked IOException with UncheckedIOException + try { + return currentReader.next(); + } catch (UncheckedIOException e) { + throw e.getCause(); + } + } else { + return finishSplit(); + } + } + + @Override + public void handleSplitsChanges(SplitsChange splitsChanges) { + LOG.debug("Add splits to reader: {}", splitsChanges.splits()); + splits.addAll(splitsChanges.splits()); + metrics.incrementAssignedSplits(splitsChanges.splits().size()); + } + + @Override + public void wakeUp() { + } + + @Override + public void close() throws Exception { + currentSplitId = null; + if (currentReader != null) { + currentReader.close(); + } + } + + private void checkSplitOrStartNext() throws IOException { + if (currentReader != null) { + return; + } + final IcebergSourceSplit nextSplit = splits.poll(); + if (nextSplit == null) { + throw new IOException("No split remaining"); + } + currentSplitId = nextSplit.splitId(); + currentReader = readerFunction.read(nextSplit); + } + + private FileRecords finishSplit() throws IOException { + if (currentReader != null) { + currentReader.close(); + currentReader = null; + } + final FileRecords finishRecords = FileRecords.finishedSplit(currentSplitId); + LOG.debug("Split reader {} finished split: {}", indexOfSubtask, currentSplitId); + currentSplitId = null; + metrics.incrementFinishedSplits(1L); + return finishRecords; + } +} diff --git a/flink/src/main/java/org/apache/iceberg/flink/source/reader/ReaderFunction.java b/flink/src/main/java/org/apache/iceberg/flink/source/reader/ReaderFunction.java new file mode 100644 index 000000000000..bd6fd097444b --- /dev/null +++ b/flink/src/main/java/org/apache/iceberg/flink/source/reader/ReaderFunction.java @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more 
contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.flink.source.reader; + +import java.io.Serializable; +import org.apache.flink.connector.base.source.reader.RecordsWithSplitIds; +import org.apache.flink.connector.file.src.util.RecordAndPosition; +import org.apache.iceberg.flink.source.split.IcebergSourceSplit; +import org.apache.iceberg.io.CloseableIterator; + +public interface ReaderFunction extends Serializable { + + CloseableIterator>> read(IcebergSourceSplit split); + +} + diff --git a/flink/src/main/java/org/apache/iceberg/flink/source/reader/RecordFactory.java b/flink/src/main/java/org/apache/iceberg/flink/source/reader/RecordFactory.java new file mode 100644 index 000000000000..82deda46018a --- /dev/null +++ b/flink/src/main/java/org/apache/iceberg/flink/source/reader/RecordFactory.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.flink.source.reader; + +import java.io.Serializable; + +interface RecordFactory extends Serializable { + + /** + * Create a batch of records + */ + T[] createBatch(int batchSize); + + /** + * Clone record + */ + void clone(T from, T to); +} diff --git a/flink/src/main/java/org/apache/iceberg/flink/source/reader/RecyclableArrayIterator.java b/flink/src/main/java/org/apache/iceberg/flink/source/reader/RecyclableArrayIterator.java new file mode 100644 index 000000000000..63e657b12629 --- /dev/null +++ b/flink/src/main/java/org/apache/iceberg/flink/source/reader/RecyclableArrayIterator.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.flink.source.reader; + +import javax.annotation.Nullable; +import org.apache.flink.connector.file.src.util.ArrayResultIterator; +import org.apache.flink.connector.file.src.util.CheckpointedPosition; +import org.apache.flink.connector.file.src.util.MutableRecordAndPosition; +import org.apache.flink.connector.file.src.util.Pool; +import org.apache.flink.connector.file.src.util.RecordAndPosition; +import org.apache.iceberg.io.CloseableIterator; + +/** + * Similar to the {@link ArrayResultIterator}. + * Main difference is the records array can be recycled back to a pool. + */ +final class RecyclableArrayIterator implements CloseableIterator> { + + private final Pool.Recycler recycler; + private final E[] records; + private final int num; + private final MutableRecordAndPosition recordAndPosition; + + private int pos; + + RecyclableArrayIterator(Pool.Recycler recycler) { + this(recycler, null, 0, CheckpointedPosition.NO_OFFSET, 0L); + } + + /** + * Each record's {@link RecordAndPosition} will have the same fileOffset (for {@link RecordAndPosition#getOffset()}. + * The first returned record will have a records-to-skip count of {@code recordOffset + 1}, following + * the contract that each record needs to point to the position AFTER itself + * (because a checkpoint taken after the record was emitted needs to resume from after that record). + */ + RecyclableArrayIterator( + Pool.Recycler recycler, final E[] newRecords, + final int newNum, final long fileOffset, final long recordOffset) { + this.recycler = recycler; + this.records = newRecords; + this.num = newNum; + this.recordAndPosition = new MutableRecordAndPosition<>(); + this.recordAndPosition.set(null, fileOffset, recordOffset); + + this.pos = 0; + } + + @Override + public boolean hasNext() { + return pos < num; + } + + @Override + @Nullable + public RecordAndPosition next() { + if (pos < num) { + recordAndPosition.setNext(records[pos++]); + return recordAndPosition; + } else { + return null; + } + } + + @Override + public void close() { + recycler.recycle(records); + } +} diff --git a/flink/src/main/java/org/apache/iceberg/flink/source/reader/RowDataIteratorReaderFunction.java b/flink/src/main/java/org/apache/iceberg/flink/source/reader/RowDataIteratorReaderFunction.java new file mode 100644 index 000000000000..92e054c0e13b --- /dev/null +++ b/flink/src/main/java/org/apache/iceberg/flink/source/reader/RowDataIteratorReaderFunction.java @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.flink.source.reader; + +import org.apache.flink.configuration.Configuration; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.types.logical.RowType; +import org.apache.iceberg.Table; +import org.apache.iceberg.flink.source.DataIterator; +import org.apache.iceberg.flink.source.RowDataFileScanTaskReader; +import org.apache.iceberg.flink.source.ScanContext; +import org.apache.iceberg.flink.source.split.IcebergSourceSplit; + +public class RowDataIteratorReaderFunction extends DataIteratorReaderFunction { + + private final Table table; + private final ScanContext scanContext; + + public RowDataIteratorReaderFunction( + Configuration config, + Table table, + ScanContext scanContext, + RowType rowType) { + super(new ArrayPoolDataIteratorBatcher<>(config, new RowDataRecordFactory(rowType))); + this.table = table; + this.scanContext = scanContext; + } + + @Override + public DataIterator createDataIterator(IcebergSourceSplit split) { + return new DataIterator<>( + new RowDataFileScanTaskReader( + table.schema(), + scanContext.project(), + scanContext.nameMapping(), + scanContext.caseSensitive()), + split.task(), + table.io(), + table.encryption()); + } +} diff --git a/flink/src/main/java/org/apache/iceberg/flink/source/reader/RowDataRecordFactory.java b/flink/src/main/java/org/apache/iceberg/flink/source/reader/RowDataRecordFactory.java new file mode 100644 index 000000000000..f2ff28cab222 --- /dev/null +++ b/flink/src/main/java/org/apache/iceberg/flink/source/reader/RowDataRecordFactory.java @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.iceberg.flink.source.reader; + +import org.apache.flink.api.common.typeutils.TypeSerializer; +import org.apache.flink.table.data.GenericRowData; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.runtime.typeutils.InternalSerializers; +import org.apache.flink.table.types.logical.RowType; +import org.apache.iceberg.flink.data.RowDataUtil; + +class RowDataRecordFactory implements RecordFactory { + + private final RowType rowType; + private final TypeSerializer[] fieldSerializers; + + RowDataRecordFactory(final RowType rowType) { + this.rowType = rowType; + this.fieldSerializers = createFieldSerializers(rowType); + } + + static TypeSerializer[] createFieldSerializers(RowType rowType) { + return rowType.getChildren().stream() + .map(InternalSerializers::create) + .toArray(TypeSerializer[]::new); + } + + @Override + public RowData[] createBatch(int batchSize) { + RowData[] arr = new RowData[batchSize]; + for (int i = 0; i < batchSize; ++i) { + arr[i] = new GenericRowData(rowType.getFieldCount()); + } + return arr; + } + + @Override + public void clone(RowData from, RowData to) { + RowDataUtil.clone(from, to, rowType, fieldSerializers); + } +} diff --git a/flink/src/main/java/org/apache/iceberg/flink/source/split/IcebergSourceSplit.java b/flink/src/main/java/org/apache/iceberg/flink/source/split/IcebergSourceSplit.java new file mode 100644 index 000000000000..02692dd86668 --- /dev/null +++ b/flink/src/main/java/org/apache/iceberg/flink/source/split/IcebergSourceSplit.java @@ -0,0 +1,128 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.flink.source.split; + +import java.io.Serializable; +import java.util.Collection; +import java.util.stream.Collectors; +import javax.annotation.Nullable; +import org.apache.flink.api.connector.source.SourceSplit; +import org.apache.iceberg.CombinedScanTask; +import org.apache.iceberg.FileScanTask; +import org.apache.iceberg.flink.source.Position; +import org.apache.iceberg.relocated.com.google.common.base.MoreObjects; +import org.apache.iceberg.relocated.com.google.common.base.Objects; +import org.apache.iceberg.relocated.com.google.common.collect.Iterables; + +public class IcebergSourceSplit implements SourceSplit, Serializable { + + private final CombinedScanTask task; + + /** + * Position field is mutable + */ + @Nullable + private final Position position; + + /** + * The splits are frequently serialized into checkpoints. + * Caching the byte representation makes repeated serialization cheap. 
+ */ + @Nullable + private transient byte[] serializedFormCache; + + public IcebergSourceSplit(CombinedScanTask task, Position position) { + this.task = task; + this.position = position; + } + + public static IcebergSourceSplit fromCombinedScanTask(CombinedScanTask combinedScanTask) { + return fromCombinedScanTask(combinedScanTask, 0L, 0L); + } + + public static IcebergSourceSplit fromCombinedScanTask( + CombinedScanTask combinedScanTask, long fileOffset, long recordOffset) { + return new IcebergSourceSplit(combinedScanTask, new Position(fileOffset, recordOffset)); + } + + public CombinedScanTask task() { + return task; + } + + public Position position() { + return position; + } + + public byte[] serializedFormCache() { + return serializedFormCache; + } + + public void serializedFormCache(byte[] cachedBytes) { + this.serializedFormCache = cachedBytes; + } + + @Override + public String splitId() { + return MoreObjects.toStringHelper(this) + .add("files", toString(task.files())) + .toString(); + } + + public void updatePosition(long newFileOffset, long newRecordOffset) { + position.update(newFileOffset, newRecordOffset); + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + IcebergSourceSplit split = (IcebergSourceSplit) o; + return Objects.equal(splitId(), split.splitId()) && + Objects.equal(position, split.position()); + } + + @Override + public int hashCode() { + return Objects.hashCode(splitId()); + } + + @Override + public String toString() { + return MoreObjects.toStringHelper(this) + .add("files", toString(task.files())) + .add("position", position) + .toString(); + } + + private String toString(Collection files) { + return Iterables.toString(files.stream().map(fileScanTask -> + MoreObjects.toStringHelper(fileScanTask) + .add("file", fileScanTask.file() != null ? + fileScanTask.file().path().toString() : + "NoFile") + .add("start", fileScanTask.start()) + .add("length", fileScanTask.length()) + .toString()).collect(Collectors.toList())); + } +} diff --git a/flink/src/main/java/org/apache/iceberg/flink/source/split/IcebergSourceSplitSerializer.java b/flink/src/main/java/org/apache/iceberg/flink/source/split/IcebergSourceSplitSerializer.java new file mode 100644 index 000000000000..dcac972bd06f --- /dev/null +++ b/flink/src/main/java/org/apache/iceberg/flink/source/split/IcebergSourceSplitSerializer.java @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.iceberg.flink.source.split; + +import java.io.IOException; +import org.apache.flink.annotation.VisibleForTesting; +import org.apache.flink.core.io.SimpleVersionedSerializer; +import org.apache.flink.util.InstantiationUtil; + +/** + * TODO: use Java serialization for now. + * will switch to more stable serializer from issue-1698. + */ +public class IcebergSourceSplitSerializer implements SimpleVersionedSerializer { + + public static final IcebergSourceSplitSerializer INSTANCE = new IcebergSourceSplitSerializer(); + + private static final int VERSION = 1; + + @Override + public int getVersion() { + return VERSION; + } + + @Override + public byte[] serialize(IcebergSourceSplit split) throws IOException { + if (split.serializedFormCache() == null) { + final byte[] result = serializeV1(split); + split.serializedFormCache(result); + } + return split.serializedFormCache(); + } + + @Override + public IcebergSourceSplit deserialize(int version, byte[] serialized) throws IOException { + switch (version) { + case 1: + return deserializeV1(serialized); + default: + throw new IOException("Unknown version: " + version); + } + } + + @VisibleForTesting + byte[] serializeV1(IcebergSourceSplit split) throws IOException { + return InstantiationUtil.serializeObject(split); + } + + @VisibleForTesting + IcebergSourceSplit deserializeV1(byte[] serialized) throws IOException { + try { + return InstantiationUtil.deserializeObject(serialized, getClass().getClassLoader()); + } catch (ClassNotFoundException e) { + throw new RuntimeException("Failed to deserialize the split.", e); + } + } +} diff --git a/flink/src/test/java/org/apache/iceberg/flink/HadoopTableResource.java b/flink/src/test/java/org/apache/iceberg/flink/HadoopTableResource.java new file mode 100644 index 000000000000..a205b22f3ed5 --- /dev/null +++ b/flink/src/test/java/org/apache/iceberg/flink/HadoopTableResource.java @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.iceberg.flink; + +import java.io.File; +import org.apache.hadoop.conf.Configuration; +import org.apache.iceberg.Schema; +import org.apache.iceberg.Table; +import org.apache.iceberg.catalog.TableIdentifier; +import org.apache.iceberg.hadoop.HadoopCatalog; +import org.junit.Assert; +import org.junit.rules.ExternalResource; +import org.junit.rules.TemporaryFolder; + +public class HadoopTableResource extends ExternalResource { + + private final TemporaryFolder temporaryFolder; + private final String database; + private final String tableName; + private final Schema schema; + + private HadoopCatalog catalog; + private TableLoader tableLoader; + private Table table; + + public HadoopTableResource(TemporaryFolder temporaryFolder, String database, String tableName, Schema schema) { + this.temporaryFolder = temporaryFolder; + this.database = database; + this.tableName = tableName; + this.schema = schema; + } + + @Override + protected void before() throws Throwable { + File warehouseFile = temporaryFolder.newFolder(); + Assert.assertTrue(warehouseFile.delete()); + // before variables + String warehouse = "file:" + warehouseFile; + Configuration hadoopConf = new Configuration(); + this.catalog = new HadoopCatalog(hadoopConf, warehouse); + String location = String.format("%s/%s/%s", warehouse, database, tableName); + this.tableLoader = TableLoader.fromHadoopTable(location); + this.table = catalog.createTable(TableIdentifier.of(database, tableName), schema); + tableLoader.open(); + } + + @Override + protected void after() { + try { + catalog.dropTable(TableIdentifier.of(database, tableName)); + catalog.close(); + tableLoader.close(); + } catch (Exception e) { + throw new RuntimeException("Failed to close catalog resource"); + } + } + + public TableLoader tableLoader() { + return tableLoader; + } + + public Table table() { + return table; + } +} diff --git a/flink/src/test/java/org/apache/iceberg/flink/source/reader/ReaderFunctionTestBase.java b/flink/src/test/java/org/apache/iceberg/flink/source/reader/ReaderFunctionTestBase.java new file mode 100644 index 000000000000..54abab75341d --- /dev/null +++ b/flink/src/test/java/org/apache/iceberg/flink/source/reader/ReaderFunctionTestBase.java @@ -0,0 +1,277 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.iceberg.flink.source.reader; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import org.apache.flink.connector.base.source.reader.RecordsWithSplitIds; +import org.apache.flink.connector.file.src.util.RecordAndPosition; +import org.apache.iceberg.BaseCombinedScanTask; +import org.apache.iceberg.CombinedScanTask; +import org.apache.iceberg.DataFile; +import org.apache.iceberg.FileFormat; +import org.apache.iceberg.FileScanTask; +import org.apache.iceberg.Schema; +import org.apache.iceberg.data.GenericAppenderHelper; +import org.apache.iceberg.data.RandomGenericData; +import org.apache.iceberg.data.Record; +import org.apache.iceberg.flink.HadoopTableResource; +import org.apache.iceberg.flink.TestFixtures; +import org.apache.iceberg.flink.source.FlinkSplitGenerator; +import org.apache.iceberg.flink.source.Position; +import org.apache.iceberg.flink.source.ScanContext; +import org.apache.iceberg.flink.source.split.IcebergSourceSplit; +import org.apache.iceberg.io.CloseableIterator; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.hamcrest.CoreMatchers; +import org.junit.Assert; +import org.junit.Before; +import org.junit.ClassRule; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public abstract class ReaderFunctionTestBase { + + @Parameterized.Parameters(name = "fileFormat={0}") + public static Object[][] parameters() { + return new Object[][]{ + new Object[]{FileFormat.AVRO}, + new Object[]{FileFormat.ORC}, + new Object[]{FileFormat.PARQUET} + }; + } + + @ClassRule + public static final TemporaryFolder TEMPORARY_FOLDER = new TemporaryFolder(); + + protected static final ScanContext scanContext = ScanContext.builder() + .project(TestFixtures.SCHEMA) + .build(); + + @Rule + public final HadoopTableResource tableResource = new HadoopTableResource(TEMPORARY_FOLDER, + TestFixtures.DATABASE, TestFixtures.TABLE, TestFixtures.SCHEMA); + + protected abstract ReaderFunction readerFunction(); + + protected abstract void assertRecords(List expected, List actual, Schema schema); + + private final FileFormat fileFormat; + + public ReaderFunctionTestBase(FileFormat fileFormat) { + this.fileFormat = fileFormat; + } + + private List> recordBatchList; + private List dataFileList; + private IcebergSourceSplit icebergSplit; + + @Before + public void before() throws IOException { + final GenericAppenderHelper dataAppender = new GenericAppenderHelper( + tableResource.table(), fileFormat, TEMPORARY_FOLDER); + recordBatchList = new ArrayList<>(3); + dataFileList = new ArrayList<>(2); + for (int i = 0; i < 3; ++i) { + List records = RandomGenericData.generate(TestFixtures.SCHEMA, 2, i); + recordBatchList.add(records); + DataFile dataFile = dataAppender.writeFile(null, records); + dataFileList.add(dataFile); + dataAppender.appendToTable(dataFile); + } + + final List splits = FlinkSplitGenerator + .planIcebergSourceSplits(tableResource.table(), scanContext); + Assert.assertEquals(1, splits.size()); + Assert.assertEquals(3, splits.get(0).task().files().size()); + icebergSplit = sortFilesAsAppendOrder(splits.get(0), dataFileList); + } + + /** + * Split planning doesn't guarantee the order is the same as appended. 
+ * So we re-arrange the list to make the assertion simpler + */ + public static IcebergSourceSplit sortFilesAsAppendOrder(IcebergSourceSplit split, List dataFiles) { + Collection files = split.task().files(); + Assert.assertEquals(files.size(), dataFiles.size()); + FileScanTask[] sortedFileArray = new FileScanTask[files.size()]; + for (FileScanTask fileScanTask : files) { + for (int i = 0; i < dataFiles.size(); ++i) { + if (fileScanTask.file().path().toString().equals(dataFiles.get(i).path().toString())) { + sortedFileArray[i] = fileScanTask; + } + } + } + List sortedFileList = Lists.newArrayList(sortedFileArray); + Assert.assertThat(sortedFileList, CoreMatchers.everyItem(CoreMatchers.notNullValue(FileScanTask.class))); + CombinedScanTask rearrangedCombinedTask = new BaseCombinedScanTask(sortedFileList); + return IcebergSourceSplit.fromCombinedScanTask(rearrangedCombinedTask); + } + + /** + * We have to combine the record extraction and position assertion in a single function, + * because iterator is only valid for one pass. + */ + private List extractRecordsAndAssertPosition( + RecordsWithSplitIds> batch, + long expectedCount, long exptectedFileOffset, long startRecordOffset) { + // need to call nextSplit first in order to read the batch + batch.nextSplit(); + final List records = new ArrayList<>(); + long recordOffset = startRecordOffset; + RecordAndPosition recordAndPosition; + while ((recordAndPosition = batch.nextRecordFromSplit()) != null) { + records.add(recordAndPosition.getRecord()); + Assert.assertEquals("expected file offset", exptectedFileOffset, recordAndPosition.getOffset()); + Assert.assertEquals("expected record offset", recordOffset, recordAndPosition.getRecordSkipCount() - 1); + recordOffset++; + } + Assert.assertEquals("expected record count", expectedCount, records.size()); + return records; + } + + @Test + public void testNoCheckpointedPosition() throws IOException { + final IcebergSourceSplit split = icebergSplit; + final CloseableIterator>> reader = readerFunction().apply(split); + + final RecordsWithSplitIds> batch0 = reader.next(); + final List actual0 = extractRecordsAndAssertPosition(batch0, recordBatchList.get(0).size(), 0L, 0L); + assertRecords(recordBatchList.get(0), actual0, TestFixtures.SCHEMA); + batch0.recycle(); + + final RecordsWithSplitIds> batch1 = reader.next(); + final List actual1 = extractRecordsAndAssertPosition(batch1, recordBatchList.get(1).size(), 1L, 0L); + assertRecords(recordBatchList.get(1), actual1, TestFixtures.SCHEMA); + batch1.recycle(); + + final RecordsWithSplitIds> batch2 = reader.next(); + final List actual2 = extractRecordsAndAssertPosition(batch2, recordBatchList.get(2).size(), 2L, 0L); + assertRecords(recordBatchList.get(2), actual2, TestFixtures.SCHEMA); + batch2.recycle(); + } + + @Test + public void testCheckpointedPositionBeforeFirstFile() throws IOException { + final IcebergSourceSplit split = new IcebergSourceSplit( + icebergSplit.task(), + new Position(0L, 0L)); + final CloseableIterator>> reader = readerFunction().apply(split); + + final RecordsWithSplitIds> batch0 = reader.next(); + final List actual0 = extractRecordsAndAssertPosition(batch0, recordBatchList.get(0).size(), 0L, 0L); + assertRecords(recordBatchList.get(0), actual0, TestFixtures.SCHEMA); + batch0.recycle(); + + final RecordsWithSplitIds> batch1 = reader.next(); + final List actual1 = extractRecordsAndAssertPosition(batch1, recordBatchList.get(1).size(), 1L, 0L); + assertRecords(recordBatchList.get(1), actual1, TestFixtures.SCHEMA); + batch1.recycle(); + + 
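+ // a checkpoint at Position(0, 0) points before the first file, so reading proceeds the same as with no checkpoint; the last batch below is still expected to come from the third file (file offset 2)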
final RecordsWithSplitIds> batch2 = reader.next(); + final List actual2 = extractRecordsAndAssertPosition(batch2, recordBatchList.get(2).size(), 2L, 0L); + assertRecords(recordBatchList.get(2), actual2, TestFixtures.SCHEMA); + batch2.recycle(); + } + + @Test + public void testCheckpointedPositionMiddleFirstFile() throws IOException { + final IcebergSourceSplit split = new IcebergSourceSplit( + icebergSplit.task(), + new Position(0L, 1L)); + final CloseableIterator>> reader = readerFunction().apply(split); + + final RecordsWithSplitIds> batch0 = reader.next(); + final List actual0 = extractRecordsAndAssertPosition(batch0, 1L, 0L, 1L); + assertRecords(recordBatchList.get(0).subList(1, 2), actual0, TestFixtures.SCHEMA); + batch0.recycle(); + + final RecordsWithSplitIds> batch1 = reader.next(); + final List actual1 = extractRecordsAndAssertPosition(batch1, recordBatchList.get(1).size(), 1L, 0L); + assertRecords(recordBatchList.get(1), actual1, TestFixtures.SCHEMA); + batch1.recycle(); + + final RecordsWithSplitIds> batch2 = reader.next(); + final List actual2 = extractRecordsAndAssertPosition(batch2, recordBatchList.get(2).size(), 2L, 0L); + assertRecords(recordBatchList.get(2), actual2, TestFixtures.SCHEMA); + batch2.recycle(); + } + + @Test + public void testCheckpointedPositionAfterFirstFile() throws IOException { + final IcebergSourceSplit split = new IcebergSourceSplit( + icebergSplit.task(), + new Position(0L, 2L)); + final CloseableIterator>> reader = readerFunction().apply(split); + + final RecordsWithSplitIds> batch0 = reader.next(); + final List actual1 = extractRecordsAndAssertPosition(batch0, recordBatchList.get(1).size(), 1L, 0L); + assertRecords(recordBatchList.get(1), actual1, TestFixtures.SCHEMA); + batch0.recycle(); + + final RecordsWithSplitIds> batch2 = reader.next(); + final List actual2 = extractRecordsAndAssertPosition(batch2, recordBatchList.get(2).size(), 2L, 0L); + assertRecords(recordBatchList.get(2), actual2, TestFixtures.SCHEMA); + batch2.recycle(); + } + + @Test + public void testCheckpointedPositionBeforeSecondFile() throws IOException { + final IcebergSourceSplit split = new IcebergSourceSplit( + icebergSplit.task(), + new Position(1L, 0L)); + final CloseableIterator>> reader = readerFunction().apply(split); + + final RecordsWithSplitIds> batch1 = reader.next(); + final List actual1 = extractRecordsAndAssertPosition(batch1, recordBatchList.get(1).size(), 1L, 0L); + assertRecords(recordBatchList.get(1), actual1, TestFixtures.SCHEMA); + batch1.recycle(); + + final RecordsWithSplitIds> batch2 = reader.next(); + final List actual2 = extractRecordsAndAssertPosition(batch2, recordBatchList.get(2).size(), 2L, 0L); + assertRecords(recordBatchList.get(2), actual2, TestFixtures.SCHEMA); + batch2.recycle(); + } + + @Test + public void testCheckpointedPositionMidSecondFile() throws IOException { + final IcebergSourceSplit split = new IcebergSourceSplit( + icebergSplit.task(), + new Position(1L, 1L)); + final CloseableIterator>> reader = readerFunction().apply(split); + + final RecordsWithSplitIds> batch1 = reader.next(); + final List actual1 = extractRecordsAndAssertPosition(batch1, 1L, 1L, 1L); + assertRecords(recordBatchList.get(1).subList(1, 2), actual1, TestFixtures.SCHEMA); + batch1.recycle(); + + final RecordsWithSplitIds> batch2 = reader.next(); + final List actual2 = extractRecordsAndAssertPosition(batch2, recordBatchList.get(2).size(), 2L, 0L); + assertRecords(recordBatchList.get(2), actual2, TestFixtures.SCHEMA); + batch2.recycle(); + } + +} diff --git 
a/flink/src/test/java/org/apache/iceberg/flink/source/reader/TestIcebergSourceSplitReader.java b/flink/src/test/java/org/apache/iceberg/flink/source/reader/TestIcebergSourceSplitReader.java new file mode 100644 index 000000000000..e83d1d61dd35 --- /dev/null +++ b/flink/src/test/java/org/apache/iceberg/flink/source/reader/TestIcebergSourceSplitReader.java @@ -0,0 +1,208 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.flink.source.reader; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import org.apache.flink.configuration.Configuration; +import org.apache.flink.connector.base.source.reader.RecordsWithSplitIds; +import org.apache.flink.connector.base.source.reader.splitreader.SplitsAddition; +import org.apache.flink.connector.file.src.util.RecordAndPosition; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.types.logical.RowType; +import org.apache.flink.types.Row; +import org.apache.iceberg.DataFile; +import org.apache.iceberg.FileFormat; +import org.apache.iceberg.data.GenericAppenderHelper; +import org.apache.iceberg.data.RandomGenericData; +import org.apache.iceberg.data.Record; +import org.apache.iceberg.flink.FlinkSchemaUtil; +import org.apache.iceberg.flink.HadoopTableResource; +import org.apache.iceberg.flink.TestFixtures; +import org.apache.iceberg.flink.TestHelpers; +import org.apache.iceberg.flink.source.FlinkSplitGenerator; +import org.apache.iceberg.flink.source.ScanContext; +import org.apache.iceberg.flink.source.split.IcebergSourceSplit; +import org.apache.iceberg.relocated.com.google.common.collect.Sets; +import org.junit.Assert; +import org.junit.BeforeClass; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.rules.RuleChain; +import org.junit.rules.TemporaryFolder; +import org.junit.rules.TestRule; + +public class TestIcebergSourceSplitReader { + + public static final TemporaryFolder TEMPORARY_FOLDER = new TemporaryFolder(); + public static final HadoopTableResource tableResource = new HadoopTableResource(TEMPORARY_FOLDER, + TestFixtures.DATABASE, TestFixtures.TABLE, TestFixtures.SCHEMA); + + @ClassRule + public static final TestRule chain = RuleChain + .outerRule(TEMPORARY_FOLDER) + .around(tableResource); + + private static final ScanContext scanContext = ScanContext.builder() + .project(TestFixtures.SCHEMA) + .build(); + private static final FileFormat fileFormat = FileFormat.PARQUET; + + private static List> recordBatchList; + private static List dataFileList; + private static IcebergSourceSplit icebergSplit; + + @BeforeClass + public static void beforeClass() throws IOException { + final GenericAppenderHelper dataAppender = new GenericAppenderHelper( + 
tableResource.table(), fileFormat, TEMPORARY_FOLDER); + recordBatchList = new ArrayList<>(3); + dataFileList = new ArrayList<>(2); + for (int i = 0; i < 3; ++i) { + List records = RandomGenericData.generate(TestFixtures.SCHEMA, 2, i); + recordBatchList.add(records); + DataFile dataFile = dataAppender.writeFile(null, records); + dataFileList.add(dataFile); + dataAppender.appendToTable(dataFile); + } + + final List splits = FlinkSplitGenerator + .planIcebergSourceSplits(tableResource.table(), scanContext); + Assert.assertEquals(1, splits.size()); + Assert.assertEquals(3, splits.get(0).task().files().size()); + icebergSplit = ReaderFunctionTestBase.sortFilesAsAppendOrder(splits.get(0), dataFileList); + } + + @Test + public void testFullScan() throws Exception { + final IcebergSourceSplit split = icebergSplit; + final Configuration config = new Configuration(); + RowType rowType = FlinkSchemaUtil.convert(tableResource.table().schema()); + IcebergSourceSplitReader reader = new IcebergSourceSplitReader( + new RowDataIteratorReaderFunction(config, tableResource.table(), scanContext, rowType)); + reader.handleSplitsChanges(new SplitsAddition(Arrays.asList(split))); + + final RecordsWithSplitIds> readBatch0 = reader.fetch(); + final List rowBatch0 = readRows(readBatch0, split.splitId(), 0L, 0L); + TestHelpers.assertRecords(rowBatch0, recordBatchList.get(0), TestFixtures.SCHEMA); + + final RecordsWithSplitIds> readBatch1 + = reader.fetch(); + final List rowBatch1 = readRows(readBatch1, split.splitId(), 1L, 0L); + TestHelpers.assertRecords(rowBatch1, recordBatchList.get(1), TestFixtures.SCHEMA); + + final RecordsWithSplitIds> readBatch2 = reader.fetch(); + final List rowBatch2 = readRows(readBatch2, split.splitId(), 2L, 0L); + TestHelpers.assertRecords(rowBatch2, recordBatchList.get(2), TestFixtures.SCHEMA); + + final RecordsWithSplitIds> finishedBatch = reader.fetch(); + Assert.assertEquals(Sets.newHashSet(split.splitId()), finishedBatch.finishedSplits()); + Assert.assertEquals(null, finishedBatch.nextSplit()); + } + + @Test + public void testResumeFromEndOfFirstBatch() throws Exception { + final IcebergSourceSplit split = IcebergSourceSplit.fromCombinedScanTask(icebergSplit.task(), 0L, 2L); + final Configuration config = new Configuration(); + RowType rowType = FlinkSchemaUtil.convert(tableResource.table().schema()); + IcebergSourceSplitReader reader = new IcebergSourceSplitReader( + new RowDataIteratorReaderFunction(config, tableResource.table(), scanContext, rowType)); + reader.handleSplitsChanges(new SplitsAddition(Arrays.asList(split))); + + final RecordsWithSplitIds> readBatch1 = reader.fetch(); + final List rowBatch1 = readRows(readBatch1, split.splitId(), 1L, 0L); + TestHelpers.assertRecords(rowBatch1, recordBatchList.get(1), TestFixtures.SCHEMA); + + final RecordsWithSplitIds> readBatch2 = reader.fetch(); + final List rowBatch2 = readRows(readBatch2, split.splitId(), 2L, 0L); + TestHelpers.assertRecords(rowBatch2, recordBatchList.get(2), TestFixtures.SCHEMA); + + final RecordsWithSplitIds> finishedBatch = reader.fetch(); + Assert.assertEquals(Sets.newHashSet(split.splitId()), finishedBatch.finishedSplits()); + Assert.assertEquals(null, finishedBatch.nextSplit()); + } + + @Test + public void testResumeFromStartOfSecondBatch() throws Exception { + final IcebergSourceSplit split = IcebergSourceSplit.fromCombinedScanTask(icebergSplit.task(), 1L, 0L); + final Configuration config = new Configuration(); + RowType rowType = FlinkSchemaUtil.convert(tableResource.table().schema()); + 
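+ // the split resumes from file offset 1 (start of the second file), so the first fetch below is expected to skip the first file entirely and return the second file's records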
IcebergSourceSplitReader reader = new IcebergSourceSplitReader( + new RowDataIteratorReaderFunction(config, tableResource.table(), scanContext, rowType)); + reader.handleSplitsChanges(new SplitsAddition(Arrays.asList(split))); + + final RecordsWithSplitIds> readBatch1 = reader.fetch(); + final List rowBatch1 = readRows(readBatch1, split.splitId(), 1L, 0L); + TestHelpers.assertRecords(rowBatch1, recordBatchList.get(1), TestFixtures.SCHEMA); + + final RecordsWithSplitIds> readBatch2 = reader.fetch(); + final List rowBatch2 = readRows(readBatch2, split.splitId(), 2L, 0L); + TestHelpers.assertRecords(rowBatch2, recordBatchList.get(2), TestFixtures.SCHEMA); + + final RecordsWithSplitIds> finishedBatch + = reader.fetch(); + Assert.assertEquals(Sets.newHashSet(split.splitId()), finishedBatch.finishedSplits()); + Assert.assertEquals(null, finishedBatch.nextSplit()); + } + + @Test + public void testResumeFromMiddleOfSecondBatch() throws Exception { + final IcebergSourceSplit split = IcebergSourceSplit.fromCombinedScanTask(icebergSplit.task(), 1L, 1L); + + final Configuration config = new Configuration(); + RowType rowType = FlinkSchemaUtil.convert(tableResource.table().schema()); + IcebergSourceSplitReader reader = new IcebergSourceSplitReader( + new RowDataIteratorReaderFunction(config, tableResource.table(), scanContext, rowType)); + reader.handleSplitsChanges(new SplitsAddition(Arrays.asList(split))); + + final RecordsWithSplitIds> readBatch1 = reader.fetch(); + final List rowBatch1 = readRows(readBatch1, split.splitId(), 1L, 1L); + TestHelpers.assertRecords(rowBatch1, recordBatchList.get(1).subList(1, 2), TestFixtures.SCHEMA); + + final RecordsWithSplitIds> readBatch2 = reader.fetch(); + final List rowBatch2 = readRows(readBatch2, split.splitId(), 2L, 0L); + TestHelpers.assertRecords(rowBatch2, recordBatchList.get(2), TestFixtures.SCHEMA); + + final RecordsWithSplitIds> finishedBatch + = reader.fetch(); + Assert.assertEquals(Sets.newHashSet(split.splitId()), finishedBatch.finishedSplits()); + Assert.assertEquals(null, finishedBatch.nextSplit()); + } + + private List readRows( + RecordsWithSplitIds> readBatch, + String expectedSplitId, long expectedOffset, long expectedStartingRecordOffset) { + Assert.assertEquals(expectedSplitId, readBatch.nextSplit()); + final List rowDataList = new ArrayList<>(); + RecordAndPosition row; + int num = 0; + while ((row = readBatch.nextRecordFromSplit()) != null) { + Assert.assertEquals(expectedOffset, row.getOffset()); + num++; + Assert.assertEquals(expectedStartingRecordOffset + num, row.getRecordSkipCount()); + rowDataList.add(row.getRecord()); + } + readBatch.recycle(); + return TestHelpers.convertRowDataToRow(rowDataList, TestFixtures.ROW_TYPE); + } + +} diff --git a/flink/src/test/java/org/apache/iceberg/flink/source/reader/TestRecyclableArrayIterator.java b/flink/src/test/java/org/apache/iceberg/flink/source/reader/TestRecyclableArrayIterator.java new file mode 100644 index 000000000000..eb3503ed5371 --- /dev/null +++ b/flink/src/test/java/org/apache/iceberg/flink/source/reader/TestRecyclableArrayIterator.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.flink.source.reader; + +import java.util.concurrent.atomic.AtomicBoolean; +import org.apache.flink.connector.file.src.util.RecordAndPosition; +import org.junit.Assert; +import org.junit.Test; + +public class TestRecyclableArrayIterator { + + @Test + public void testEmptyConstruction() { + // dummy recycler + final RecyclableArrayIterator iter = new RecyclableArrayIterator<>( + ignored -> System.currentTimeMillis()); + Assert.assertNull(iter.next()); + } + + @Test + public void testGetElements() { + final String[] elements = new String[]{"1", "2", "3", "4"}; + final long initialOffset = 3; + final long initialSkipCount = 17; + + // dummy recycler + final RecyclableArrayIterator iter = new RecyclableArrayIterator<>( + ignored -> System.currentTimeMillis(), elements, elements.length, initialOffset, initialSkipCount); + + for (int i = 0; i < elements.length; i++) { + final RecordAndPosition recAndPos = iter.next(); + Assert.assertEquals(elements[i], recAndPos.getRecord()); + Assert.assertEquals(initialOffset, recAndPos.getOffset()); + Assert.assertEquals(initialSkipCount + i + 1, recAndPos.getRecordSkipCount()); + } + } + + @Test + public void testExhausted() { + // dummy recycler + final RecyclableArrayIterator iter = new RecyclableArrayIterator<>( + ignored -> System.currentTimeMillis(), new String[]{"1", "2"}, 2, 0L, 0L); + + iter.next(); + iter.next(); + + Assert.assertNull(iter.next()); + } + + @Test + public void testArraySubRange() { + // dummy recycler + final RecyclableArrayIterator iter = new RecyclableArrayIterator<>(ignored -> System.currentTimeMillis(), + new String[]{"1", "2", "3"}, 2, 0L, 0L); + + Assert.assertNotNull(iter.next()); + Assert.assertNotNull(iter.next()); + Assert.assertNull(iter.next()); + } + + @Test + public void testRecycler() { + final AtomicBoolean recycled = new AtomicBoolean(); + final RecyclableArrayIterator iter = new RecyclableArrayIterator<>(ignored -> recycled.set(true)); + iter.close(); + Assert.assertTrue(recycled.get()); + } +} diff --git a/flink/src/test/java/org/apache/iceberg/flink/source/reader/TestRowDataIteratorReaderFunction.java b/flink/src/test/java/org/apache/iceberg/flink/source/reader/TestRowDataIteratorReaderFunction.java new file mode 100644 index 000000000000..247b788403be --- /dev/null +++ b/flink/src/test/java/org/apache/iceberg/flink/source/reader/TestRowDataIteratorReaderFunction.java @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.flink.source.reader; + +import java.util.List; +import java.util.stream.Collectors; +import org.apache.flink.configuration.Configuration; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.data.conversion.DataStructureConverter; +import org.apache.flink.table.data.conversion.DataStructureConverters; +import org.apache.flink.table.types.logical.RowType; +import org.apache.flink.table.types.utils.TypeConversions; +import org.apache.flink.types.Row; +import org.apache.iceberg.FileFormat; +import org.apache.iceberg.Schema; +import org.apache.iceberg.data.Record; +import org.apache.iceberg.flink.FlinkSchemaUtil; +import org.apache.iceberg.flink.TestFixtures; +import org.apache.iceberg.flink.TestHelpers; + +public class TestRowDataIteratorReaderFunction extends ReaderFunctionTestBase { + + protected static final RowType rowType = FlinkSchemaUtil + .convert(scanContext.project()); + private static final DataStructureConverter rowDataConverter = DataStructureConverters.getConverter( + TypeConversions.fromLogicalToDataType(rowType)); + private static final org.apache.flink.configuration.Configuration flinkConfig = + new org.apache.flink.configuration.Configuration(); + + public TestRowDataIteratorReaderFunction(FileFormat fileFormat) { + super(fileFormat); + } + + @Override + protected ReaderFunction readerFunction() { + return new RowDataIteratorReaderFunction(new Configuration(), tableResource.table(), scanContext, rowType); + } + + @Override + protected void assertRecords(List expected, List actual, Schema schema) { + final List rows = toRows(actual); + TestHelpers.assertRecords(rows, expected, TestFixtures.SCHEMA); + } + + private List toRows(List actual) { + return actual.stream() + .map(rowData -> (Row) rowDataConverter.toExternal(rowData)) + .collect(Collectors.toList()); + } +} diff --git a/flink/src/test/java/org/apache/iceberg/flink/source/split/SplitHelpers.java b/flink/src/test/java/org/apache/iceberg/flink/source/split/SplitHelpers.java new file mode 100644 index 000000000000..91cf7c0526c6 --- /dev/null +++ b/flink/src/test/java/org/apache/iceberg/flink/source/split/SplitHelpers.java @@ -0,0 +1,96 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.iceberg.flink.source.split; + +import java.io.File; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.atomic.AtomicLong; +import java.util.stream.Collectors; +import org.apache.iceberg.BaseCombinedScanTask; +import org.apache.iceberg.CombinedScanTask; +import org.apache.iceberg.FileFormat; +import org.apache.iceberg.FileScanTask; +import org.apache.iceberg.MockFileScanTask; +import org.apache.iceberg.Table; +import org.apache.iceberg.data.GenericAppenderHelper; +import org.apache.iceberg.data.RandomGenericData; +import org.apache.iceberg.data.Record; +import org.apache.iceberg.flink.TestFixtures; +import org.apache.iceberg.flink.source.FlinkSplitGenerator; +import org.apache.iceberg.flink.source.ScanContext; +import org.apache.iceberg.hadoop.HadoopCatalog; +import org.apache.iceberg.relocated.com.google.common.collect.Lists; +import org.junit.Assert; +import org.junit.rules.TemporaryFolder; + +public class SplitHelpers { + + private static final AtomicLong splitLengthIncrement = new AtomicLong(); + + private SplitHelpers() { + + } + + public static List createMockedSplits(int splitCount) { + final List splits = new ArrayList<>(); + for (int i = 0; i < splitCount; ++i) { + // make sure each task has a different length, + // as it is part of the splitId calculation. + // This way, we can make sure all generated splits have different splitIds + FileScanTask fileScanTask = new MockFileScanTask(1024 + splitLengthIncrement.incrementAndGet()); + CombinedScanTask combinedScanTask = new BaseCombinedScanTask(fileScanTask); + splits.add(IcebergSourceSplit.fromCombinedScanTask(combinedScanTask)); + } + return splits; + } + + public static List createFileSplits( + TemporaryFolder temporaryFolder, int fileCount, int filesPerSplit) throws Exception { + final File warehouseFile = temporaryFolder.newFolder(); + Assert.assertTrue(warehouseFile.delete()); + final String warehouse = "file:" + warehouseFile; + org.apache.hadoop.conf.Configuration hadoopConf = new org.apache.hadoop.conf.Configuration(); + final HadoopCatalog catalog = new HadoopCatalog(hadoopConf, warehouse); + try { + final Table table = catalog.createTable(TestFixtures.TABLE_IDENTIFIER, TestFixtures.SCHEMA); + final GenericAppenderHelper dataAppender = new GenericAppenderHelper( + table, FileFormat.PARQUET, temporaryFolder); + for (int i = 0; i < fileCount; ++i) { + List records = RandomGenericData.generate(TestFixtures.SCHEMA, 2, i); + dataAppender.appendToTable(records); + } + + final ScanContext scanContext = ScanContext.builder().build(); + final List splits = FlinkSplitGenerator.planIcebergSourceSplits(table, scanContext); + return splits.stream() + .flatMap(split -> { + List> filesList = Lists.partition(new ArrayList<>(split.task().files()), filesPerSplit); + return filesList.stream() + .map(files -> new BaseCombinedScanTask(files)) + .map(combinedScanTask -> IcebergSourceSplit.fromCombinedScanTask(combinedScanTask)); + }) + .collect(Collectors.toList()); + } finally { + catalog.dropTable(TestFixtures.TABLE_IDENTIFIER); + catalog.close(); + } + } +} diff --git a/flink/src/test/java/org/apache/iceberg/flink/source/split/TestIcebergSourceSplitSerializer.java b/flink/src/test/java/org/apache/iceberg/flink/source/split/TestIcebergSourceSplitSerializer.java new file mode 100644 index 000000000000..38b358ec55a7 --- /dev/null +++ b/flink/src/test/java/org/apache/iceberg/flink/source/split/TestIcebergSourceSplitSerializer.java @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software 
Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.flink.source.split; + +import java.util.List; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.stream.Collectors; +import org.apache.iceberg.flink.source.Position; +import org.junit.Assert; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; + +public class TestIcebergSourceSplitSerializer { + + @ClassRule + public static final TemporaryFolder TEMPORARY_FOLDER = new TemporaryFolder(); + + private final IcebergSourceSplitSerializer serializer = IcebergSourceSplitSerializer.INSTANCE; + + @Test + public void testLatestVersion() throws Exception { + serializeAndDeserialize(1, 1); + serializeAndDeserialize(10, 2); + } + + private void serializeAndDeserialize(int splitCount, int filesPerSplit) throws Exception { + final List splits = SplitHelpers.createFileSplits(TEMPORARY_FOLDER, splitCount, filesPerSplit); + for (IcebergSourceSplit split : splits) { + final byte[] result = serializer.serialize(split); + final IcebergSourceSplit deserialized = serializer.deserialize(serializer.getVersion(), result); + Assert.assertEquals(split, deserialized); + + final byte[] cachedResult = serializer.serialize(split); + Assert.assertSame(result, cachedResult); + final IcebergSourceSplit deserialized2 = serializer.deserialize(serializer.getVersion(), cachedResult); + Assert.assertEquals(split, deserialized2); + } + } + + @Test + public void testV1() throws Exception { + serializeAndDeserializeV1(1, 1); + serializeAndDeserializeV1(10, 2); + } + + private void serializeAndDeserializeV1(int splitCount, int filesPerSplit) throws Exception { + final List splits = SplitHelpers.createFileSplits(TEMPORARY_FOLDER, splitCount, filesPerSplit); + for (IcebergSourceSplit split : splits) { + final byte[] result = serializer.serializeV1(split); + final IcebergSourceSplit deserialized = serializer.deserializeV1(result); + Assert.assertEquals(split, deserialized); + } + } + + @Test + public void testCheckpointedPosition() throws Exception { + final AtomicInteger index = new AtomicInteger(); + final List splits = SplitHelpers.createFileSplits(TEMPORARY_FOLDER, 10, 2).stream() + .map(split -> { + final IcebergSourceSplit result; + if (index.get() % 2 == 0) { + result = new IcebergSourceSplit(split.task(), new Position(index.get(), index.get())); + } else { + result = split; + } + index.incrementAndGet(); + return result; + }) + .collect(Collectors.toList()); + + for (IcebergSourceSplit split : splits) { + final byte[] result = serializer.serialize(split); + final IcebergSourceSplit deserialized = serializer.deserialize(serializer.getVersion(), result); + Assert.assertEquals(split, deserialized); + + final byte[] cachedResult = serializer.serialize(split); + 
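+ // serialize() caches the serialized bytes on the split (serializedFormCache), so the second call is expected to return the identical array instance, which the assertSame below verifies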
Assert.assertSame(result, cachedResult); + final IcebergSourceSplit deserialized2 = serializer.deserialize(serializer.getVersion(), cachedResult); + Assert.assertEquals(split, deserialized2); + } + } +} From a0037dbb0e4e01bfe64f2e5e2bd4b36f21755ee1 Mon Sep 17 00:00:00 2001 From: Steven Wu Date: Wed, 8 Sep 2021 09:52:25 -0700 Subject: [PATCH 02/14] whitespace change --- .../apache/iceberg/flink/source/RowDataFileScanTaskReader.java | 1 - .../flink/source/reader/ArrayPoolDataIteratorBatcher.java | 1 - .../iceberg/flink/source/reader/DataIteratorBatcher.java | 2 -- .../flink/source/reader/DataIteratorReaderFunction.java | 3 +-- .../flink/source/reader/IcebergSourceReaderMetrics.java | 1 - .../iceberg/flink/source/reader/IcebergSourceSplitReader.java | 1 - .../org/apache/iceberg/flink/source/reader/ReaderFunction.java | 2 -- .../org/apache/iceberg/flink/source/reader/RecordFactory.java | 1 - .../iceberg/flink/source/reader/RecyclableArrayIterator.java | 1 - .../flink/source/reader/RowDataIteratorReaderFunction.java | 1 - .../iceberg/flink/source/reader/RowDataRecordFactory.java | 1 - .../apache/iceberg/flink/source/split/IcebergSourceSplit.java | 2 -- .../flink/source/split/IcebergSourceSplitSerializer.java | 2 -- 13 files changed, 1 insertion(+), 18 deletions(-) diff --git a/flink/src/main/java/org/apache/iceberg/flink/source/RowDataFileScanTaskReader.java b/flink/src/main/java/org/apache/iceberg/flink/source/RowDataFileScanTaskReader.java index fbdb7bf3cc02..dbe8a747f96b 100644 --- a/flink/src/main/java/org/apache/iceberg/flink/source/RowDataFileScanTaskReader.java +++ b/flink/src/main/java/org/apache/iceberg/flink/source/RowDataFileScanTaskReader.java @@ -48,7 +48,6 @@ @Internal public class RowDataFileScanTaskReader implements FileScanTaskReader { - private final Schema tableSchema; private final Schema projectedSchema; private final String nameMapping; diff --git a/flink/src/main/java/org/apache/iceberg/flink/source/reader/ArrayPoolDataIteratorBatcher.java b/flink/src/main/java/org/apache/iceberg/flink/source/reader/ArrayPoolDataIteratorBatcher.java index 6b327898a8b1..fc03fc4683fb 100644 --- a/flink/src/main/java/org/apache/iceberg/flink/source/reader/ArrayPoolDataIteratorBatcher.java +++ b/flink/src/main/java/org/apache/iceberg/flink/source/reader/ArrayPoolDataIteratorBatcher.java @@ -32,7 +32,6 @@ import org.apache.iceberg.io.CloseableIterator; class ArrayPoolDataIteratorBatcher implements DataIteratorBatcher { - private final Configuration config; private final RecordFactory recordFactory; diff --git a/flink/src/main/java/org/apache/iceberg/flink/source/reader/DataIteratorBatcher.java b/flink/src/main/java/org/apache/iceberg/flink/source/reader/DataIteratorBatcher.java index a296517a1846..d2784f253279 100644 --- a/flink/src/main/java/org/apache/iceberg/flink/source/reader/DataIteratorBatcher.java +++ b/flink/src/main/java/org/apache/iceberg/flink/source/reader/DataIteratorBatcher.java @@ -27,7 +27,5 @@ @FunctionalInterface public interface DataIteratorBatcher extends Serializable { - CloseableIterator>> apply(String splitId, DataIterator inputIterator); - } diff --git a/flink/src/main/java/org/apache/iceberg/flink/source/reader/DataIteratorReaderFunction.java b/flink/src/main/java/org/apache/iceberg/flink/source/reader/DataIteratorReaderFunction.java index e3e6bfdc0394..364879401316 100644 --- a/flink/src/main/java/org/apache/iceberg/flink/source/reader/DataIteratorReaderFunction.java +++ b/flink/src/main/java/org/apache/iceberg/flink/source/reader/DataIteratorReaderFunction.java @@ 
-26,10 +26,9 @@ import org.apache.iceberg.io.CloseableIterator; public abstract class DataIteratorReaderFunction implements ReaderFunction { - private final DataIteratorBatcher batcher; - DataIteratorReaderFunction(DataIteratorBatcher batcher) { + public DataIteratorReaderFunction(DataIteratorBatcher batcher) { this.batcher = batcher; } diff --git a/flink/src/main/java/org/apache/iceberg/flink/source/reader/IcebergSourceReaderMetrics.java b/flink/src/main/java/org/apache/iceberg/flink/source/reader/IcebergSourceReaderMetrics.java index a2aa7d518731..cb81d54bab90 100644 --- a/flink/src/main/java/org/apache/iceberg/flink/source/reader/IcebergSourceReaderMetrics.java +++ b/flink/src/main/java/org/apache/iceberg/flink/source/reader/IcebergSourceReaderMetrics.java @@ -24,7 +24,6 @@ import org.apache.flink.metrics.MetricGroup; public class IcebergSourceReaderMetrics { - private final AtomicLong numRecordsOut; private final AtomicLong assignedSplits; private final AtomicLong finishedSplits; diff --git a/flink/src/main/java/org/apache/iceberg/flink/source/reader/IcebergSourceSplitReader.java b/flink/src/main/java/org/apache/iceberg/flink/source/reader/IcebergSourceSplitReader.java index 9c68b6f6b4ac..359a352864d1 100644 --- a/flink/src/main/java/org/apache/iceberg/flink/source/reader/IcebergSourceSplitReader.java +++ b/flink/src/main/java/org/apache/iceberg/flink/source/reader/IcebergSourceSplitReader.java @@ -40,7 +40,6 @@ class IcebergSourceSplitReader implements SplitReader, I private final ReaderFunction readerFunction; private final int indexOfSubtask; private final IcebergSourceReaderMetrics metrics; - private final Queue splits; @Nullable diff --git a/flink/src/main/java/org/apache/iceberg/flink/source/reader/ReaderFunction.java b/flink/src/main/java/org/apache/iceberg/flink/source/reader/ReaderFunction.java index bd6fd097444b..c3a32bc0e05d 100644 --- a/flink/src/main/java/org/apache/iceberg/flink/source/reader/ReaderFunction.java +++ b/flink/src/main/java/org/apache/iceberg/flink/source/reader/ReaderFunction.java @@ -26,8 +26,6 @@ import org.apache.iceberg.io.CloseableIterator; public interface ReaderFunction extends Serializable { - CloseableIterator>> read(IcebergSourceSplit split); - } diff --git a/flink/src/main/java/org/apache/iceberg/flink/source/reader/RecordFactory.java b/flink/src/main/java/org/apache/iceberg/flink/source/reader/RecordFactory.java index 82deda46018a..c006558e8de4 100644 --- a/flink/src/main/java/org/apache/iceberg/flink/source/reader/RecordFactory.java +++ b/flink/src/main/java/org/apache/iceberg/flink/source/reader/RecordFactory.java @@ -22,7 +22,6 @@ import java.io.Serializable; interface RecordFactory extends Serializable { - /** * Create a batch of records */ diff --git a/flink/src/main/java/org/apache/iceberg/flink/source/reader/RecyclableArrayIterator.java b/flink/src/main/java/org/apache/iceberg/flink/source/reader/RecyclableArrayIterator.java index 63e657b12629..e0bd19ed3f76 100644 --- a/flink/src/main/java/org/apache/iceberg/flink/source/reader/RecyclableArrayIterator.java +++ b/flink/src/main/java/org/apache/iceberg/flink/source/reader/RecyclableArrayIterator.java @@ -32,7 +32,6 @@ * Main difference is the records array can be recycled back to a pool. 
*/ final class RecyclableArrayIterator implements CloseableIterator> { - private final Pool.Recycler recycler; private final E[] records; private final int num; diff --git a/flink/src/main/java/org/apache/iceberg/flink/source/reader/RowDataIteratorReaderFunction.java b/flink/src/main/java/org/apache/iceberg/flink/source/reader/RowDataIteratorReaderFunction.java index 92e054c0e13b..c90df52f349e 100644 --- a/flink/src/main/java/org/apache/iceberg/flink/source/reader/RowDataIteratorReaderFunction.java +++ b/flink/src/main/java/org/apache/iceberg/flink/source/reader/RowDataIteratorReaderFunction.java @@ -29,7 +29,6 @@ import org.apache.iceberg.flink.source.split.IcebergSourceSplit; public class RowDataIteratorReaderFunction extends DataIteratorReaderFunction { - private final Table table; private final ScanContext scanContext; diff --git a/flink/src/main/java/org/apache/iceberg/flink/source/reader/RowDataRecordFactory.java b/flink/src/main/java/org/apache/iceberg/flink/source/reader/RowDataRecordFactory.java index f2ff28cab222..3e46574cc9fa 100644 --- a/flink/src/main/java/org/apache/iceberg/flink/source/reader/RowDataRecordFactory.java +++ b/flink/src/main/java/org/apache/iceberg/flink/source/reader/RowDataRecordFactory.java @@ -27,7 +27,6 @@ import org.apache.iceberg.flink.data.RowDataUtil; class RowDataRecordFactory implements RecordFactory { - private final RowType rowType; private final TypeSerializer[] fieldSerializers; diff --git a/flink/src/main/java/org/apache/iceberg/flink/source/split/IcebergSourceSplit.java b/flink/src/main/java/org/apache/iceberg/flink/source/split/IcebergSourceSplit.java index 02692dd86668..d8d13090cca6 100644 --- a/flink/src/main/java/org/apache/iceberg/flink/source/split/IcebergSourceSplit.java +++ b/flink/src/main/java/org/apache/iceberg/flink/source/split/IcebergSourceSplit.java @@ -32,9 +32,7 @@ import org.apache.iceberg.relocated.com.google.common.collect.Iterables; public class IcebergSourceSplit implements SourceSplit, Serializable { - private final CombinedScanTask task; - /** * Position field is mutable */ diff --git a/flink/src/main/java/org/apache/iceberg/flink/source/split/IcebergSourceSplitSerializer.java b/flink/src/main/java/org/apache/iceberg/flink/source/split/IcebergSourceSplitSerializer.java index dcac972bd06f..86aa82fb1656 100644 --- a/flink/src/main/java/org/apache/iceberg/flink/source/split/IcebergSourceSplitSerializer.java +++ b/flink/src/main/java/org/apache/iceberg/flink/source/split/IcebergSourceSplitSerializer.java @@ -29,9 +29,7 @@ * will switch to more stable serializer from issue-1698. 
*/ public class IcebergSourceSplitSerializer implements SimpleVersionedSerializer { - public static final IcebergSourceSplitSerializer INSTANCE = new IcebergSourceSplitSerializer(); - private static final int VERSION = 1; @Override From ef0b9377ac62b40de4f86fd77121c0ff6ab48eef Mon Sep 17 00:00:00 2001 From: Steven Wu Date: Wed, 8 Sep 2021 11:17:24 -0700 Subject: [PATCH 03/14] rename class --- ...nction.java => RowDataReaderFunction.java} | 4 +-- .../reader/TestIcebergSourceSplitReader.java | 36 ++++++++++--------- ...on.java => TestRowDataReaderFunction.java} | 6 ++-- 3 files changed, 24 insertions(+), 22 deletions(-) rename flink/src/main/java/org/apache/iceberg/flink/source/reader/{RowDataIteratorReaderFunction.java => RowDataReaderFunction.java} (93%) rename flink/src/test/java/org/apache/iceberg/flink/source/reader/{TestRowDataIteratorReaderFunction.java => TestRowDataReaderFunction.java} (90%) diff --git a/flink/src/main/java/org/apache/iceberg/flink/source/reader/RowDataIteratorReaderFunction.java b/flink/src/main/java/org/apache/iceberg/flink/source/reader/RowDataReaderFunction.java similarity index 93% rename from flink/src/main/java/org/apache/iceberg/flink/source/reader/RowDataIteratorReaderFunction.java rename to flink/src/main/java/org/apache/iceberg/flink/source/reader/RowDataReaderFunction.java index c90df52f349e..907675543bd0 100644 --- a/flink/src/main/java/org/apache/iceberg/flink/source/reader/RowDataIteratorReaderFunction.java +++ b/flink/src/main/java/org/apache/iceberg/flink/source/reader/RowDataReaderFunction.java @@ -28,11 +28,11 @@ import org.apache.iceberg.flink.source.ScanContext; import org.apache.iceberg.flink.source.split.IcebergSourceSplit; -public class RowDataIteratorReaderFunction extends DataIteratorReaderFunction { +public class RowDataReaderFunction extends DataIteratorReaderFunction { private final Table table; private final ScanContext scanContext; - public RowDataIteratorReaderFunction( + public RowDataReaderFunction( Configuration config, Table table, ScanContext scanContext, diff --git a/flink/src/test/java/org/apache/iceberg/flink/source/reader/TestIcebergSourceSplitReader.java b/flink/src/test/java/org/apache/iceberg/flink/source/reader/TestIcebergSourceSplitReader.java index e83d1d61dd35..9662a04dbd49 100644 --- a/flink/src/test/java/org/apache/iceberg/flink/source/reader/TestIcebergSourceSplitReader.java +++ b/flink/src/test/java/org/apache/iceberg/flink/source/reader/TestIcebergSourceSplitReader.java @@ -27,6 +27,8 @@ import org.apache.flink.connector.base.source.reader.RecordsWithSplitIds; import org.apache.flink.connector.base.source.reader.splitreader.SplitsAddition; import org.apache.flink.connector.file.src.util.RecordAndPosition; +import org.apache.flink.connector.testutils.source.reader.TestingReaderContext; +import org.apache.flink.metrics.groups.UnregisteredMetricsGroup; import org.apache.flink.table.data.RowData; import org.apache.flink.table.types.logical.RowType; import org.apache.flink.types.Row; @@ -44,6 +46,7 @@ import org.apache.iceberg.flink.source.split.IcebergSourceSplit; import org.apache.iceberg.relocated.com.google.common.collect.Sets; import org.junit.Assert; +import org.junit.Before; import org.junit.BeforeClass; import org.junit.ClassRule; import org.junit.Test; @@ -71,6 +74,8 @@ public class TestIcebergSourceSplitReader { private static List dataFileList; private static IcebergSourceSplit icebergSplit; + private IcebergSourceSplitReader reader; + @BeforeClass public static void beforeClass() throws IOException { final 
GenericAppenderHelper dataAppender = new GenericAppenderHelper( @@ -92,13 +97,23 @@ public static void beforeClass() throws IOException { icebergSplit = ReaderFunctionTestBase.sortFilesAsAppendOrder(splits.get(0), dataFileList); } + @Before + public void before() { + reader = createSplitReader(); + } + + private IcebergSourceSplitReader createSplitReader() { + final Configuration config = new Configuration(); + RowType rowType = FlinkSchemaUtil.convert(tableResource.table().schema()); + return new IcebergSourceSplitReader( + new RowDataReaderFunction(config, tableResource.table(), scanContext, rowType), + new TestingReaderContext(), + new IcebergSourceReaderMetrics(new UnregisteredMetricsGroup())); + } + @Test public void testFullScan() throws Exception { final IcebergSourceSplit split = icebergSplit; - final Configuration config = new Configuration(); - RowType rowType = FlinkSchemaUtil.convert(tableResource.table().schema()); - IcebergSourceSplitReader reader = new IcebergSourceSplitReader( - new RowDataIteratorReaderFunction(config, tableResource.table(), scanContext, rowType)); reader.handleSplitsChanges(new SplitsAddition(Arrays.asList(split))); final RecordsWithSplitIds> readBatch0 = reader.fetch(); @@ -122,10 +137,6 @@ public void testFullScan() throws Exception { @Test public void testResumeFromEndOfFirstBatch() throws Exception { final IcebergSourceSplit split = IcebergSourceSplit.fromCombinedScanTask(icebergSplit.task(), 0L, 2L); - final Configuration config = new Configuration(); - RowType rowType = FlinkSchemaUtil.convert(tableResource.table().schema()); - IcebergSourceSplitReader reader = new IcebergSourceSplitReader( - new RowDataIteratorReaderFunction(config, tableResource.table(), scanContext, rowType)); reader.handleSplitsChanges(new SplitsAddition(Arrays.asList(split))); final RecordsWithSplitIds> readBatch1 = reader.fetch(); @@ -144,10 +155,6 @@ public void testResumeFromEndOfFirstBatch() throws Exception { @Test public void testResumeFromStartOfSecondBatch() throws Exception { final IcebergSourceSplit split = IcebergSourceSplit.fromCombinedScanTask(icebergSplit.task(), 1L, 0L); - final Configuration config = new Configuration(); - RowType rowType = FlinkSchemaUtil.convert(tableResource.table().schema()); - IcebergSourceSplitReader reader = new IcebergSourceSplitReader( - new RowDataIteratorReaderFunction(config, tableResource.table(), scanContext, rowType)); reader.handleSplitsChanges(new SplitsAddition(Arrays.asList(split))); final RecordsWithSplitIds> readBatch1 = reader.fetch(); @@ -167,11 +174,6 @@ public void testResumeFromStartOfSecondBatch() throws Exception { @Test public void testResumeFromMiddleOfSecondBatch() throws Exception { final IcebergSourceSplit split = IcebergSourceSplit.fromCombinedScanTask(icebergSplit.task(), 1L, 1L); - - final Configuration config = new Configuration(); - RowType rowType = FlinkSchemaUtil.convert(tableResource.table().schema()); - IcebergSourceSplitReader reader = new IcebergSourceSplitReader( - new RowDataIteratorReaderFunction(config, tableResource.table(), scanContext, rowType)); reader.handleSplitsChanges(new SplitsAddition(Arrays.asList(split))); final RecordsWithSplitIds> readBatch1 = reader.fetch(); diff --git a/flink/src/test/java/org/apache/iceberg/flink/source/reader/TestRowDataIteratorReaderFunction.java b/flink/src/test/java/org/apache/iceberg/flink/source/reader/TestRowDataReaderFunction.java similarity index 90% rename from 
flink/src/test/java/org/apache/iceberg/flink/source/reader/TestRowDataIteratorReaderFunction.java rename to flink/src/test/java/org/apache/iceberg/flink/source/reader/TestRowDataReaderFunction.java index 247b788403be..558bd2785552 100644 --- a/flink/src/test/java/org/apache/iceberg/flink/source/reader/TestRowDataIteratorReaderFunction.java +++ b/flink/src/test/java/org/apache/iceberg/flink/source/reader/TestRowDataReaderFunction.java @@ -35,7 +35,7 @@ import org.apache.iceberg.flink.TestFixtures; import org.apache.iceberg.flink.TestHelpers; -public class TestRowDataIteratorReaderFunction extends ReaderFunctionTestBase { +public class TestRowDataReaderFunction extends ReaderFunctionTestBase { protected static final RowType rowType = FlinkSchemaUtil .convert(scanContext.project()); @@ -44,13 +44,13 @@ public class TestRowDataIteratorReaderFunction extends ReaderFunctionTestBase readerFunction() { - return new RowDataIteratorReaderFunction(new Configuration(), tableResource.table(), scanContext, rowType); + return new RowDataReaderFunction(new Configuration(), tableResource.table(), scanContext, rowType); } @Override From 5b3f5fd7480be7b6bf7f6ce97c1ce2ca51d91127 Mon Sep 17 00:00:00 2001 From: Steven Wu Date: Wed, 8 Sep 2021 11:46:38 -0700 Subject: [PATCH 04/14] sync up with uber branch --- .../flink/source/reader/ReaderFunctionTestBase.java | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/flink/src/test/java/org/apache/iceberg/flink/source/reader/ReaderFunctionTestBase.java b/flink/src/test/java/org/apache/iceberg/flink/source/reader/ReaderFunctionTestBase.java index 54abab75341d..b109d823733a 100644 --- a/flink/src/test/java/org/apache/iceberg/flink/source/reader/ReaderFunctionTestBase.java +++ b/flink/src/test/java/org/apache/iceberg/flink/source/reader/ReaderFunctionTestBase.java @@ -156,7 +156,7 @@ private List extractRecordsAndAssertPosition( @Test public void testNoCheckpointedPosition() throws IOException { final IcebergSourceSplit split = icebergSplit; - final CloseableIterator>> reader = readerFunction().apply(split); + final CloseableIterator>> reader = readerFunction().read(split); final RecordsWithSplitIds> batch0 = reader.next(); final List actual0 = extractRecordsAndAssertPosition(batch0, recordBatchList.get(0).size(), 0L, 0L); @@ -179,7 +179,7 @@ public void testCheckpointedPositionBeforeFirstFile() throws IOException { final IcebergSourceSplit split = new IcebergSourceSplit( icebergSplit.task(), new Position(0L, 0L)); - final CloseableIterator>> reader = readerFunction().apply(split); + final CloseableIterator>> reader = readerFunction().read(split); final RecordsWithSplitIds> batch0 = reader.next(); final List actual0 = extractRecordsAndAssertPosition(batch0, recordBatchList.get(0).size(), 0L, 0L); @@ -202,7 +202,7 @@ public void testCheckpointedPositionMiddleFirstFile() throws IOException { final IcebergSourceSplit split = new IcebergSourceSplit( icebergSplit.task(), new Position(0L, 1L)); - final CloseableIterator>> reader = readerFunction().apply(split); + final CloseableIterator>> reader = readerFunction().read(split); final RecordsWithSplitIds> batch0 = reader.next(); final List actual0 = extractRecordsAndAssertPosition(batch0, 1L, 0L, 1L); @@ -225,7 +225,7 @@ public void testCheckpointedPositionAfterFirstFile() throws IOException { final IcebergSourceSplit split = new IcebergSourceSplit( icebergSplit.task(), new Position(0L, 2L)); - final CloseableIterator>> reader = readerFunction().apply(split); + final CloseableIterator>> reader = 
readerFunction().read(split); final RecordsWithSplitIds> batch0 = reader.next(); final List actual1 = extractRecordsAndAssertPosition(batch0, recordBatchList.get(1).size(), 1L, 0L); @@ -243,7 +243,7 @@ public void testCheckpointedPositionBeforeSecondFile() throws IOException { final IcebergSourceSplit split = new IcebergSourceSplit( icebergSplit.task(), new Position(1L, 0L)); - final CloseableIterator>> reader = readerFunction().apply(split); + final CloseableIterator>> reader = readerFunction().read(split); final RecordsWithSplitIds> batch1 = reader.next(); final List actual1 = extractRecordsAndAssertPosition(batch1, recordBatchList.get(1).size(), 1L, 0L); @@ -261,7 +261,7 @@ public void testCheckpointedPositionMidSecondFile() throws IOException { final IcebergSourceSplit split = new IcebergSourceSplit( icebergSplit.task(), new Position(1L, 1L)); - final CloseableIterator>> reader = readerFunction().apply(split); + final CloseableIterator>> reader = readerFunction().read(split); final RecordsWithSplitIds> batch1 = reader.next(); final List actual1 = extractRecordsAndAssertPosition(batch1, 1L, 1L, 1L); From ce4cfad6d75ca34c8e5b6134f2b936c059c0fdb5 Mon Sep 17 00:00:00 2001 From: Steven Wu Date: Wed, 8 Sep 2021 13:02:26 -0700 Subject: [PATCH 05/14] make ReaderFunction a functional interface --- .../source/reader/DataIteratorReaderFunction.java | 2 +- .../source/reader/IcebergSourceSplitReader.java | 2 +- .../iceberg/flink/source/reader/ReaderFunction.java | 7 ++++--- .../flink/source/reader/ReaderFunctionTestBase.java | 12 ++++++------ 4 files changed, 12 insertions(+), 11 deletions(-) diff --git a/flink/src/main/java/org/apache/iceberg/flink/source/reader/DataIteratorReaderFunction.java b/flink/src/main/java/org/apache/iceberg/flink/source/reader/DataIteratorReaderFunction.java index 364879401316..d587ed8ba83e 100644 --- a/flink/src/main/java/org/apache/iceberg/flink/source/reader/DataIteratorReaderFunction.java +++ b/flink/src/main/java/org/apache/iceberg/flink/source/reader/DataIteratorReaderFunction.java @@ -35,7 +35,7 @@ public DataIteratorReaderFunction(DataIteratorBatcher batcher) { public abstract DataIterator createDataIterator(IcebergSourceSplit split); @Override - public CloseableIterator>> read(IcebergSourceSplit split) { + public CloseableIterator>> apply(IcebergSourceSplit split) { DataIterator inputIterator = createDataIterator(split); if (split.position() != null) { inputIterator.seek(split.position()); diff --git a/flink/src/main/java/org/apache/iceberg/flink/source/reader/IcebergSourceSplitReader.java b/flink/src/main/java/org/apache/iceberg/flink/source/reader/IcebergSourceSplitReader.java index 359a352864d1..023feb9fe7aa 100644 --- a/flink/src/main/java/org/apache/iceberg/flink/source/reader/IcebergSourceSplitReader.java +++ b/flink/src/main/java/org/apache/iceberg/flink/source/reader/IcebergSourceSplitReader.java @@ -101,7 +101,7 @@ private void checkSplitOrStartNext() throws IOException { throw new IOException("No split remaining"); } currentSplitId = nextSplit.splitId(); - currentReader = readerFunction.read(nextSplit); + currentReader = readerFunction.apply(nextSplit); } private FileRecords finishSplit() throws IOException { diff --git a/flink/src/main/java/org/apache/iceberg/flink/source/reader/ReaderFunction.java b/flink/src/main/java/org/apache/iceberg/flink/source/reader/ReaderFunction.java index c3a32bc0e05d..d8add3d257cc 100644 --- a/flink/src/main/java/org/apache/iceberg/flink/source/reader/ReaderFunction.java +++ 
b/flink/src/main/java/org/apache/iceberg/flink/source/reader/ReaderFunction.java @@ -20,12 +20,13 @@ package org.apache.iceberg.flink.source.reader; import java.io.Serializable; +import java.util.function.Function; import org.apache.flink.connector.base.source.reader.RecordsWithSplitIds; import org.apache.flink.connector.file.src.util.RecordAndPosition; import org.apache.iceberg.flink.source.split.IcebergSourceSplit; import org.apache.iceberg.io.CloseableIterator; -public interface ReaderFunction extends Serializable { - CloseableIterator>> read(IcebergSourceSplit split); +@FunctionalInterface +public interface ReaderFunction extends Serializable, Function>>> { } - diff --git a/flink/src/test/java/org/apache/iceberg/flink/source/reader/ReaderFunctionTestBase.java b/flink/src/test/java/org/apache/iceberg/flink/source/reader/ReaderFunctionTestBase.java index b109d823733a..54abab75341d 100644 --- a/flink/src/test/java/org/apache/iceberg/flink/source/reader/ReaderFunctionTestBase.java +++ b/flink/src/test/java/org/apache/iceberg/flink/source/reader/ReaderFunctionTestBase.java @@ -156,7 +156,7 @@ private List extractRecordsAndAssertPosition( @Test public void testNoCheckpointedPosition() throws IOException { final IcebergSourceSplit split = icebergSplit; - final CloseableIterator>> reader = readerFunction().read(split); + final CloseableIterator>> reader = readerFunction().apply(split); final RecordsWithSplitIds> batch0 = reader.next(); final List actual0 = extractRecordsAndAssertPosition(batch0, recordBatchList.get(0).size(), 0L, 0L); @@ -179,7 +179,7 @@ public void testCheckpointedPositionBeforeFirstFile() throws IOException { final IcebergSourceSplit split = new IcebergSourceSplit( icebergSplit.task(), new Position(0L, 0L)); - final CloseableIterator>> reader = readerFunction().read(split); + final CloseableIterator>> reader = readerFunction().apply(split); final RecordsWithSplitIds> batch0 = reader.next(); final List actual0 = extractRecordsAndAssertPosition(batch0, recordBatchList.get(0).size(), 0L, 0L); @@ -202,7 +202,7 @@ public void testCheckpointedPositionMiddleFirstFile() throws IOException { final IcebergSourceSplit split = new IcebergSourceSplit( icebergSplit.task(), new Position(0L, 1L)); - final CloseableIterator>> reader = readerFunction().read(split); + final CloseableIterator>> reader = readerFunction().apply(split); final RecordsWithSplitIds> batch0 = reader.next(); final List actual0 = extractRecordsAndAssertPosition(batch0, 1L, 0L, 1L); @@ -225,7 +225,7 @@ public void testCheckpointedPositionAfterFirstFile() throws IOException { final IcebergSourceSplit split = new IcebergSourceSplit( icebergSplit.task(), new Position(0L, 2L)); - final CloseableIterator>> reader = readerFunction().read(split); + final CloseableIterator>> reader = readerFunction().apply(split); final RecordsWithSplitIds> batch0 = reader.next(); final List actual1 = extractRecordsAndAssertPosition(batch0, recordBatchList.get(1).size(), 1L, 0L); @@ -243,7 +243,7 @@ public void testCheckpointedPositionBeforeSecondFile() throws IOException { final IcebergSourceSplit split = new IcebergSourceSplit( icebergSplit.task(), new Position(1L, 0L)); - final CloseableIterator>> reader = readerFunction().read(split); + final CloseableIterator>> reader = readerFunction().apply(split); final RecordsWithSplitIds> batch1 = reader.next(); final List actual1 = extractRecordsAndAssertPosition(batch1, recordBatchList.get(1).size(), 1L, 0L); @@ -261,7 +261,7 @@ public void testCheckpointedPositionMidSecondFile() throws IOException { 
final IcebergSourceSplit split = new IcebergSourceSplit( icebergSplit.task(), new Position(1L, 1L)); - final CloseableIterator>> reader = readerFunction().read(split); + final CloseableIterator>> reader = readerFunction().apply(split); final RecordsWithSplitIds> batch1 = reader.next(); final List actual1 = extractRecordsAndAssertPosition(batch1, 1L, 1L, 1L); From c2b7eea3a55a7cb4b9d9de503ed66a2c527e38db Mon Sep 17 00:00:00 2001 From: Steven Wu Date: Mon, 20 Sep 2021 09:22:16 -0700 Subject: [PATCH 06/14] support includeColumnStats in ScanContext. This is needed for event time aligned assigner for FLIP-27 source. --- .../flink/source/FlinkSplitGenerator.java | 4 ++++ .../iceberg/flink/source/ScanContext.java | 24 ++++++++++++++++--- 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/flink/src/main/java/org/apache/iceberg/flink/source/FlinkSplitGenerator.java b/flink/src/main/java/org/apache/iceberg/flink/source/FlinkSplitGenerator.java index 1ba396c187e9..be1a9138d60c 100644 --- a/flink/src/main/java/org/apache/iceberg/flink/source/FlinkSplitGenerator.java +++ b/flink/src/main/java/org/apache/iceberg/flink/source/FlinkSplitGenerator.java @@ -65,6 +65,10 @@ static CloseableIterable planTasks(Table table, ScanContext co .caseSensitive(context.caseSensitive()) .project(context.project()); + if (context.includeColumnStats()) { + scan = scan.includeColumnStats(); + } + if (context.snapshotId() != null) { scan = scan.useSnapshot(context.snapshotId()); } diff --git a/flink/src/main/java/org/apache/iceberg/flink/source/ScanContext.java b/flink/src/main/java/org/apache/iceberg/flink/source/ScanContext.java index b0336d70f179..73a31930cd35 100644 --- a/flink/src/main/java/org/apache/iceberg/flink/source/ScanContext.java +++ b/flink/src/main/java/org/apache/iceberg/flink/source/ScanContext.java @@ -68,6 +68,9 @@ public class ScanContext implements Serializable { private static final ConfigOption MONITOR_INTERVAL = ConfigOptions.key("monitor-interval").durationType().defaultValue(Duration.ofSeconds(10)); + private static final ConfigOption INCLUDE_COLUMN_STATS = + ConfigOptions.key("include-column-stats").booleanType().defaultValue(false); + private final boolean caseSensitive; private final Long snapshotId; private final Long startSnapshotId; @@ -83,11 +86,12 @@ public class ScanContext implements Serializable { private final Schema schema; private final List filters; private final long limit; + private final boolean includeColumnStats; private ScanContext(boolean caseSensitive, Long snapshotId, Long startSnapshotId, Long endSnapshotId, Long asOfTimestamp, Long splitSize, Integer splitLookback, Long splitOpenFileCost, boolean isStreaming, Duration monitorInterval, String nameMapping, - Schema schema, List filters, long limit) { + Schema schema, List filters, long limit, boolean includeColumnStats) { this.caseSensitive = caseSensitive; this.snapshotId = snapshotId; this.startSnapshotId = startSnapshotId; @@ -103,6 +107,7 @@ private ScanContext(boolean caseSensitive, Long snapshotId, Long startSnapshotId this.schema = schema; this.filters = filters; this.limit = limit; + this.includeColumnStats = includeColumnStats; } public boolean caseSensitive() { @@ -161,6 +166,10 @@ public long limit() { return limit; } + public boolean includeColumnStats() { + return includeColumnStats; + } + public ScanContext copyWithAppendsBetween(long newStartSnapshotId, long newEndSnapshotId) { return ScanContext.builder() .caseSensitive(caseSensitive) @@ -177,6 +186,7 @@ public ScanContext 
copyWithAppendsBetween(long newStartSnapshotId, long newEndSn .project(schema) .filters(filters) .limit(limit) + .includeColumnStats(includeColumnStats) .build(); } @@ -196,6 +206,7 @@ public ScanContext copyWithSnapshotId(long newSnapshotId) { .project(schema) .filters(filters) .limit(limit) + .includeColumnStats(includeColumnStats) .build(); } @@ -218,6 +229,7 @@ public static class Builder { private Schema projectedSchema; private List filters; private long limit = -1L; + private boolean includeColumnStats = INCLUDE_COLUMN_STATS.defaultValue(); private Builder() { } @@ -292,6 +304,11 @@ public Builder limit(long newLimit) { return this; } + public Builder includeColumnStats(boolean newIncludeColumnStats) { + this.includeColumnStats = newIncludeColumnStats; + return this; + } + public Builder fromProperties(Map properties) { Configuration config = new Configuration(); properties.forEach(config::setString); @@ -306,14 +323,15 @@ public Builder fromProperties(Map properties) { .splitOpenFileCost(config.get(SPLIT_FILE_OPEN_COST)) .streaming(config.get(STREAMING)) .monitorInterval(config.get(MONITOR_INTERVAL)) - .nameMapping(properties.get(DEFAULT_NAME_MAPPING)); + .nameMapping(properties.get(DEFAULT_NAME_MAPPING)) + .includeColumnStats(config.get(INCLUDE_COLUMN_STATS)); } public ScanContext build() { return new ScanContext(caseSensitive, snapshotId, startSnapshotId, endSnapshotId, asOfTimestamp, splitSize, splitLookback, splitOpenFileCost, isStreaming, monitorInterval, nameMapping, projectedSchema, - filters, limit); + filters, limit, includeColumnStats); } } } From 93e3e498b76b2aff888957f724d1e431b5de8de9 Mon Sep 17 00:00:00 2001 From: Steven Wu Date: Mon, 20 Sep 2021 11:12:02 -0700 Subject: [PATCH 07/14] rename FlinkSplitGenerator to FlinkSplitPlanner. 
also marked some public classes as @Internal --- .../apache/iceberg/flink/source/FlinkInputFormat.java | 2 +- ...linkSplitGenerator.java => FlinkSplitPlanner.java} | 11 ++++++++--- .../org/apache/iceberg/flink/source/Position.java | 2 ++ .../flink/source/StreamingMonitorFunction.java | 2 +- .../iceberg/flink/source/reader/FileRecords.java | 2 ++ .../source/reader/IcebergSourceReaderMetrics.java | 2 ++ .../flink/source/split/IcebergSourceSplit.java | 2 ++ .../source/split/IcebergSourceSplitSerializer.java | 2 ++ .../flink/source/TestStreamingReaderOperator.java | 2 +- .../flink/source/reader/ReaderFunctionTestBase.java | 4 ++-- .../source/reader/TestIcebergSourceSplitReader.java | 4 ++-- .../iceberg/flink/source/split/SplitHelpers.java | 4 ++-- 12 files changed, 27 insertions(+), 12 deletions(-) rename flink/src/main/java/org/apache/iceberg/flink/source/{FlinkSplitGenerator.java => FlinkSplitPlanner.java} (93%) diff --git a/flink/src/main/java/org/apache/iceberg/flink/source/FlinkInputFormat.java b/flink/src/main/java/org/apache/iceberg/flink/source/FlinkInputFormat.java index 8b757ac31606..a4cbab5c37e4 100644 --- a/flink/src/main/java/org/apache/iceberg/flink/source/FlinkInputFormat.java +++ b/flink/src/main/java/org/apache/iceberg/flink/source/FlinkInputFormat.java @@ -77,7 +77,7 @@ public FlinkInputSplit[] createInputSplits(int minNumSplits) throws IOException tableLoader.open(); try (TableLoader loader = tableLoader) { Table table = loader.loadTable(); - return FlinkSplitGenerator.createInputSplits(table, context); + return FlinkSplitPlanner.planInputSplits(table, context); } } diff --git a/flink/src/main/java/org/apache/iceberg/flink/source/FlinkSplitGenerator.java b/flink/src/main/java/org/apache/iceberg/flink/source/FlinkSplitPlanner.java similarity index 93% rename from flink/src/main/java/org/apache/iceberg/flink/source/FlinkSplitGenerator.java rename to flink/src/main/java/org/apache/iceberg/flink/source/FlinkSplitPlanner.java index be1a9138d60c..ef0f71c05a67 100644 --- a/flink/src/main/java/org/apache/iceberg/flink/source/FlinkSplitGenerator.java +++ b/flink/src/main/java/org/apache/iceberg/flink/source/FlinkSplitPlanner.java @@ -22,6 +22,7 @@ import java.io.IOException; import java.io.UncheckedIOException; import java.util.List; +import org.apache.flink.annotation.Internal; import org.apache.iceberg.CombinedScanTask; import org.apache.iceberg.Table; import org.apache.iceberg.TableProperties; @@ -31,11 +32,12 @@ import org.apache.iceberg.io.CloseableIterable; import org.apache.iceberg.relocated.com.google.common.collect.Lists; -public class FlinkSplitGenerator { - private FlinkSplitGenerator() { +@Internal +public class FlinkSplitPlanner { + private FlinkSplitPlanner() { } - static FlinkInputSplit[] createInputSplits(Table table, ScanContext context) { + static FlinkInputSplit[] planInputSplits(Table table, ScanContext context) { try (CloseableIterable tasksIterable = planTasks(table, context)) { List tasks = Lists.newArrayList(tasksIterable); FlinkInputSplit[] splits = new FlinkInputSplit[tasks.size()]; @@ -48,6 +50,9 @@ static FlinkInputSplit[] createInputSplits(Table table, ScanContext context) { } } + /** + * This returns splits for the FLIP-27 source + */ public static List planIcebergSourceSplits( Table table, ScanContext context) { try (CloseableIterable tasksIterable = planTasks(table, context)) { diff --git a/flink/src/main/java/org/apache/iceberg/flink/source/Position.java b/flink/src/main/java/org/apache/iceberg/flink/source/Position.java index 
9ac2c89e0972..28c4c530fc2e 100644 --- a/flink/src/main/java/org/apache/iceberg/flink/source/Position.java +++ b/flink/src/main/java/org/apache/iceberg/flink/source/Position.java @@ -21,6 +21,7 @@ import java.io.Serializable; import java.util.Objects; +import org.apache.flink.annotation.Internal; import org.apache.iceberg.CombinedScanTask; import org.apache.iceberg.relocated.com.google.common.base.MoreObjects; @@ -31,6 +32,7 @@ *
  • record offset within a file
  • * */ +@Internal public class Position implements Serializable { private static final long serialVersionUID = 1L; diff --git a/flink/src/main/java/org/apache/iceberg/flink/source/StreamingMonitorFunction.java b/flink/src/main/java/org/apache/iceberg/flink/source/StreamingMonitorFunction.java index b31426a099f0..7913a18bde9c 100644 --- a/flink/src/main/java/org/apache/iceberg/flink/source/StreamingMonitorFunction.java +++ b/flink/src/main/java/org/apache/iceberg/flink/source/StreamingMonitorFunction.java @@ -140,7 +140,7 @@ private void monitorAndForwardSplits() { newScanContext = scanContext.copyWithAppendsBetween(lastSnapshotId, snapshotId); } - FlinkInputSplit[] splits = FlinkSplitGenerator.createInputSplits(table, newScanContext); + FlinkInputSplit[] splits = FlinkSplitPlanner.planInputSplits(table, newScanContext); for (FlinkInputSplit split : splits) { sourceContext.collect(split); } diff --git a/flink/src/main/java/org/apache/iceberg/flink/source/reader/FileRecords.java b/flink/src/main/java/org/apache/iceberg/flink/source/reader/FileRecords.java index 256660dd0a7a..9ae527b68df9 100644 --- a/flink/src/main/java/org/apache/iceberg/flink/source/reader/FileRecords.java +++ b/flink/src/main/java/org/apache/iceberg/flink/source/reader/FileRecords.java @@ -23,6 +23,7 @@ import java.util.Collections; import java.util.Set; import javax.annotation.Nullable; +import org.apache.flink.annotation.Internal; import org.apache.flink.connector.base.source.reader.RecordsWithSplitIds; import org.apache.flink.connector.file.src.util.RecordAndPosition; import org.apache.iceberg.io.CloseableIterator; @@ -30,6 +31,7 @@ /** * A batch of recrods for one split */ +@Internal public class FileRecords implements RecordsWithSplitIds> { @Nullable diff --git a/flink/src/main/java/org/apache/iceberg/flink/source/reader/IcebergSourceReaderMetrics.java b/flink/src/main/java/org/apache/iceberg/flink/source/reader/IcebergSourceReaderMetrics.java index cb81d54bab90..9e9d419bd5f0 100644 --- a/flink/src/main/java/org/apache/iceberg/flink/source/reader/IcebergSourceReaderMetrics.java +++ b/flink/src/main/java/org/apache/iceberg/flink/source/reader/IcebergSourceReaderMetrics.java @@ -20,9 +20,11 @@ package org.apache.iceberg.flink.source.reader; import java.util.concurrent.atomic.AtomicLong; +import org.apache.flink.annotation.Internal; import org.apache.flink.metrics.Counter; import org.apache.flink.metrics.MetricGroup; +@Internal public class IcebergSourceReaderMetrics { private final AtomicLong numRecordsOut; private final AtomicLong assignedSplits; diff --git a/flink/src/main/java/org/apache/iceberg/flink/source/split/IcebergSourceSplit.java b/flink/src/main/java/org/apache/iceberg/flink/source/split/IcebergSourceSplit.java index d8d13090cca6..d429f5813bfc 100644 --- a/flink/src/main/java/org/apache/iceberg/flink/source/split/IcebergSourceSplit.java +++ b/flink/src/main/java/org/apache/iceberg/flink/source/split/IcebergSourceSplit.java @@ -23,6 +23,7 @@ import java.util.Collection; import java.util.stream.Collectors; import javax.annotation.Nullable; +import org.apache.flink.annotation.Internal; import org.apache.flink.api.connector.source.SourceSplit; import org.apache.iceberg.CombinedScanTask; import org.apache.iceberg.FileScanTask; @@ -31,6 +32,7 @@ import org.apache.iceberg.relocated.com.google.common.base.Objects; import org.apache.iceberg.relocated.com.google.common.collect.Iterables; +@Internal public class IcebergSourceSplit implements SourceSplit, Serializable { private final CombinedScanTask task; /** diff 
--git a/flink/src/main/java/org/apache/iceberg/flink/source/split/IcebergSourceSplitSerializer.java b/flink/src/main/java/org/apache/iceberg/flink/source/split/IcebergSourceSplitSerializer.java index 86aa82fb1656..a208e94f9f14 100644 --- a/flink/src/main/java/org/apache/iceberg/flink/source/split/IcebergSourceSplitSerializer.java +++ b/flink/src/main/java/org/apache/iceberg/flink/source/split/IcebergSourceSplitSerializer.java @@ -20,6 +20,7 @@ package org.apache.iceberg.flink.source.split; import java.io.IOException; +import org.apache.flink.annotation.Internal; import org.apache.flink.annotation.VisibleForTesting; import org.apache.flink.core.io.SimpleVersionedSerializer; import org.apache.flink.util.InstantiationUtil; @@ -28,6 +29,7 @@ * TODO: use Java serialization for now. * will switch to more stable serializer from issue-1698. */ +@Internal public class IcebergSourceSplitSerializer implements SimpleVersionedSerializer { public static final IcebergSourceSplitSerializer INSTANCE = new IcebergSourceSplitSerializer(); private static final int VERSION = 1; diff --git a/flink/src/test/java/org/apache/iceberg/flink/source/TestStreamingReaderOperator.java b/flink/src/test/java/org/apache/iceberg/flink/source/TestStreamingReaderOperator.java index 0f5d6e1e4975..353fd8dfda0d 100644 --- a/flink/src/test/java/org/apache/iceberg/flink/source/TestStreamingReaderOperator.java +++ b/flink/src/test/java/org/apache/iceberg/flink/source/TestStreamingReaderOperator.java @@ -254,7 +254,7 @@ private List generateSplits() { .build(); } - Collections.addAll(inputSplits, FlinkSplitGenerator.createInputSplits(table, scanContext)); + Collections.addAll(inputSplits, FlinkSplitPlanner.planInputSplits(table, scanContext)); } return inputSplits; diff --git a/flink/src/test/java/org/apache/iceberg/flink/source/reader/ReaderFunctionTestBase.java b/flink/src/test/java/org/apache/iceberg/flink/source/reader/ReaderFunctionTestBase.java index 54abab75341d..9ee66f3374cb 100644 --- a/flink/src/test/java/org/apache/iceberg/flink/source/reader/ReaderFunctionTestBase.java +++ b/flink/src/test/java/org/apache/iceberg/flink/source/reader/ReaderFunctionTestBase.java @@ -36,7 +36,7 @@ import org.apache.iceberg.data.Record; import org.apache.iceberg.flink.HadoopTableResource; import org.apache.iceberg.flink.TestFixtures; -import org.apache.iceberg.flink.source.FlinkSplitGenerator; +import org.apache.iceberg.flink.source.FlinkSplitPlanner; import org.apache.iceberg.flink.source.Position; import org.apache.iceberg.flink.source.ScanContext; import org.apache.iceberg.flink.source.split.IcebergSourceSplit; @@ -103,7 +103,7 @@ public void before() throws IOException { dataAppender.appendToTable(dataFile); } - final List splits = FlinkSplitGenerator + final List splits = FlinkSplitPlanner .planIcebergSourceSplits(tableResource.table(), scanContext); Assert.assertEquals(1, splits.size()); Assert.assertEquals(3, splits.get(0).task().files().size()); diff --git a/flink/src/test/java/org/apache/iceberg/flink/source/reader/TestIcebergSourceSplitReader.java b/flink/src/test/java/org/apache/iceberg/flink/source/reader/TestIcebergSourceSplitReader.java index 9662a04dbd49..6c37b55569cf 100644 --- a/flink/src/test/java/org/apache/iceberg/flink/source/reader/TestIcebergSourceSplitReader.java +++ b/flink/src/test/java/org/apache/iceberg/flink/source/reader/TestIcebergSourceSplitReader.java @@ -41,7 +41,7 @@ import org.apache.iceberg.flink.HadoopTableResource; import org.apache.iceberg.flink.TestFixtures; import org.apache.iceberg.flink.TestHelpers; 
-import org.apache.iceberg.flink.source.FlinkSplitGenerator; +import org.apache.iceberg.flink.source.FlinkSplitPlanner; import org.apache.iceberg.flink.source.ScanContext; import org.apache.iceberg.flink.source.split.IcebergSourceSplit; import org.apache.iceberg.relocated.com.google.common.collect.Sets; @@ -90,7 +90,7 @@ public static void beforeClass() throws IOException { dataAppender.appendToTable(dataFile); } - final List splits = FlinkSplitGenerator + final List splits = FlinkSplitPlanner .planIcebergSourceSplits(tableResource.table(), scanContext); Assert.assertEquals(1, splits.size()); Assert.assertEquals(3, splits.get(0).task().files().size()); diff --git a/flink/src/test/java/org/apache/iceberg/flink/source/split/SplitHelpers.java b/flink/src/test/java/org/apache/iceberg/flink/source/split/SplitHelpers.java index 91cf7c0526c6..c97111b6fd97 100644 --- a/flink/src/test/java/org/apache/iceberg/flink/source/split/SplitHelpers.java +++ b/flink/src/test/java/org/apache/iceberg/flink/source/split/SplitHelpers.java @@ -34,7 +34,7 @@ import org.apache.iceberg.data.RandomGenericData; import org.apache.iceberg.data.Record; import org.apache.iceberg.flink.TestFixtures; -import org.apache.iceberg.flink.source.FlinkSplitGenerator; +import org.apache.iceberg.flink.source.FlinkSplitPlanner; import org.apache.iceberg.flink.source.ScanContext; import org.apache.iceberg.hadoop.HadoopCatalog; import org.apache.iceberg.relocated.com.google.common.collect.Lists; @@ -79,7 +79,7 @@ public static List createFileSplits( } final ScanContext scanContext = ScanContext.builder().build(); - final List splits = FlinkSplitGenerator.planIcebergSourceSplits(table, scanContext); + final List splits = FlinkSplitPlanner.planIcebergSourceSplits(table, scanContext); return splits.stream() .flatMap(split -> { List> filesList = Lists.partition(new ArrayList<>(split.task().files()), filesPerSplit); From 887436ff788bb05d9ec2aa394152506a90b7b472 Mon Sep 17 00:00:00 2001 From: Steven Wu Date: Mon, 20 Sep 2021 11:27:36 -0700 Subject: [PATCH 08/14] rename fetch-batch-size to fetch-record-batch-size to be more clear --- .../java/org/apache/iceberg/flink/FlinkConfigOptions.java | 6 +++--- .../flink/source/reader/ArrayPoolDataIteratorBatcher.java | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/flink/src/main/java/org/apache/iceberg/flink/FlinkConfigOptions.java b/flink/src/main/java/org/apache/iceberg/flink/FlinkConfigOptions.java index d3fb0268fa4f..b9e7d0204652 100644 --- a/flink/src/main/java/org/apache/iceberg/flink/FlinkConfigOptions.java +++ b/flink/src/main/java/org/apache/iceberg/flink/FlinkConfigOptions.java @@ -41,9 +41,9 @@ private FlinkConfigOptions() { .defaultValue(100) .withDescription("Sets max infer parallelism for source operator."); - public static final ConfigOption SOURCE_READER_FETCH_BATCH_SIZE = ConfigOptions - .key("source.iceberg.reader.fetch-batch-size") + public static final ConfigOption SOURCE_READER_FETCH_RECORD_BATCH_SIZE = ConfigOptions + .key("source.iceberg.reader.fetch-record-batch-size") .intType() .defaultValue(2048) - .withDescription("The target batch size for split reader fetch."); + .withDescription("The target record batch size for split reader fetch."); } diff --git a/flink/src/main/java/org/apache/iceberg/flink/source/reader/ArrayPoolDataIteratorBatcher.java b/flink/src/main/java/org/apache/iceberg/flink/source/reader/ArrayPoolDataIteratorBatcher.java index fc03fc4683fb..52e7a6be7366 100644 --- 
a/flink/src/main/java/org/apache/iceberg/flink/source/reader/ArrayPoolDataIteratorBatcher.java +++ b/flink/src/main/java/org/apache/iceberg/flink/source/reader/ArrayPoolDataIteratorBatcher.java @@ -56,7 +56,7 @@ private class ArrayPoolBatchIterator implements CloseableIterator inputIterator) { this.splitId = splitId; this.inputIterator = inputIterator; - this.batchSize = config.getInteger(FlinkConfigOptions.SOURCE_READER_FETCH_BATCH_SIZE); + this.batchSize = config.getInteger(FlinkConfigOptions.SOURCE_READER_FETCH_RECORD_BATCH_SIZE); this.pool = createPoolOfBatches(config.getInteger(SourceReaderOptions.ELEMENT_QUEUE_CAPACITY)); } From 75b29cbf1473e73c5f1c487191b36700514cfe51 Mon Sep 17 00:00:00 2001 From: Steven Wu Date: Sun, 26 Sep 2021 21:21:07 -0700 Subject: [PATCH 09/14] Address Thomas' comments --- .../reader/ArrayPoolDataIteratorBatcher.java | 2 +- .../flink/source/reader/DataIteratorBatcher.java | 5 +++++ .../source/reader/DataIteratorReaderFunction.java | 3 +++ .../source/reader/IcebergSourceSplitReader.java | 4 ++-- .../source/reader/RecyclableArrayIterator.java | 11 +++++------ .../reader/{FileRecords.java => SplitRecords.java} | 14 +++++++------- .../flink/source/split/IcebergSourceSplit.java | 4 ++-- .../source/split/IcebergSourceSplitSerializer.java | 3 ++- 8 files changed, 27 insertions(+), 19 deletions(-) rename flink/src/main/java/org/apache/iceberg/flink/source/reader/{FileRecords.java => SplitRecords.java} (86%) diff --git a/flink/src/main/java/org/apache/iceberg/flink/source/reader/ArrayPoolDataIteratorBatcher.java b/flink/src/main/java/org/apache/iceberg/flink/source/reader/ArrayPoolDataIteratorBatcher.java index 52e7a6be7366..c4763a4e0f31 100644 --- a/flink/src/main/java/org/apache/iceberg/flink/source/reader/ArrayPoolDataIteratorBatcher.java +++ b/flink/src/main/java/org/apache/iceberg/flink/source/reader/ArrayPoolDataIteratorBatcher.java @@ -83,7 +83,7 @@ public RecordsWithSplitIds> next() { return null; } else { Position position = inputIterator.position(); - return FileRecords.forRecords(splitId, new RecyclableArrayIterator<>( + return SplitRecords.forRecords(splitId, new RecyclableArrayIterator<>( pool.recycler(), batch, num, position.fileOffset(), position.recordOffset() - num)); } } diff --git a/flink/src/main/java/org/apache/iceberg/flink/source/reader/DataIteratorBatcher.java b/flink/src/main/java/org/apache/iceberg/flink/source/reader/DataIteratorBatcher.java index d2784f253279..0f0d16a6ab92 100644 --- a/flink/src/main/java/org/apache/iceberg/flink/source/reader/DataIteratorBatcher.java +++ b/flink/src/main/java/org/apache/iceberg/flink/source/reader/DataIteratorBatcher.java @@ -21,10 +21,15 @@ import java.io.Serializable; import org.apache.flink.connector.base.source.reader.RecordsWithSplitIds; +import org.apache.flink.connector.base.source.reader.splitreader.SplitReader; import org.apache.flink.connector.file.src.util.RecordAndPosition; import org.apache.iceberg.flink.source.DataIterator; import org.apache.iceberg.io.CloseableIterator; +/** + * Batcher converts iterator of T into iterator of batched {@code RecordsWithSplitIds>}, + * which is what FLIP-27's {@link SplitReader#fetch()} returns. 
+ */ @FunctionalInterface public interface DataIteratorBatcher extends Serializable { CloseableIterator>> apply(String splitId, DataIterator inputIterator); diff --git a/flink/src/main/java/org/apache/iceberg/flink/source/reader/DataIteratorReaderFunction.java b/flink/src/main/java/org/apache/iceberg/flink/source/reader/DataIteratorReaderFunction.java index d587ed8ba83e..5d32a359ff51 100644 --- a/flink/src/main/java/org/apache/iceberg/flink/source/reader/DataIteratorReaderFunction.java +++ b/flink/src/main/java/org/apache/iceberg/flink/source/reader/DataIteratorReaderFunction.java @@ -25,6 +25,9 @@ import org.apache.iceberg.flink.source.split.IcebergSourceSplit; import org.apache.iceberg.io.CloseableIterator; +/** + * A {@link ReaderFunction} implementation that uses {@link DataIterator}. + */ public abstract class DataIteratorReaderFunction implements ReaderFunction { private final DataIteratorBatcher batcher; diff --git a/flink/src/main/java/org/apache/iceberg/flink/source/reader/IcebergSourceSplitReader.java b/flink/src/main/java/org/apache/iceberg/flink/source/reader/IcebergSourceSplitReader.java index 023feb9fe7aa..3003100968ed 100644 --- a/flink/src/main/java/org/apache/iceberg/flink/source/reader/IcebergSourceSplitReader.java +++ b/flink/src/main/java/org/apache/iceberg/flink/source/reader/IcebergSourceSplitReader.java @@ -104,12 +104,12 @@ private void checkSplitOrStartNext() throws IOException { currentReader = readerFunction.apply(nextSplit); } - private FileRecords finishSplit() throws IOException { + private SplitRecords finishSplit() throws IOException { if (currentReader != null) { currentReader.close(); currentReader = null; } - final FileRecords finishRecords = FileRecords.finishedSplit(currentSplitId); + final SplitRecords finishRecords = SplitRecords.finishedSplit(currentSplitId); LOG.debug("Split reader {} finished split: {}", indexOfSubtask, currentSplitId); currentSplitId = null; metrics.incrementFinishedSplits(1L); diff --git a/flink/src/main/java/org/apache/iceberg/flink/source/reader/RecyclableArrayIterator.java b/flink/src/main/java/org/apache/iceberg/flink/source/reader/RecyclableArrayIterator.java index e0bd19ed3f76..9ffe6654df31 100644 --- a/flink/src/main/java/org/apache/iceberg/flink/source/reader/RecyclableArrayIterator.java +++ b/flink/src/main/java/org/apache/iceberg/flink/source/reader/RecyclableArrayIterator.java @@ -30,6 +30,11 @@ /** * Similar to the {@link ArrayResultIterator}. * Main difference is the records array can be recycled back to a pool. + * + * Each record's {@link RecordAndPosition} will have the same fileOffset (for {@link RecordAndPosition#getOffset()}. + * The first returned record will have a records-to-skip count of {@code recordOffset + 1}, following + * the contract that each record needs to point to the position AFTER itself + * (because a checkpoint taken after the record was emitted needs to resume from after that record). 
*/ final class RecyclableArrayIterator implements CloseableIterator> { private final Pool.Recycler recycler; @@ -43,12 +48,6 @@ final class RecyclableArrayIterator implements CloseableIterator recycler, final E[] newRecords, final int newNum, final long fileOffset, final long recordOffset) { diff --git a/flink/src/main/java/org/apache/iceberg/flink/source/reader/FileRecords.java b/flink/src/main/java/org/apache/iceberg/flink/source/reader/SplitRecords.java similarity index 86% rename from flink/src/main/java/org/apache/iceberg/flink/source/reader/FileRecords.java rename to flink/src/main/java/org/apache/iceberg/flink/source/reader/SplitRecords.java index 9ae527b68df9..22e3de1a4eb0 100644 --- a/flink/src/main/java/org/apache/iceberg/flink/source/reader/FileRecords.java +++ b/flink/src/main/java/org/apache/iceberg/flink/source/reader/SplitRecords.java @@ -29,10 +29,10 @@ import org.apache.iceberg.io.CloseableIterator; /** - * A batch of recrods for one split + * A batch of records for one split */ @Internal -public class FileRecords implements RecordsWithSplitIds> { +public class SplitRecords implements RecordsWithSplitIds> { @Nullable private final CloseableIterator> recordsForSplit; @@ -43,7 +43,7 @@ public class FileRecords implements RecordsWithSplitIds> @Nullable private CloseableIterator> recordsForSplitCurrent; - private FileRecords( + private SplitRecords( @Nullable String splitId, @Nullable CloseableIterator> recordsForSplit, Set finishedSplits) { @@ -92,12 +92,12 @@ public Set finishedSplits() { return finishedSplits; } - public static FileRecords forRecords( + public static SplitRecords forRecords( final String splitId, final CloseableIterator> recordsForSplit) { - return new FileRecords<>(splitId, recordsForSplit, Collections.emptySet()); + return new SplitRecords<>(splitId, recordsForSplit, Collections.emptySet()); } - public static FileRecords finishedSplit(String splitId) { - return new FileRecords<>(null, null, Collections.singleton(splitId)); + public static SplitRecords finishedSplit(String splitId) { + return new SplitRecords<>(null, null, Collections.singleton(splitId)); } } diff --git a/flink/src/main/java/org/apache/iceberg/flink/source/split/IcebergSourceSplit.java b/flink/src/main/java/org/apache/iceberg/flink/source/split/IcebergSourceSplit.java index d429f5813bfc..dc1e77352b07 100644 --- a/flink/src/main/java/org/apache/iceberg/flink/source/split/IcebergSourceSplit.java +++ b/flink/src/main/java/org/apache/iceberg/flink/source/split/IcebergSourceSplit.java @@ -70,11 +70,11 @@ public Position position() { return position; } - public byte[] serializedFormCache() { + byte[] serializedFormCache() { return serializedFormCache; } - public void serializedFormCache(byte[] cachedBytes) { + void serializedFormCache(byte[] cachedBytes) { this.serializedFormCache = cachedBytes; } diff --git a/flink/src/main/java/org/apache/iceberg/flink/source/split/IcebergSourceSplitSerializer.java b/flink/src/main/java/org/apache/iceberg/flink/source/split/IcebergSourceSplitSerializer.java index a208e94f9f14..9bb65497ff37 100644 --- a/flink/src/main/java/org/apache/iceberg/flink/source/split/IcebergSourceSplitSerializer.java +++ b/flink/src/main/java/org/apache/iceberg/flink/source/split/IcebergSourceSplitSerializer.java @@ -27,7 +27,8 @@ /** * TODO: use Java serialization for now. - * will switch to more stable serializer from issue-1698. + * Will switch to more stable serializer from + * issue-1698. 
*/ @Internal public class IcebergSourceSplitSerializer implements SimpleVersionedSerializer { From db48cd717b873820b499fddef61ecd9c8dad05de Mon Sep 17 00:00:00 2001 From: Steven Wu Date: Wed, 20 Oct 2021 11:26:18 -0700 Subject: [PATCH 10/14] Constrcut RowType internally inside RowDataReaderFunction from read schema. also make projected schema optional in ScanContext --- .../source/reader/RowDataReaderFunction.java | 18 +++++++++++++----- .../reader/TestIcebergSourceSplitReader.java | 5 +---- .../reader/TestRowDataReaderFunction.java | 2 +- 3 files changed, 15 insertions(+), 10 deletions(-) diff --git a/flink/src/main/java/org/apache/iceberg/flink/source/reader/RowDataReaderFunction.java b/flink/src/main/java/org/apache/iceberg/flink/source/reader/RowDataReaderFunction.java index 907675543bd0..aa70097b871b 100644 --- a/flink/src/main/java/org/apache/iceberg/flink/source/reader/RowDataReaderFunction.java +++ b/flink/src/main/java/org/apache/iceberg/flink/source/reader/RowDataReaderFunction.java @@ -21,8 +21,9 @@ import org.apache.flink.configuration.Configuration; import org.apache.flink.table.data.RowData; -import org.apache.flink.table.types.logical.RowType; +import org.apache.iceberg.Schema; import org.apache.iceberg.Table; +import org.apache.iceberg.flink.FlinkSchemaUtil; import org.apache.iceberg.flink.source.DataIterator; import org.apache.iceberg.flink.source.RowDataFileScanTaskReader; import org.apache.iceberg.flink.source.ScanContext; @@ -31,15 +32,17 @@ public class RowDataReaderFunction extends DataIteratorReaderFunction { private final Table table; private final ScanContext scanContext; + private final Schema readSchema; public RowDataReaderFunction( Configuration config, Table table, - ScanContext scanContext, - RowType rowType) { - super(new ArrayPoolDataIteratorBatcher<>(config, new RowDataRecordFactory(rowType))); + ScanContext scanContext) { + super(new ArrayPoolDataIteratorBatcher<>(config, new RowDataRecordFactory( + FlinkSchemaUtil.convert(readSchema(table, scanContext))))); this.table = table; this.scanContext = scanContext; + this.readSchema = readSchema(table, scanContext); } @Override @@ -47,11 +50,16 @@ public DataIterator createDataIterator(IcebergSourceSplit split) { return new DataIterator<>( new RowDataFileScanTaskReader( table.schema(), - scanContext.project(), + readSchema, scanContext.nameMapping(), scanContext.caseSensitive()), split.task(), table.io(), table.encryption()); } + + private static Schema readSchema(Table table, ScanContext scanContext) { + return scanContext.project() == null ? 
table.schema() : scanContext.project(); + } + } diff --git a/flink/src/test/java/org/apache/iceberg/flink/source/reader/TestIcebergSourceSplitReader.java b/flink/src/test/java/org/apache/iceberg/flink/source/reader/TestIcebergSourceSplitReader.java index 6c37b55569cf..83d5642f3c00 100644 --- a/flink/src/test/java/org/apache/iceberg/flink/source/reader/TestIcebergSourceSplitReader.java +++ b/flink/src/test/java/org/apache/iceberg/flink/source/reader/TestIcebergSourceSplitReader.java @@ -30,14 +30,12 @@ import org.apache.flink.connector.testutils.source.reader.TestingReaderContext; import org.apache.flink.metrics.groups.UnregisteredMetricsGroup; import org.apache.flink.table.data.RowData; -import org.apache.flink.table.types.logical.RowType; import org.apache.flink.types.Row; import org.apache.iceberg.DataFile; import org.apache.iceberg.FileFormat; import org.apache.iceberg.data.GenericAppenderHelper; import org.apache.iceberg.data.RandomGenericData; import org.apache.iceberg.data.Record; -import org.apache.iceberg.flink.FlinkSchemaUtil; import org.apache.iceberg.flink.HadoopTableResource; import org.apache.iceberg.flink.TestFixtures; import org.apache.iceberg.flink.TestHelpers; @@ -104,9 +102,8 @@ public void before() { private IcebergSourceSplitReader createSplitReader() { final Configuration config = new Configuration(); - RowType rowType = FlinkSchemaUtil.convert(tableResource.table().schema()); return new IcebergSourceSplitReader( - new RowDataReaderFunction(config, tableResource.table(), scanContext, rowType), + new RowDataReaderFunction(config, tableResource.table(), scanContext), new TestingReaderContext(), new IcebergSourceReaderMetrics(new UnregisteredMetricsGroup())); } diff --git a/flink/src/test/java/org/apache/iceberg/flink/source/reader/TestRowDataReaderFunction.java b/flink/src/test/java/org/apache/iceberg/flink/source/reader/TestRowDataReaderFunction.java index 558bd2785552..c0f6c62cd033 100644 --- a/flink/src/test/java/org/apache/iceberg/flink/source/reader/TestRowDataReaderFunction.java +++ b/flink/src/test/java/org/apache/iceberg/flink/source/reader/TestRowDataReaderFunction.java @@ -50,7 +50,7 @@ public TestRowDataReaderFunction(FileFormat fileFormat) { @Override protected ReaderFunction readerFunction() { - return new RowDataReaderFunction(new Configuration(), tableResource.table(), scanContext, rowType); + return new RowDataReaderFunction(new Configuration(), tableResource.table(), scanContext); } @Override From 819bed56f27fba3a712651993ff5551fc29cf035 Mon Sep 17 00:00:00 2001 From: Steven Wu Date: Wed, 20 Oct 2021 11:45:24 -0700 Subject: [PATCH 11/14] reapply dep change after build.gradle refactoring from master branch --- flink-runtime/build.gradle | 79 ++++++++++++++++++++++++++++++++++++++ flink/build.gradle | 2 + 2 files changed, 81 insertions(+) create mode 100644 flink-runtime/build.gradle diff --git a/flink-runtime/build.gradle b/flink-runtime/build.gradle new file mode 100644 index 000000000000..805abc03dd0d --- /dev/null +++ b/flink-runtime/build.gradle @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +project(':iceberg-flink-runtime') { + apply plugin: 'com.github.johnrengelman.shadow' + + tasks.jar.dependsOn tasks.shadowJar + + configurations { + implementation { + exclude group: 'org.apache.flink' + // included in Flink + exclude group: 'org.slf4j' + exclude group: 'org.apache.commons' + exclude group: 'commons-pool' + exclude group: 'commons-codec' + exclude group: 'org.xerial.snappy' + exclude group: 'javax.xml.bind' + exclude group: 'javax.annotation' + } + } + + dependencies { + implementation project(':iceberg-flink') + implementation project(':iceberg-aws') + implementation(project(':iceberg-nessie')) { + exclude group: 'com.google.code.findbugs', module: 'jsr305' + } + + implementation "org.apache.flink:flink-connector-base" + } + + shadowJar { + configurations = [project.configurations.runtimeClasspath] + + zip64 true + + // include the LICENSE and NOTICE files for the shaded Jar + from(projectDir) { + include 'LICENSE' + include 'NOTICE' + } + + // Relocate dependencies to avoid conflicts + relocate 'org.apache.avro', 'org.apache.iceberg.shaded.org.apache.avro' + relocate 'org.apache.parquet', 'org.apache.iceberg.shaded.org.apache.parquet' + relocate 'com.google', 'org.apache.iceberg.shaded.com.google' + relocate 'com.fasterxml', 'org.apache.iceberg.shaded.com.fasterxml' + relocate 'com.github.benmanes', 'org.apache.iceberg.shaded.com.github.benmanes' + relocate 'org.checkerframework', 'org.apache.iceberg.shaded.org.checkerframework' + relocate 'shaded.parquet', 'org.apache.iceberg.shaded.org.apache.parquet.shaded' + relocate 'org.apache.orc', 'org.apache.iceberg.shaded.org.apache.orc' + relocate 'io.airlift', 'org.apache.iceberg.shaded.io.airlift' + relocate 'org.threeten.extra', 'org.apache.iceberg.shaded.org.threeten.extra' + + classifier null + } + + jar { + enabled = false + } +} + diff --git a/flink/build.gradle b/flink/build.gradle index bbf65177bc3f..936d5bec0e46 100644 --- a/flink/build.gradle +++ b/flink/build.gradle @@ -28,6 +28,7 @@ project(':iceberg-flink') { implementation project(':iceberg-parquet') implementation project(':iceberg-hive-metastore') + compileOnly "org.apache.flink:flink-connector-base" compileOnly "org.apache.flink:flink-streaming-java_2.12" compileOnly "org.apache.flink:flink-streaming-java_2.12::tests" compileOnly "org.apache.flink:flink-table-api-java-bridge_2.12" @@ -56,6 +57,7 @@ project(':iceberg-flink') { exclude group: 'org.apache.hive', module: 'hive-storage-api' } + testImplementation "org.apache.flink:flink-connector-test-utils" testImplementation "org.apache.flink:flink-core" testImplementation "org.apache.flink:flink-runtime_2.12" testImplementation "org.apache.flink:flink-table-planner-blink_2.12" From 7e514710a8e131282f6d1a8b7bcf11d4e45620c7 Mon Sep 17 00:00:00 2001 From: Steven Wu Date: Fri, 29 Oct 2021 10:04:45 -0700 Subject: [PATCH 12/14] address review comments for split reader --- flink-runtime/build.gradle | 2 ++ .../main/java/org/apache/iceberg/flink/FlinkConfigOptions.java | 2 +- .../java/org/apache/iceberg/flink/source/DataIterator.java | 3 +++ 3 files changed, 6 insertions(+), 1 deletion(-) diff 
--git a/flink-runtime/build.gradle b/flink-runtime/build.gradle index 805abc03dd0d..e93fa5865a73 100644 --- a/flink-runtime/build.gradle +++ b/flink-runtime/build.gradle @@ -43,6 +43,8 @@ project(':iceberg-flink-runtime') { exclude group: 'com.google.code.findbugs', module: 'jsr305' } + // flink-connector-base is not part of Flink runtime. + // Hence, iceberg-flink-runtime should include it a transitive dependency. implementation "org.apache.flink:flink-connector-base" } diff --git a/flink/src/main/java/org/apache/iceberg/flink/FlinkConfigOptions.java b/flink/src/main/java/org/apache/iceberg/flink/FlinkConfigOptions.java index b9e7d0204652..ebad99951b77 100644 --- a/flink/src/main/java/org/apache/iceberg/flink/FlinkConfigOptions.java +++ b/flink/src/main/java/org/apache/iceberg/flink/FlinkConfigOptions.java @@ -45,5 +45,5 @@ private FlinkConfigOptions() { .key("source.iceberg.reader.fetch-record-batch-size") .intType() .defaultValue(2048) - .withDescription("The target record batch size for split reader fetch."); + .withDescription("The target number of records for Iceberg reader fetch batch."); } diff --git a/flink/src/main/java/org/apache/iceberg/flink/source/DataIterator.java b/flink/src/main/java/org/apache/iceberg/flink/source/DataIterator.java index 67b719aba5cf..1235ebd8502e 100644 --- a/flink/src/main/java/org/apache/iceberg/flink/source/DataIterator.java +++ b/flink/src/main/java/org/apache/iceberg/flink/source/DataIterator.java @@ -38,7 +38,9 @@ */ @Internal public class DataIterator implements CloseableIterator { + private final FileScanTaskReader fileScanTaskReader; + private final InputFilesDecryptor inputFilesDecryptor; private final CombinedScanTask combinedTask; @@ -49,6 +51,7 @@ public class DataIterator implements CloseableIterator { public DataIterator(FileScanTaskReader fileScanTaskReader, CombinedScanTask task, FileIO io, EncryptionManager encryption) { this.fileScanTaskReader = fileScanTaskReader; + this.inputFilesDecryptor = new InputFilesDecryptor(task, io, encryption); this.combinedTask = task; From 71eaa71a1e282bcb7a8f7287f04f599297af1385 Mon Sep 17 00:00:00 2001 From: Steven Wu Date: Fri, 29 Oct 2021 10:20:25 -0700 Subject: [PATCH 13/14] fix comment --- flink-runtime/build.gradle | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flink-runtime/build.gradle b/flink-runtime/build.gradle index e93fa5865a73..c8a0a8e87487 100644 --- a/flink-runtime/build.gradle +++ b/flink-runtime/build.gradle @@ -43,8 +43,8 @@ project(':iceberg-flink-runtime') { exclude group: 'com.google.code.findbugs', module: 'jsr305' } - // flink-connector-base is not part of Flink runtime. - // Hence, iceberg-flink-runtime should include it a transitive dependency. + // flink-connector-base is not part of Flink runtime. Hence, + // iceberg-flink-runtime should include it as a transitive dependency. 
implementation "org.apache.flink:flink-connector-base" } From d7ec63d4fc6726e7454e7fd0b3f94f3aea68f682 Mon Sep 17 00:00:00 2001 From: Steven Wu Date: Sun, 31 Oct 2021 21:33:11 -0700 Subject: [PATCH 14/14] address Ryan's review comments for split reader --- .../iceberg/flink/source/DataIterator.java | 20 ++--- .../apache/iceberg/flink/source/Position.java | 8 +- .../source/RowDataFileScanTaskReader.java | 1 + .../reader/ArrayPoolDataIteratorBatcher.java | 13 ++- .../source/reader/DataIteratorBatcher.java | 5 +- .../reader/DataIteratorReaderFunction.java | 3 +- .../reader/IcebergSourceSplitReader.java | 1 - .../reader/MutableRecordAndPosition.junk | 51 +++++++++++ .../flink/source/reader/ReaderFunction.java | 1 - .../source/reader/RecordAndPosition.java | 87 +++++++++++++++++++ .../reader/RecyclableArrayIterator.java | 17 ++-- .../flink/source/reader/SplitRecords.java | 1 - .../source/split/IcebergSourceSplit.java | 6 +- .../source/reader/ReaderFunctionTestBase.java | 55 ++++++------ .../reader/TestIcebergSourceSplitReader.java | 33 ++++--- .../reader/TestRecyclableArrayIterator.java | 13 ++- 16 files changed, 225 insertions(+), 90 deletions(-) create mode 100644 flink/src/main/java/org/apache/iceberg/flink/source/reader/MutableRecordAndPosition.junk create mode 100644 flink/src/main/java/org/apache/iceberg/flink/source/reader/RecordAndPosition.java diff --git a/flink/src/main/java/org/apache/iceberg/flink/source/DataIterator.java b/flink/src/main/java/org/apache/iceberg/flink/source/DataIterator.java index 1235ebd8502e..29472ccb4341 100644 --- a/flink/src/main/java/org/apache/iceberg/flink/source/DataIterator.java +++ b/flink/src/main/java/org/apache/iceberg/flink/source/DataIterator.java @@ -43,10 +43,10 @@ public class DataIterator implements CloseableIterator { private final InputFilesDecryptor inputFilesDecryptor; private final CombinedScanTask combinedTask; + private final Position position; - private Iterator tasks; + private Iterator fileTasksIterator; private CloseableIterator currentIterator; - private Position position; public DataIterator(FileScanTaskReader fileScanTaskReader, CombinedScanTask task, FileIO io, EncryptionManager encryption) { @@ -54,12 +54,12 @@ public DataIterator(FileScanTaskReader fileScanTaskReader, CombinedScanTask t this.inputFilesDecryptor = new InputFilesDecryptor(task, io, encryption); this.combinedTask = task; - - this.tasks = task.files().iterator(); - this.currentIterator = CloseableIterator.empty(); // fileOffset starts at -1 because we started // from an empty iterator that is not from the split files. 
- this.position = new Position(-1L, 0L); + this.position = new Position(-1, 0L); + + this.fileTasksIterator = task.files().iterator(); + this.currentIterator = CloseableIterator.empty(); } public void seek(Position startingPosition) { @@ -68,7 +68,7 @@ public void seek(Position startingPosition) { "Checkpointed file offset is %d, while CombinedScanTask has %d files", startingPosition.fileOffset(), combinedTask.files().size()); for (long i = 0L; i < startingPosition.fileOffset(); ++i) { - tasks.next(); + fileTasksIterator.next(); } updateCurrentIterator(); // skip records within the file @@ -106,9 +106,9 @@ public boolean isCurrentIteratorDone() { */ private void updateCurrentIterator() { try { - while (!currentIterator.hasNext() && tasks.hasNext()) { + while (!currentIterator.hasNext() && fileTasksIterator.hasNext()) { currentIterator.close(); - currentIterator = openTaskIterator(tasks.next()); + currentIterator = openTaskIterator(fileTasksIterator.next()); position.advanceFile(); } } catch (IOException e) { @@ -124,7 +124,7 @@ private CloseableIterator openTaskIterator(FileScanTask scanTask) { public void close() throws IOException { // close the current iterator currentIterator.close(); - tasks = null; + fileTasksIterator = null; } public Position position() { diff --git a/flink/src/main/java/org/apache/iceberg/flink/source/Position.java b/flink/src/main/java/org/apache/iceberg/flink/source/Position.java index 28c4c530fc2e..0e7acecfac7e 100644 --- a/flink/src/main/java/org/apache/iceberg/flink/source/Position.java +++ b/flink/src/main/java/org/apache/iceberg/flink/source/Position.java @@ -37,10 +37,10 @@ public class Position implements Serializable { private static final long serialVersionUID = 1L; - private long fileOffset; + private int fileOffset; private long recordOffset; - public Position(long fileOffset, long recordOffset) { + public Position(int fileOffset, long recordOffset) { this.fileOffset = fileOffset; this.recordOffset = recordOffset; } @@ -54,12 +54,12 @@ void advanceRecord() { this.recordOffset += 1L; } - public void update(long newFileOffset, long newRecordOffset) { + public void update(int newFileOffset, long newRecordOffset) { this.fileOffset = newFileOffset; this.recordOffset = newRecordOffset; } - public long fileOffset() { + public int fileOffset() { return fileOffset; } diff --git a/flink/src/main/java/org/apache/iceberg/flink/source/RowDataFileScanTaskReader.java b/flink/src/main/java/org/apache/iceberg/flink/source/RowDataFileScanTaskReader.java index dbe8a747f96b..fbdb7bf3cc02 100644 --- a/flink/src/main/java/org/apache/iceberg/flink/source/RowDataFileScanTaskReader.java +++ b/flink/src/main/java/org/apache/iceberg/flink/source/RowDataFileScanTaskReader.java @@ -48,6 +48,7 @@ @Internal public class RowDataFileScanTaskReader implements FileScanTaskReader { + private final Schema tableSchema; private final Schema projectedSchema; private final String nameMapping; diff --git a/flink/src/main/java/org/apache/iceberg/flink/source/reader/ArrayPoolDataIteratorBatcher.java b/flink/src/main/java/org/apache/iceberg/flink/source/reader/ArrayPoolDataIteratorBatcher.java index c4763a4e0f31..19a6f5f437a6 100644 --- a/flink/src/main/java/org/apache/iceberg/flink/source/reader/ArrayPoolDataIteratorBatcher.java +++ b/flink/src/main/java/org/apache/iceberg/flink/source/reader/ArrayPoolDataIteratorBatcher.java @@ -24,13 +24,18 @@ import org.apache.flink.configuration.Configuration; import org.apache.flink.connector.base.source.reader.RecordsWithSplitIds; import 
org.apache.flink.connector.base.source.reader.SourceReaderOptions; +import org.apache.flink.connector.base.source.reader.splitreader.SplitReader; import org.apache.flink.connector.file.src.util.Pool; -import org.apache.flink.connector.file.src.util.RecordAndPosition; import org.apache.iceberg.flink.FlinkConfigOptions; import org.apache.iceberg.flink.source.DataIterator; import org.apache.iceberg.flink.source.Position; import org.apache.iceberg.io.CloseableIterator; +/** + * FLIP-27's {@link SplitReader#fetch()} returns batched {@link RecordsWithSplitIds} + * {@link DataIterator} can return reused object, like {@code RowData}. In order to + * work with batched fetch API, we need to store cloned objects into object pools. + */ class ArrayPoolDataIteratorBatcher implements DataIteratorBatcher { private final Configuration config; private final RecordFactory recordFactory; @@ -41,7 +46,7 @@ class ArrayPoolDataIteratorBatcher implements DataIteratorBatcher { } @Override - public CloseableIterator>> apply( + public CloseableIterator>> batch( String splitId, DataIterator inputIterator) { return new ArrayPoolBatchIterator(splitId, inputIterator); } @@ -70,6 +75,9 @@ public RecordsWithSplitIds> next() { final T[] batch = getCachedEntry(); int num = 0; while (inputIterator.hasNext() && num < batchSize) { + // The record produced by inputIterator can be reused like for the RowData case. + // inputIterator.next() can't be called again until the copy is made + // since the record is not consumed immediately. T nextRecord = inputIterator.next(); recordFactory.clone(nextRecord, batch[num]); num++; @@ -79,6 +87,7 @@ public RecordsWithSplitIds> next() { break; } } + if (num == 0) { return null; } else { diff --git a/flink/src/main/java/org/apache/iceberg/flink/source/reader/DataIteratorBatcher.java b/flink/src/main/java/org/apache/iceberg/flink/source/reader/DataIteratorBatcher.java index 0f0d16a6ab92..f95a7f95e669 100644 --- a/flink/src/main/java/org/apache/iceberg/flink/source/reader/DataIteratorBatcher.java +++ b/flink/src/main/java/org/apache/iceberg/flink/source/reader/DataIteratorBatcher.java @@ -22,15 +22,14 @@ import java.io.Serializable; import org.apache.flink.connector.base.source.reader.RecordsWithSplitIds; import org.apache.flink.connector.base.source.reader.splitreader.SplitReader; -import org.apache.flink.connector.file.src.util.RecordAndPosition; import org.apache.iceberg.flink.source.DataIterator; import org.apache.iceberg.io.CloseableIterator; /** * Batcher converts iterator of T into iterator of batched {@code RecordsWithSplitIds>}, - * which is what FLIP-27's {@link SplitReader#fetch()} returns. + * as FLIP-27's {@link SplitReader#fetch()} returns batched records. 
*/ @FunctionalInterface public interface DataIteratorBatcher extends Serializable { - CloseableIterator>> apply(String splitId, DataIterator inputIterator); + CloseableIterator>> batch(String splitId, DataIterator inputIterator); } diff --git a/flink/src/main/java/org/apache/iceberg/flink/source/reader/DataIteratorReaderFunction.java b/flink/src/main/java/org/apache/iceberg/flink/source/reader/DataIteratorReaderFunction.java index 5d32a359ff51..95c65bde1974 100644 --- a/flink/src/main/java/org/apache/iceberg/flink/source/reader/DataIteratorReaderFunction.java +++ b/flink/src/main/java/org/apache/iceberg/flink/source/reader/DataIteratorReaderFunction.java @@ -20,7 +20,6 @@ package org.apache.iceberg.flink.source.reader; import org.apache.flink.connector.base.source.reader.RecordsWithSplitIds; -import org.apache.flink.connector.file.src.util.RecordAndPosition; import org.apache.iceberg.flink.source.DataIterator; import org.apache.iceberg.flink.source.split.IcebergSourceSplit; import org.apache.iceberg.io.CloseableIterator; @@ -43,7 +42,7 @@ public CloseableIterator>> apply(Iceber if (split.position() != null) { inputIterator.seek(split.position()); } - return batcher.apply(split.splitId(), inputIterator); + return batcher.batch(split.splitId(), inputIterator); } } diff --git a/flink/src/main/java/org/apache/iceberg/flink/source/reader/IcebergSourceSplitReader.java b/flink/src/main/java/org/apache/iceberg/flink/source/reader/IcebergSourceSplitReader.java index 3003100968ed..10fe9483f171 100644 --- a/flink/src/main/java/org/apache/iceberg/flink/source/reader/IcebergSourceSplitReader.java +++ b/flink/src/main/java/org/apache/iceberg/flink/source/reader/IcebergSourceSplitReader.java @@ -28,7 +28,6 @@ import org.apache.flink.connector.base.source.reader.RecordsWithSplitIds; import org.apache.flink.connector.base.source.reader.splitreader.SplitReader; import org.apache.flink.connector.base.source.reader.splitreader.SplitsChange; -import org.apache.flink.connector.file.src.util.RecordAndPosition; import org.apache.iceberg.flink.source.split.IcebergSourceSplit; import org.apache.iceberg.io.CloseableIterator; import org.slf4j.Logger; diff --git a/flink/src/main/java/org/apache/iceberg/flink/source/reader/MutableRecordAndPosition.junk b/flink/src/main/java/org/apache/iceberg/flink/source/reader/MutableRecordAndPosition.junk new file mode 100644 index 000000000000..ab3fde206017 --- /dev/null +++ b/flink/src/main/java/org/apache/iceberg/flink/source/reader/MutableRecordAndPosition.junk @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.flink.source.reader; + +import org.apache.flink.annotation.Internal; + +/** + * A mutable version of the {@link RecordAndPosition}. + * + *
+ * <p>
    This mutable object is useful in cases where only once instance of a {@code RecordAndPosition} + * is needed at a time, like for the result values of the {@link RecyclableArrayIterator}. + */ +@Internal +public class MutableRecordAndPosition extends RecordAndPosition { + + /** Updates the record and position in this object. */ + public void set(T record, int fileOffset, long recordOffset) { + this.record = record; + this.fileOffset = fileOffset; + this.recordOffset = recordOffset; + } + + /** Sets the position without setting a record. */ + public void position(int fileOffset, long recordOffset) { + this.fileOffset = fileOffset; + this.recordOffset = recordOffset; + } + + /** Sets the next record of a sequence. This increments the {@code recordOffset} by one. */ + public void record(T record) { + this.record = record; + this.recordOffset++; + } +} diff --git a/flink/src/main/java/org/apache/iceberg/flink/source/reader/ReaderFunction.java b/flink/src/main/java/org/apache/iceberg/flink/source/reader/ReaderFunction.java index d8add3d257cc..b008f6f5c7fa 100644 --- a/flink/src/main/java/org/apache/iceberg/flink/source/reader/ReaderFunction.java +++ b/flink/src/main/java/org/apache/iceberg/flink/source/reader/ReaderFunction.java @@ -22,7 +22,6 @@ import java.io.Serializable; import java.util.function.Function; import org.apache.flink.connector.base.source.reader.RecordsWithSplitIds; -import org.apache.flink.connector.file.src.util.RecordAndPosition; import org.apache.iceberg.flink.source.split.IcebergSourceSplit; import org.apache.iceberg.io.CloseableIterator; diff --git a/flink/src/main/java/org/apache/iceberg/flink/source/reader/RecordAndPosition.java b/flink/src/main/java/org/apache/iceberg/flink/source/reader/RecordAndPosition.java new file mode 100644 index 000000000000..e0980ca54c93 --- /dev/null +++ b/flink/src/main/java/org/apache/iceberg/flink/source/reader/RecordAndPosition.java @@ -0,0 +1,87 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.iceberg.flink.source.reader; + +import org.apache.flink.annotation.Internal; + +/** + * A record along with the reader position to be stored in the checkpoint. + * + *
+ * <p>
    The position defines the point in the reader AFTER the record. Record processing and updating + * checkpointed state happens atomically. The position points to where the reader should resume + * after this record is processed. + * + *
+ * <p>
    This mutable object is useful in cases where only once instance of a {@code RecordAndPosition} + * is needed at a time, like for the result values of the {@link RecyclableArrayIterator}. + */ +@Internal +public class RecordAndPosition { + private T record; + private int fileOffset; + private long recordOffset; + + public RecordAndPosition(T record, int fileOffset, long recordOffset) { + this.record = record; + this.fileOffset = fileOffset; + this.recordOffset = recordOffset; + } + + public RecordAndPosition() { + } + + // ------------------------------------------------------------------------ + + public T record() { + return record; + } + + public int fileOffset() { + return fileOffset; + } + + public long recordOffset() { + return recordOffset; + } + + /** Updates the record and position in this object. */ + public void set(T newRecord, int newFileOffset, long newRecordOffset) { + this.record = newRecord; + this.fileOffset = newFileOffset; + this.recordOffset = newRecordOffset; + } + + /** Sets the position without setting a record. */ + public void position(int newFileOffset, long newRecordOffset) { + this.fileOffset = newFileOffset; + this.recordOffset = newRecordOffset; + } + + /** Sets the next record of a sequence. This increments the {@code recordOffset} by one. */ + public void record(T nextRecord) { + this.record = nextRecord; + this.recordOffset++; + } + + @Override + public String toString() { + return String.format("%s @ %d + %d", record, fileOffset, recordOffset); + } + +} diff --git a/flink/src/main/java/org/apache/iceberg/flink/source/reader/RecyclableArrayIterator.java b/flink/src/main/java/org/apache/iceberg/flink/source/reader/RecyclableArrayIterator.java index 9ffe6654df31..0a197c731610 100644 --- a/flink/src/main/java/org/apache/iceberg/flink/source/reader/RecyclableArrayIterator.java +++ b/flink/src/main/java/org/apache/iceberg/flink/source/reader/RecyclableArrayIterator.java @@ -21,17 +21,14 @@ import javax.annotation.Nullable; import org.apache.flink.connector.file.src.util.ArrayResultIterator; -import org.apache.flink.connector.file.src.util.CheckpointedPosition; -import org.apache.flink.connector.file.src.util.MutableRecordAndPosition; import org.apache.flink.connector.file.src.util.Pool; -import org.apache.flink.connector.file.src.util.RecordAndPosition; import org.apache.iceberg.io.CloseableIterator; /** * Similar to the {@link ArrayResultIterator}. * Main difference is the records array can be recycled back to a pool. * - * Each record's {@link RecordAndPosition} will have the same fileOffset (for {@link RecordAndPosition#getOffset()}. + * Each record's {@link RecordAndPosition} will have the same fileOffset (for {@link RecordAndPosition#fileOffset()}. * The first returned record will have a records-to-skip count of {@code recordOffset + 1}, following * the contract that each record needs to point to the position AFTER itself * (because a checkpoint taken after the record was emitted needs to resume from after that record). 
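The javadoc above pins down the position contract that batched reads expose: every record in a batch carries the same fileOffset, and recordOffset counts the records of that file already consumed, i.e. the point to resume from AFTER the record. A minimal consumer sketch of RecyclableArrayIterator under that contract (illustrative only: the method name, the no-op recycler lambda, and the offsets are made up; an enclosing class, imports, and same-package access are assumed):

    static void consumeOneBatch() throws IOException {
      // All records of this batch come from file #2 of the split; 5 records of that file
      // were already emitted before this batch.
      String[] records = new String[] {"a", "b", "c"};
      RecyclableArrayIterator<String> iterator =
          new RecyclableArrayIterator<>(array -> { }, records, records.length, 2, 5L);

      RecordAndPosition<String> rec;
      while ((rec = iterator.next()) != null) {
        // fileOffset() stays 2 for every record; recordOffset() is 6, 7, 8 -- the position
        // AFTER each record, i.e. where a reader restored from a checkpoint taken right
        // after that record should resume.
        System.out.println(rec.record() + " @ " + rec.fileOffset() + " + " + rec.recordOffset());
      }

      iterator.close(); // assumed to hand the records array back through the recycler
    }

The no-op recycler stands in for the array pool behind ArrayPoolDataIteratorBatcher; recycling lets a later batch reuse the same array instead of allocating a new one.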
@@ -40,21 +37,21 @@ final class RecyclableArrayIterator implements CloseableIterator recycler; private final E[] records; private final int num; - private final MutableRecordAndPosition recordAndPosition; + private final RecordAndPosition recordAndPosition; private int pos; RecyclableArrayIterator(Pool.Recycler recycler) { - this(recycler, null, 0, CheckpointedPosition.NO_OFFSET, 0L); + this(recycler, null, 0, -1, 0L); } RecyclableArrayIterator( - Pool.Recycler recycler, final E[] newRecords, - final int newNum, final long fileOffset, final long recordOffset) { + Pool.Recycler recycler, E[] newRecords, + int newNum, int fileOffset, long recordOffset) { this.recycler = recycler; this.records = newRecords; this.num = newNum; - this.recordAndPosition = new MutableRecordAndPosition<>(); + this.recordAndPosition = new RecordAndPosition<>(); this.recordAndPosition.set(null, fileOffset, recordOffset); this.pos = 0; @@ -69,7 +66,7 @@ public boolean hasNext() { @Nullable public RecordAndPosition next() { if (pos < num) { - recordAndPosition.setNext(records[pos++]); + recordAndPosition.record(records[pos++]); return recordAndPosition; } else { return null; diff --git a/flink/src/main/java/org/apache/iceberg/flink/source/reader/SplitRecords.java b/flink/src/main/java/org/apache/iceberg/flink/source/reader/SplitRecords.java index 22e3de1a4eb0..25eeccef3774 100644 --- a/flink/src/main/java/org/apache/iceberg/flink/source/reader/SplitRecords.java +++ b/flink/src/main/java/org/apache/iceberg/flink/source/reader/SplitRecords.java @@ -25,7 +25,6 @@ import javax.annotation.Nullable; import org.apache.flink.annotation.Internal; import org.apache.flink.connector.base.source.reader.RecordsWithSplitIds; -import org.apache.flink.connector.file.src.util.RecordAndPosition; import org.apache.iceberg.io.CloseableIterator; /** diff --git a/flink/src/main/java/org/apache/iceberg/flink/source/split/IcebergSourceSplit.java b/flink/src/main/java/org/apache/iceberg/flink/source/split/IcebergSourceSplit.java index dc1e77352b07..3344dd4bef11 100644 --- a/flink/src/main/java/org/apache/iceberg/flink/source/split/IcebergSourceSplit.java +++ b/flink/src/main/java/org/apache/iceberg/flink/source/split/IcebergSourceSplit.java @@ -54,11 +54,11 @@ public IcebergSourceSplit(CombinedScanTask task, Position position) { } public static IcebergSourceSplit fromCombinedScanTask(CombinedScanTask combinedScanTask) { - return fromCombinedScanTask(combinedScanTask, 0L, 0L); + return fromCombinedScanTask(combinedScanTask, 0, 0L); } public static IcebergSourceSplit fromCombinedScanTask( - CombinedScanTask combinedScanTask, long fileOffset, long recordOffset) { + CombinedScanTask combinedScanTask, int fileOffset, long recordOffset) { return new IcebergSourceSplit(combinedScanTask, new Position(fileOffset, recordOffset)); } @@ -85,7 +85,7 @@ public String splitId() { .toString(); } - public void updatePosition(long newFileOffset, long newRecordOffset) { + public void updatePosition(int newFileOffset, long newRecordOffset) { position.update(newFileOffset, newRecordOffset); } diff --git a/flink/src/test/java/org/apache/iceberg/flink/source/reader/ReaderFunctionTestBase.java b/flink/src/test/java/org/apache/iceberg/flink/source/reader/ReaderFunctionTestBase.java index 9ee66f3374cb..ae4862f7f3cc 100644 --- a/flink/src/test/java/org/apache/iceberg/flink/source/reader/ReaderFunctionTestBase.java +++ b/flink/src/test/java/org/apache/iceberg/flink/source/reader/ReaderFunctionTestBase.java @@ -24,7 +24,6 @@ import java.util.Collection; import 
java.util.List; import org.apache.flink.connector.base.source.reader.RecordsWithSplitIds; -import org.apache.flink.connector.file.src.util.RecordAndPosition; import org.apache.iceberg.BaseCombinedScanTask; import org.apache.iceberg.CombinedScanTask; import org.apache.iceberg.DataFile; @@ -137,16 +136,16 @@ public static IcebergSourceSplit sortFilesAsAppendOrder(IcebergSourceSplit split */ private List extractRecordsAndAssertPosition( RecordsWithSplitIds> batch, - long expectedCount, long exptectedFileOffset, long startRecordOffset) { + long expectedCount, int exptectedFileOffset, long startRecordOffset) { // need to call nextSplit first in order to read the batch batch.nextSplit(); final List records = new ArrayList<>(); long recordOffset = startRecordOffset; RecordAndPosition recordAndPosition; while ((recordAndPosition = batch.nextRecordFromSplit()) != null) { - records.add(recordAndPosition.getRecord()); - Assert.assertEquals("expected file offset", exptectedFileOffset, recordAndPosition.getOffset()); - Assert.assertEquals("expected record offset", recordOffset, recordAndPosition.getRecordSkipCount() - 1); + records.add(recordAndPosition.record()); + Assert.assertEquals("expected file offset", exptectedFileOffset, recordAndPosition.fileOffset()); + Assert.assertEquals("expected record offset", recordOffset, recordAndPosition.recordOffset() - 1); recordOffset++; } Assert.assertEquals("expected record count", expectedCount, records.size()); @@ -159,17 +158,17 @@ public void testNoCheckpointedPosition() throws IOException { final CloseableIterator>> reader = readerFunction().apply(split); final RecordsWithSplitIds> batch0 = reader.next(); - final List actual0 = extractRecordsAndAssertPosition(batch0, recordBatchList.get(0).size(), 0L, 0L); + final List actual0 = extractRecordsAndAssertPosition(batch0, recordBatchList.get(0).size(), 0, 0L); assertRecords(recordBatchList.get(0), actual0, TestFixtures.SCHEMA); batch0.recycle(); final RecordsWithSplitIds> batch1 = reader.next(); - final List actual1 = extractRecordsAndAssertPosition(batch1, recordBatchList.get(1).size(), 1L, 0L); + final List actual1 = extractRecordsAndAssertPosition(batch1, recordBatchList.get(1).size(), 1, 0L); assertRecords(recordBatchList.get(1), actual1, TestFixtures.SCHEMA); batch1.recycle(); final RecordsWithSplitIds> batch2 = reader.next(); - final List actual2 = extractRecordsAndAssertPosition(batch2, recordBatchList.get(2).size(), 2L, 0L); + final List actual2 = extractRecordsAndAssertPosition(batch2, recordBatchList.get(2).size(), 2, 0L); assertRecords(recordBatchList.get(2), actual2, TestFixtures.SCHEMA); batch2.recycle(); } @@ -178,21 +177,21 @@ public void testNoCheckpointedPosition() throws IOException { public void testCheckpointedPositionBeforeFirstFile() throws IOException { final IcebergSourceSplit split = new IcebergSourceSplit( icebergSplit.task(), - new Position(0L, 0L)); + new Position(0, 0L)); final CloseableIterator>> reader = readerFunction().apply(split); final RecordsWithSplitIds> batch0 = reader.next(); - final List actual0 = extractRecordsAndAssertPosition(batch0, recordBatchList.get(0).size(), 0L, 0L); + final List actual0 = extractRecordsAndAssertPosition(batch0, recordBatchList.get(0).size(), 0, 0L); assertRecords(recordBatchList.get(0), actual0, TestFixtures.SCHEMA); batch0.recycle(); final RecordsWithSplitIds> batch1 = reader.next(); - final List actual1 = extractRecordsAndAssertPosition(batch1, recordBatchList.get(1).size(), 1L, 0L); + final List actual1 = 
extractRecordsAndAssertPosition(batch1, recordBatchList.get(1).size(), 1, 0L); assertRecords(recordBatchList.get(1), actual1, TestFixtures.SCHEMA); batch1.recycle(); final RecordsWithSplitIds> batch2 = reader.next(); - final List actual2 = extractRecordsAndAssertPosition(batch2, recordBatchList.get(2).size(), 2L, 0L); + final List actual2 = extractRecordsAndAssertPosition(batch2, recordBatchList.get(2).size(), 2, 0L); assertRecords(recordBatchList.get(2), actual2, TestFixtures.SCHEMA); batch2.recycle(); } @@ -201,57 +200,55 @@ public void testCheckpointedPositionBeforeFirstFile() throws IOException { public void testCheckpointedPositionMiddleFirstFile() throws IOException { final IcebergSourceSplit split = new IcebergSourceSplit( icebergSplit.task(), - new Position(0L, 1L)); + new Position(0, 1L)); final CloseableIterator>> reader = readerFunction().apply(split); final RecordsWithSplitIds> batch0 = reader.next(); - final List actual0 = extractRecordsAndAssertPosition(batch0, 1L, 0L, 1L); + final List actual0 = extractRecordsAndAssertPosition(batch0, 1L, 0, 1L); assertRecords(recordBatchList.get(0).subList(1, 2), actual0, TestFixtures.SCHEMA); batch0.recycle(); final RecordsWithSplitIds> batch1 = reader.next(); - final List actual1 = extractRecordsAndAssertPosition(batch1, recordBatchList.get(1).size(), 1L, 0L); + final List actual1 = extractRecordsAndAssertPosition(batch1, recordBatchList.get(1).size(), 1, 0L); assertRecords(recordBatchList.get(1), actual1, TestFixtures.SCHEMA); batch1.recycle(); final RecordsWithSplitIds> batch2 = reader.next(); - final List actual2 = extractRecordsAndAssertPosition(batch2, recordBatchList.get(2).size(), 2L, 0L); + final List actual2 = extractRecordsAndAssertPosition(batch2, recordBatchList.get(2).size(), 2, 0L); assertRecords(recordBatchList.get(2), actual2, TestFixtures.SCHEMA); batch2.recycle(); } @Test public void testCheckpointedPositionAfterFirstFile() throws IOException { - final IcebergSourceSplit split = new IcebergSourceSplit( - icebergSplit.task(), - new Position(0L, 2L)); + final IcebergSourceSplit split = new IcebergSourceSplit(icebergSplit.task(), + new Position(0, 2L)); final CloseableIterator>> reader = readerFunction().apply(split); final RecordsWithSplitIds> batch0 = reader.next(); - final List actual1 = extractRecordsAndAssertPosition(batch0, recordBatchList.get(1).size(), 1L, 0L); + final List actual1 = extractRecordsAndAssertPosition(batch0, recordBatchList.get(1).size(), 1, 0L); assertRecords(recordBatchList.get(1), actual1, TestFixtures.SCHEMA); batch0.recycle(); final RecordsWithSplitIds> batch2 = reader.next(); - final List actual2 = extractRecordsAndAssertPosition(batch2, recordBatchList.get(2).size(), 2L, 0L); + final List actual2 = extractRecordsAndAssertPosition(batch2, recordBatchList.get(2).size(), 2, 0L); assertRecords(recordBatchList.get(2), actual2, TestFixtures.SCHEMA); batch2.recycle(); } @Test public void testCheckpointedPositionBeforeSecondFile() throws IOException { - final IcebergSourceSplit split = new IcebergSourceSplit( - icebergSplit.task(), - new Position(1L, 0L)); + final IcebergSourceSplit split = new IcebergSourceSplit(icebergSplit.task(), + new Position(1, 0L)); final CloseableIterator>> reader = readerFunction().apply(split); final RecordsWithSplitIds> batch1 = reader.next(); - final List actual1 = extractRecordsAndAssertPosition(batch1, recordBatchList.get(1).size(), 1L, 0L); + final List actual1 = extractRecordsAndAssertPosition(batch1, recordBatchList.get(1).size(), 1, 0L); 
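// Position(1, 0L) = file offset 1, record offset 0: the reader seeks past the entire first file,
// so this batch holds the second file's records (tagged with file offset 1) and the next batch
// holds the third file's (file offset 2).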
assertRecords(recordBatchList.get(1), actual1, TestFixtures.SCHEMA); batch1.recycle(); final RecordsWithSplitIds> batch2 = reader.next(); - final List actual2 = extractRecordsAndAssertPosition(batch2, recordBatchList.get(2).size(), 2L, 0L); + final List actual2 = extractRecordsAndAssertPosition(batch2, recordBatchList.get(2).size(), 2, 0L); assertRecords(recordBatchList.get(2), actual2, TestFixtures.SCHEMA); batch2.recycle(); } @@ -260,16 +257,16 @@ public void testCheckpointedPositionBeforeSecondFile() throws IOException { public void testCheckpointedPositionMidSecondFile() throws IOException { final IcebergSourceSplit split = new IcebergSourceSplit( icebergSplit.task(), - new Position(1L, 1L)); + new Position(1, 1L)); final CloseableIterator>> reader = readerFunction().apply(split); final RecordsWithSplitIds> batch1 = reader.next(); - final List actual1 = extractRecordsAndAssertPosition(batch1, 1L, 1L, 1L); + final List actual1 = extractRecordsAndAssertPosition(batch1, 1L, 1, 1L); assertRecords(recordBatchList.get(1).subList(1, 2), actual1, TestFixtures.SCHEMA); batch1.recycle(); final RecordsWithSplitIds> batch2 = reader.next(); - final List actual2 = extractRecordsAndAssertPosition(batch2, recordBatchList.get(2).size(), 2L, 0L); + final List actual2 = extractRecordsAndAssertPosition(batch2, recordBatchList.get(2).size(), 2, 0L); assertRecords(recordBatchList.get(2), actual2, TestFixtures.SCHEMA); batch2.recycle(); } diff --git a/flink/src/test/java/org/apache/iceberg/flink/source/reader/TestIcebergSourceSplitReader.java b/flink/src/test/java/org/apache/iceberg/flink/source/reader/TestIcebergSourceSplitReader.java index 83d5642f3c00..020d56438c05 100644 --- a/flink/src/test/java/org/apache/iceberg/flink/source/reader/TestIcebergSourceSplitReader.java +++ b/flink/src/test/java/org/apache/iceberg/flink/source/reader/TestIcebergSourceSplitReader.java @@ -26,7 +26,6 @@ import org.apache.flink.configuration.Configuration; import org.apache.flink.connector.base.source.reader.RecordsWithSplitIds; import org.apache.flink.connector.base.source.reader.splitreader.SplitsAddition; -import org.apache.flink.connector.file.src.util.RecordAndPosition; import org.apache.flink.connector.testutils.source.reader.TestingReaderContext; import org.apache.flink.metrics.groups.UnregisteredMetricsGroup; import org.apache.flink.table.data.RowData; @@ -114,16 +113,16 @@ public void testFullScan() throws Exception { reader.handleSplitsChanges(new SplitsAddition(Arrays.asList(split))); final RecordsWithSplitIds> readBatch0 = reader.fetch(); - final List rowBatch0 = readRows(readBatch0, split.splitId(), 0L, 0L); + final List rowBatch0 = readRows(readBatch0, split.splitId(), 0, 0L); TestHelpers.assertRecords(rowBatch0, recordBatchList.get(0), TestFixtures.SCHEMA); final RecordsWithSplitIds> readBatch1 = reader.fetch(); - final List rowBatch1 = readRows(readBatch1, split.splitId(), 1L, 0L); + final List rowBatch1 = readRows(readBatch1, split.splitId(), 1, 0L); TestHelpers.assertRecords(rowBatch1, recordBatchList.get(1), TestFixtures.SCHEMA); final RecordsWithSplitIds> readBatch2 = reader.fetch(); - final List rowBatch2 = readRows(readBatch2, split.splitId(), 2L, 0L); + final List rowBatch2 = readRows(readBatch2, split.splitId(), 2, 0L); TestHelpers.assertRecords(rowBatch2, recordBatchList.get(2), TestFixtures.SCHEMA); final RecordsWithSplitIds> finishedBatch = reader.fetch(); @@ -133,15 +132,15 @@ public void testFullScan() throws Exception { @Test public void testResumeFromEndOfFirstBatch() throws Exception { - final 
IcebergSourceSplit split = IcebergSourceSplit.fromCombinedScanTask(icebergSplit.task(), 0L, 2L); + final IcebergSourceSplit split = IcebergSourceSplit.fromCombinedScanTask(icebergSplit.task(), 0, 2L); reader.handleSplitsChanges(new SplitsAddition(Arrays.asList(split))); final RecordsWithSplitIds> readBatch1 = reader.fetch(); - final List rowBatch1 = readRows(readBatch1, split.splitId(), 1L, 0L); + final List rowBatch1 = readRows(readBatch1, split.splitId(), 1, 0L); TestHelpers.assertRecords(rowBatch1, recordBatchList.get(1), TestFixtures.SCHEMA); final RecordsWithSplitIds> readBatch2 = reader.fetch(); - final List rowBatch2 = readRows(readBatch2, split.splitId(), 2L, 0L); + final List rowBatch2 = readRows(readBatch2, split.splitId(), 2, 0L); TestHelpers.assertRecords(rowBatch2, recordBatchList.get(2), TestFixtures.SCHEMA); final RecordsWithSplitIds> finishedBatch = reader.fetch(); @@ -151,15 +150,15 @@ public void testResumeFromEndOfFirstBatch() throws Exception { @Test public void testResumeFromStartOfSecondBatch() throws Exception { - final IcebergSourceSplit split = IcebergSourceSplit.fromCombinedScanTask(icebergSplit.task(), 1L, 0L); + final IcebergSourceSplit split = IcebergSourceSplit.fromCombinedScanTask(icebergSplit.task(), 1, 0L); reader.handleSplitsChanges(new SplitsAddition(Arrays.asList(split))); final RecordsWithSplitIds> readBatch1 = reader.fetch(); - final List rowBatch1 = readRows(readBatch1, split.splitId(), 1L, 0L); + final List rowBatch1 = readRows(readBatch1, split.splitId(), 1, 0L); TestHelpers.assertRecords(rowBatch1, recordBatchList.get(1), TestFixtures.SCHEMA); final RecordsWithSplitIds> readBatch2 = reader.fetch(); - final List rowBatch2 = readRows(readBatch2, split.splitId(), 2L, 0L); + final List rowBatch2 = readRows(readBatch2, split.splitId(), 2, 0L); TestHelpers.assertRecords(rowBatch2, recordBatchList.get(2), TestFixtures.SCHEMA); final RecordsWithSplitIds> finishedBatch @@ -170,15 +169,15 @@ public void testResumeFromStartOfSecondBatch() throws Exception { @Test public void testResumeFromMiddleOfSecondBatch() throws Exception { - final IcebergSourceSplit split = IcebergSourceSplit.fromCombinedScanTask(icebergSplit.task(), 1L, 1L); + final IcebergSourceSplit split = IcebergSourceSplit.fromCombinedScanTask(icebergSplit.task(), 1, 1L); reader.handleSplitsChanges(new SplitsAddition(Arrays.asList(split))); final RecordsWithSplitIds> readBatch1 = reader.fetch(); - final List rowBatch1 = readRows(readBatch1, split.splitId(), 1L, 1L); + final List rowBatch1 = readRows(readBatch1, split.splitId(), 1, 1L); TestHelpers.assertRecords(rowBatch1, recordBatchList.get(1).subList(1, 2), TestFixtures.SCHEMA); final RecordsWithSplitIds> readBatch2 = reader.fetch(); - final List rowBatch2 = readRows(readBatch2, split.splitId(), 2L, 0L); + final List rowBatch2 = readRows(readBatch2, split.splitId(), 2, 0L); TestHelpers.assertRecords(rowBatch2, recordBatchList.get(2), TestFixtures.SCHEMA); final RecordsWithSplitIds> finishedBatch @@ -189,16 +188,16 @@ public void testResumeFromMiddleOfSecondBatch() throws Exception { private List readRows( RecordsWithSplitIds> readBatch, - String expectedSplitId, long expectedOffset, long expectedStartingRecordOffset) { + String expectedSplitId, int expectedFileOffset, long expectedStartingRecordOffset) { Assert.assertEquals(expectedSplitId, readBatch.nextSplit()); final List rowDataList = new ArrayList<>(); RecordAndPosition row; int num = 0; while ((row = readBatch.nextRecordFromSplit()) != null) { - Assert.assertEquals(expectedOffset, 
row.getOffset()); + Assert.assertEquals(expectedFileOffset, row.fileOffset()); num++; - Assert.assertEquals(expectedStartingRecordOffset + num, row.getRecordSkipCount()); - rowDataList.add(row.getRecord()); + Assert.assertEquals(expectedStartingRecordOffset + num, row.recordOffset()); + rowDataList.add(row.record()); } readBatch.recycle(); return TestHelpers.convertRowDataToRow(rowDataList, TestFixtures.ROW_TYPE); diff --git a/flink/src/test/java/org/apache/iceberg/flink/source/reader/TestRecyclableArrayIterator.java b/flink/src/test/java/org/apache/iceberg/flink/source/reader/TestRecyclableArrayIterator.java index eb3503ed5371..bf36efcfdc34 100644 --- a/flink/src/test/java/org/apache/iceberg/flink/source/reader/TestRecyclableArrayIterator.java +++ b/flink/src/test/java/org/apache/iceberg/flink/source/reader/TestRecyclableArrayIterator.java @@ -20,7 +20,6 @@ package org.apache.iceberg.flink.source.reader; import java.util.concurrent.atomic.AtomicBoolean; -import org.apache.flink.connector.file.src.util.RecordAndPosition; import org.junit.Assert; import org.junit.Test; @@ -37,7 +36,7 @@ public void testEmptyConstruction() { @Test public void testGetElements() { final String[] elements = new String[]{"1", "2", "3", "4"}; - final long initialOffset = 3; + final int initialOffset = 3; final long initialSkipCount = 17; // dummy recycler @@ -46,9 +45,9 @@ public void testGetElements() { for (int i = 0; i < elements.length; i++) { final RecordAndPosition recAndPos = iter.next(); - Assert.assertEquals(elements[i], recAndPos.getRecord()); - Assert.assertEquals(initialOffset, recAndPos.getOffset()); - Assert.assertEquals(initialSkipCount + i + 1, recAndPos.getRecordSkipCount()); + Assert.assertEquals(elements[i], recAndPos.record()); + Assert.assertEquals(initialOffset, recAndPos.fileOffset()); + Assert.assertEquals(initialSkipCount + i + 1, recAndPos.recordOffset()); } } @@ -56,7 +55,7 @@ public void testGetElements() { public void testExhausted() { // dummy recycler final RecyclableArrayIterator iter = new RecyclableArrayIterator<>( - ignored -> System.currentTimeMillis(), new String[]{"1", "2"}, 2, 0L, 0L); + ignored -> System.currentTimeMillis(), new String[]{"1", "2"}, 2, 0, 0L); iter.next(); iter.next(); @@ -68,7 +67,7 @@ public void testExhausted() { public void testArraySubRange() { // dummy recycler final RecyclableArrayIterator iter = new RecyclableArrayIterator<>(ignored -> System.currentTimeMillis(), - new String[]{"1", "2", "3"}, 2, 0L, 0L); + new String[]{"1", "2", "3"}, 2, 0, 0L); Assert.assertNotNull(iter.next()); Assert.assertNotNull(iter.next());
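// num is 2 while the backing array has 3 elements, so iteration stops after "1" and "2";
// the third array element is never returned.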