@@ -26,7 +26,7 @@ import org.apache.kafka.common.TopicPartition
 
 import org.apache.spark.SparkEnv
 import org.apache.spark.internal.Logging
-import org.apache.spark.internal.LogKeys.{ERROR, OFFSETS, TIP}
+import org.apache.spark.internal.LogKeys.{ERROR, OFFSETS, TIP, TOPIC_PARTITION_OFFSET}
 import org.apache.spark.internal.config.Network.NETWORK_TIMEOUT
 import org.apache.spark.sql.SparkSession
 import org.apache.spark.sql.connector.read.{InputPartition, PartitionReaderFactory}
@@ -60,7 +60,11 @@ private[kafka010] class KafkaMicroBatchStream(
     metadataPath: String,
     startingOffsets: KafkaOffsetRangeLimit,
     failOnDataLoss: Boolean)
-  extends SupportsTriggerAvailableNow with ReportsSourceMetrics with MicroBatchStream with Logging {
+  extends SupportsTriggerAvailableNow
+  with SupportsRealTimeMode
+  with ReportsSourceMetrics
+  with MicroBatchStream
+  with Logging {
 
   private[kafka010] val pollTimeoutMs = options.getLong(
     KafkaSourceProvider.CONSUMER_POLL_TIMEOUT,
@@ -93,6 +97,11 @@ private[kafka010] class KafkaMicroBatchStream(
 
   private var isTriggerAvailableNow: Boolean = false
 
+  private var inRealTimeMode = false
+  override def prepareForRealTimeMode(): Unit = {
+    inRealTimeMode = true
+  }
+
   /**
    * Lazily initialize `initialPartitionOffsets` to make sure that `KafkaConsumer.poll` is only
    * called in StreamExecutionThread. Otherwise, interrupting a thread while running
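The hunk above is a simple mode switch: the engine is expected to call `prepareForRealTimeMode()` once before planning, and later code paths branch on the flag. A minimal, self-contained sketch of that pattern follows; `RealTimeModeSwitch` and `ToyStream` are illustrative stand-ins rather than Spark's actual `SupportsRealTimeMode` interface, and only `prepareForRealTimeMode()` is taken from the diff.

```scala
// Illustrative sketch of the mode-switch pattern above. RealTimeModeSwitch is a
// hypothetical stand-in for Spark's SupportsRealTimeMode; only prepareForRealTimeMode()
// is taken from the diff.
trait RealTimeModeSwitch {
  def prepareForRealTimeMode(): Unit
}

class ToyStream extends RealTimeModeSwitch {
  private var inRealTimeMode = false

  override def prepareForRealTimeMode(): Unit = {
    inRealTimeMode = true
  }

  // Later calls branch on the flag, as KafkaMicroBatchStream.metrics() does.
  def describeMode(): String =
    if (inRealTimeMode) "real-time" else "micro-batch"
}

object ToyStreamDemo extends App {
  val stream = new ToyStream
  println(stream.describeMode()) // micro-batch
  stream.prepareForRealTimeMode()
  println(stream.describeMode()) // real-time
}
```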
@@ -218,6 +227,107 @@ private[kafka010] class KafkaMicroBatchStream(
     }.toArray
   }
 
+  override def planInputPartitions(start: Offset): Array[InputPartition] = {
+    // This function is used for real-time mode, where trigger restrictions are not supported.
+    if (maxOffsetsPerTrigger.isDefined) {
+      throw new UnsupportedOperationException(
+        "maxOffsetsPerTrigger is not compatible with real time mode")
+    }
+    if (minOffsetPerTrigger.isDefined) {
+      throw new UnsupportedOperationException(
+        "minOffsetsPerTrigger is not compatible with real time mode"
+      )
+    }
+    if (options.containsKey(KafkaSourceProvider.MIN_PARTITIONS_OPTION_KEY)) {
+      throw new UnsupportedOperationException(
+        "minpartitions is not compatible with real time mode"
+      )
+    }
+    if (options.containsKey(KafkaSourceProvider.ENDING_TIMESTAMP_OPTION_KEY)) {
+      throw new UnsupportedOperationException(
+        "endingtimestamp is not compatible with real time mode"
+      )
+    }
+    if (options.containsKey(KafkaSourceProvider.MAX_TRIGGER_DELAY)) {
+      throw new UnsupportedOperationException(
+        "maxtriggerdelay is not compatible with real time mode"
+      )
+    }
+
+    // This function is used in real-time mode, where we expect a 1:1 mapping between a
+    // topic partition and an input partition.
+    // We skip the partition range check for performance reasons; it can always be done
+    // in the tasks if needed.
+    val startPartitionOffsets = start.asInstanceOf[KafkaSourceOffset].partitionToOffsets
+
+    // Compare the previous topic partitions against the latest partition offsets to see
+    // whether the partition list needs to be updated. The list doesn't have to be absolutely
+    // up to date: there may already be minutes of delay since a new partition was created,
+    // and latestPartitionOffsets should have been fetched recently anyway.
+    // If the topic partitions have changed, fetch the earliest offsets for all new partitions
+    // and add them to the list.
+    assert(latestPartitionOffsets != null, "latestPartitionOffsets should be set in latestOffset")
+    val latestTopicPartitions = latestPartitionOffsets.keySet
+    val newStartPartitionOffsets = if (startPartitionOffsets.keySet == latestTopicPartitions) {
+      startPartitionOffsets
+    } else {
+      val newPartitions = latestTopicPartitions.diff(startPartitionOffsets.keySet)
+      // Instead of fetching the earliest offsets, we could fill in offset 0 here and avoid
+      // this extra admin call. But new partitions are rare, and fetching the earliest offset
+      // aligns with what we do in micro-batch mode and can potentially enable more sanity
+      // checks on the executor side.
+      val newPartitionOffsets = kafkaOffsetReader.fetchEarliestOffsets(newPartitions.toSeq)
+
+      assert(
+        newPartitionOffsets.keys.forall(!startPartitionOffsets.contains(_)),
+        "startPartitionOffsets should not contain any key in newPartitionOffsets")
+
+      logInfo(log"Partitions added: ${MDC(TOPIC_PARTITION_OFFSET, newPartitionOffsets)}")
+      // Find any new partitions whose start offset is not 0.
+      val nonZeroNewPartitionOffsets = newPartitionOffsets.filter {
+        case (_, offset) => offset != 0
+      }
+      // Report non-zero start offsets of new partitions as potential data loss.
+      if (nonZeroNewPartitionOffsets.nonEmpty) {
+        logWarning(log"new partitions should start from offset 0: " +
+          log"${MDC(OFFSETS, nonZeroNewPartitionOffsets)}")
+        nonZeroNewPartitionOffsets.foreach {
+          case (p, o) =>
+            reportDataLoss(
+              s"Added partition $p starts from $o instead of 0. Some data may have been missed",
+              () => KafkaExceptions.addedPartitionDoesNotStartFromZero(p, o))
+        }
+      }
+
+      val deletedPartitions = startPartitionOffsets.keySet.diff(latestTopicPartitions)
+      if (deletedPartitions.nonEmpty) {
+        reportDataLoss(
+          s"$deletedPartitions are gone. Some data may have been missed",
+          () =>
+            KafkaExceptions.partitionsDeleted(deletedPartitions, None))
+      }
+
+      startPartitionOffsets ++ newPartitionOffsets
+    }
+
+    newStartPartitionOffsets.keySet.toSeq.map { tp =>
+      val fromOffset = newStartPartitionOffsets(tp)
+      KafkaBatchInputPartition(
+        KafkaOffsetRange(tp, fromOffset, Long.MaxValue, preferredLoc = None),
+        executorKafkaParams,
+        pollTimeoutMs,
+        failOnDataLoss,
+        includeHeaders)
+    }.toArray
+  }
+
+  override def mergeOffsets(offsets: Array[PartitionOffset]): Offset = {
+    val mergedMap = offsets.map {
+      case KafkaSourcePartitionOffset(p, o) => (p, o)
+    }.toMap
+    KafkaSourceOffset(mergedMap)
+  }
+
   override def createReaderFactory(): PartitionReaderFactory = {
     KafkaBatchReaderFactory
   }
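To make the partition reconciliation in `planInputPartitions` concrete, here is a small self-contained sketch of the same set arithmetic, with strings standing in for `TopicPartition` and a hard-coded map in place of `kafkaOffsetReader.fetchEarliestOffsets`; the names and offsets are invented for illustration.

```scala
// Illustrative sketch of the reconciliation above: diff the planned start offsets against
// the latest known partitions, add new partitions at their earliest offsets, and flag
// deleted partitions and suspicious non-zero start offsets.
object PartitionReconciliationDemo extends App {
  val startOffsets  = Map("t-0" -> 100L, "t-1" -> 200L, "t-2" -> 50L)
  val latestOffsets = Map("t-0" -> 150L, "t-1" -> 260L, "t-3" -> 30L) // t-2 gone, t-3 added

  val newPartitions     = latestOffsets.keySet.diff(startOffsets.keySet)  // Set(t-3)
  val deletedPartitions = startOffsets.keySet.diff(latestOffsets.keySet)  // Set(t-2)

  // Stand-in for fetchEarliestOffsets(newPartitions): pretend the broker says t-3 starts at 5.
  val newPartitionOffsets = Map("t-3" -> 5L)

  // A non-zero start offset on an added partition indicates possible data loss.
  val nonZero = newPartitionOffsets.filter { case (_, offset) => offset != 0L }

  val newStartOffsets = startOffsets ++ newPartitionOffsets

  println(s"added=$newPartitions deleted=$deletedPartitions suspicious=$nonZero")
  println(s"planning from: $newStartOffsets")
}
```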
@@ -235,7 +345,22 @@ private[kafka010] class KafkaMicroBatchStream(
   override def toString(): String = s"KafkaV2[$kafkaOffsetReader]"
 
   override def metrics(latestConsumedOffset: Optional[Offset]): ju.Map[String, String] = {
-    KafkaMicroBatchStream.metrics(latestConsumedOffset, latestPartitionOffsets)
+    val reCalculatedLatestPartitionOffsets =
+      if (inRealTimeMode) {
+        if (!latestConsumedOffset.isPresent) {
+          // This means the batch has no end offsets, which should not happen.
+          None
+        } else {
+          Some {
+            kafkaOffsetReader.fetchLatestOffsets(
+              Some(latestConsumedOffset.get.asInstanceOf[KafkaSourceOffset].partitionToOffsets))
+          }
+        }
+      } else {
+        Some(latestPartitionOffsets)
+      }
+
+    KafkaMicroBatchStream.metrics(latestConsumedOffset, reCalculatedLatestPartitionOffsets)
   }
 
   /**
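The hunk above changes how the "latest available" offsets are chosen for metrics: in real-time mode the cached `latestPartitionOffsets` may be stale, so the stream re-fetches them, while in micro-batch mode the cached value is simply wrapped in `Option`. A self-contained sketch of that decision, where `fetchLatest` is a stub standing in for `kafkaOffsetReader.fetchLatestOffsets` and the maps use strings and invented offsets:

```scala
// Illustrative sketch of the end-offset selection in metrics() above.
object MetricsOffsetSelectionDemo extends App {
  type OffsetMap = Map[String, Long]

  // Stub standing in for kafkaOffsetReader.fetchLatestOffsets: pretend every partition has
  // moved 40 records past the seed offsets since the batch was planned.
  def fetchLatest(seed: OffsetMap): OffsetMap =
    seed.map { case (tp, offset) => tp -> (offset + 40L) }

  def selectLatestForMetrics(
      inRealTimeMode: Boolean,
      latestConsumed: Option[OffsetMap],
      cachedLatest: OffsetMap): Option[OffsetMap] = {
    if (inRealTimeMode) {
      // In real-time mode the cached offsets may be stale, so re-fetch, seeded with what
      // was actually consumed; None if the batch reported no end offsets.
      latestConsumed.map(fetchLatest)
    } else {
      Some(cachedLatest)
    }
  }

  val consumed = Some(Map("t-0" -> 150L, "t-1" -> 260L))
  println(selectLatestForMetrics(inRealTimeMode = true, consumed, cachedLatest = Map.empty))
  println(selectLatestForMetrics(inRealTimeMode = false, consumed, cachedLatest = Map("t-0" -> 170L)))
}
```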
@@ -386,13 +511,14 @@ object KafkaMicroBatchStream extends Logging {
    */
   def metrics(
       latestConsumedOffset: Optional[Offset],
-      latestAvailablePartitionOffsets: PartitionOffsetMap): ju.Map[String, String] = {
+      latestAvailablePartitionOffsets: Option[PartitionOffsetMap]): ju.Map[String, String] = {
     val offset = Option(latestConsumedOffset.orElse(null))
 
-    if (offset.nonEmpty && latestAvailablePartitionOffsets != null) {
+    if (offset.nonEmpty && latestAvailablePartitionOffsets.isDefined) {
       val consumedPartitionOffsets = offset.map(KafkaSourceOffset(_)).get.partitionToOffsets
-      val offsetsBehindLatest = latestAvailablePartitionOffsets
-        .map(partitionOffset => partitionOffset._2 - consumedPartitionOffsets(partitionOffset._1))
+      val offsetsBehindLatest = latestAvailablePartitionOffsets.get
+        .map(partitionOffset => partitionOffset._2 -
+          consumedPartitionOffsets.getOrElse(partitionOffset._1, 0L))
       if (offsetsBehindLatest.nonEmpty) {
         val avgOffsetBehindLatest = offsetsBehindLatest.sum.toDouble / offsetsBehindLatest.size
         return Map[String, String](
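The `getOrElse(_, 0L)` change above matters when the latest available offsets contain a partition the query has not consumed yet (for example one added between batches); with the old direct map lookup that case would throw. A small worked example of the lag calculation, with strings standing in for `TopicPartition` and made-up offsets:

```scala
// Worked example of the lag computation in KafkaMicroBatchStream.metrics above.
// getOrElse(_, 0L) keeps the calculation from throwing when the latest offsets include a
// partition that has not been consumed yet (e.g. a freshly added one).
object OffsetLagDemo extends App {
  val latestAvailable = Map("t-0" -> 150L, "t-1" -> 260L, "t-3" -> 30L)
  val consumed        = Map("t-0" -> 150L, "t-1" -> 250L) // t-3 not consumed yet

  val offsetsBehindLatest =
    latestAvailable.map { case (tp, latest) => latest - consumed.getOrElse(tp, 0L) }

  val avgOffsetBehindLatest = offsetsBehindLatest.sum.toDouble / offsetsBehindLatest.size

  println(s"min=${offsetsBehindLatest.min} max=${offsetsBehindLatest.max} avg=$avgOffsetBehindLatest")
  // min=0 max=30 avg≈13.33
}
```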