3535
3636import java .io .Serializable ;
3737import java .util .ArrayList ;
38- import java .util .Arrays ;
3938import java .util .Collection ;
4039import java .util .Collections ;
4140import java .util .HashSet ;
@@ -71,7 +70,7 @@ public class KafkaTridentSpoutEmitter<K, V> implements IOpaquePartitionedTrident
7170
7271 private TopologyContext topologyContext ;
7372
74- public KafkaTridentSpoutEmitter (KafkaTridentSpoutManager <K ,V > kafkaManager , TopologyContext topologyContext , Timer refreshSubscriptionTimer ) {
73+ public KafkaTridentSpoutEmitter (KafkaTridentSpoutManager <K , V > kafkaManager , TopologyContext topologyContext , Timer refreshSubscriptionTimer ) {
7574 this .kafkaConsumer = kafkaManager .createAndSubscribeKafkaConsumer (topologyContext );
7675 this .kafkaManager = kafkaManager ;
7776 this .topologyContext = topologyContext ;
@@ -87,14 +86,14 @@ public KafkaTridentSpoutEmitter(KafkaTridentSpoutManager<K,V> kafkaManager, Topo
8786 /**
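8887 * Creates instance of this class with a refresh subscription timer built from a default value of 500 and the partition refresh period configured in the Kafka spout config, both in milliseconds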
8988 */
90- public KafkaTridentSpoutEmitter (KafkaTridentSpoutManager <K ,V > kafkaManager , TopologyContext topologyContext ) {
89+ public KafkaTridentSpoutEmitter (KafkaTridentSpoutManager <K , V > kafkaManager , TopologyContext topologyContext ) {
9190 this (kafkaManager , topologyContext , new Timer (500 ,
9291 kafkaManager .getKafkaSpoutConfig ().getPartitionRefreshPeriodMs (), TimeUnit .MILLISECONDS ));
9392 }
9493
9594 @ Override
9695 public KafkaTridentSpoutBatchMetadata <K , V > emitPartitionBatch (TransactionAttempt tx , TridentCollector collector ,
97- KafkaTridentSpoutTopicPartition currBatchPartition , KafkaTridentSpoutBatchMetadata <K , V > lastBatch ) {
96+ KafkaTridentSpoutTopicPartition currBatchPartition , KafkaTridentSpoutBatchMetadata <K , V > lastBatch ) {
9897
9998 LOG .debug ("Processing batch: [transaction = {}], [currBatchPartition = {}], [lastBatchMetadata = {}], [collector = {}]" ,
10099 tx , currBatchPartition , lastBatch , collector );
@@ -105,10 +104,10 @@ public KafkaTridentSpoutBatchMetadata<K, V> emitPartitionBatch(TransactionAttemp
105104 Collection <TopicPartition > pausedTopicPartitions = Collections .emptySet ();
106105
107106 if (assignments == null || !assignments .contains (currBatchPartition .getTopicPartition ())) {
108- LOG .warn ("SKIPPING processing batch: [transaction = {}], [currBatchPartition = {}], [lastBatchMetadata = {}], " +
109- "[collector = {}] because it is not assigned {} to consumer instance [{}] of consumer group [{}]" ,
110- tx , currBatchPartition , lastBatch , collector , assignments , kafkaConsumer ,
111- kafkaManager .getKafkaSpoutConfig ().getConsumerGroupId ());
107+ LOG .warn ("SKIPPING processing batch [transaction = {}], [currBatchPartition = {}], [lastBatchMetadata = {}], " +
108+ "[collector = {}] because it is not part of the assignments {} of consumer instance [{}] " +
109+ "of consumer group [{}]" , tx , currBatchPartition , lastBatch , collector , assignments ,
110+ kafkaConsumer , kafkaManager .getKafkaSpoutConfig ().getConsumerGroupId ());
112111 } else {
113112 try {
114113 // pause other topic-partitions to only poll from current topic-partition
@@ -205,67 +204,40 @@ public void refreshPartitions(List<KafkaTridentSpoutTopicPartition> partitionRes
205204 /**
206205 * Returns the topic-partitions of all tasks, in the same order as the given list of all partitions. The subset
207206 * assigned to the Kafka consumer running on this task is returned by {@link #getPartitionsForTask}.
207+ *
208208 * @param allPartitionInfo list of all partitions as returned by {@link KafkaTridentSpoutOpaqueCoordinator}
209209 * @return list of all topic partitions across all tasks, in the same order as {@code allPartitionInfo}
210210 */
211211 @ Override
212212 public List <KafkaTridentSpoutTopicPartition > getOrderedPartitions (final List <TopicPartition > allPartitionInfo ) {
213- final int numTopicPartitions = allPartitionInfo == null ? 0 : allPartitionInfo .size ();
214- final int taskIndex = topologyContext .getThisTaskIndex ();
215- final int numTasks = topologyContext .getComponentTasks (topologyContext .getThisComponentId ()).size ();
216-
217- LOG .debug ("Computing task ordered list of topic-partitions from all partitions list {}, " +
218- "for task with index [{}] of total tasks [{}] " , allPartitionInfo , taskIndex , numTasks );
219-
220- final Set <TopicPartition > assignment = kafkaConsumer .assignment ();
221- LOG .debug ("Consumer [{}] has assigned topic-partitions {}" , kafkaConsumer , assignment );
222-
223- List <KafkaTridentSpoutTopicPartition > taskOrderedTps = new ArrayList <>(numTopicPartitions );
224-
225- if (numTopicPartitions > 0 ) {
226- final KafkaTridentSpoutTopicPartition [] tps = new KafkaTridentSpoutTopicPartition [numTopicPartitions ];
227- int tpTaskComputedIdx = taskIndex ;
228- /*
229- * Put this task's Kafka consumer assigned topic-partitions in the right index locations such
230- * that distribution by OpaquePartitionedTridentSpoutExecutor can be done correctly. This algorithm
231- * does the distribution in exactly the same way as the one used in OpaquePartitionedTridentSpoutExecutor
232- */
233- for (TopicPartition assignedTp : assignment ) {
234- if (tpTaskComputedIdx >= numTopicPartitions ) {
235- LOG .warn ("Ignoring attempt to add consumer [{}] assigned topic-partition [{}] to index [{}], " +
236- "out of bounds [{}]. " , kafkaConsumer , assignedTp , tpTaskComputedIdx , numTopicPartitions );
237- break ;
238- }
239- tps [tpTaskComputedIdx ] = new KafkaTridentSpoutTopicPartition (assignedTp );
240- LOG .debug ("Added consumer assigned topic-partition [{}] to position [{}] for task with index [{}]" ,
241- assignedTp , tpTaskComputedIdx , taskIndex );
242- tpTaskComputedIdx += numTasks ;
243- }
213+ final List <KafkaTridentSpoutTopicPartition > allPartitions = newKafkaTridentSpoutTopicPartitions (allPartitionInfo );
214+ LOG .debug ("Returning all topic-partitions {} across all tasks. Current task index [{}]. Total tasks [{}] " ,
215+ allPartitions , topologyContext .getThisTaskIndex (), getNumTasks ());
216+ return allPartitions ;
217+ }
244218
245- // Put topic-partitions assigned to consumer instances running in different tasks in the empty slots
246- int i = 0 ;
247- for (TopicPartition tp : allPartitionInfo ) {
248- /*
249- * Topic-partition not assigned to the Kafka consumer associated with this emitter task, hence not yet
250- * added to the list of task ordered partitions. To be processed next.
251- */
252- if (!assignment .contains (tp )) {
253- for (; i < numTopicPartitions ; i ++) {
254- if (tps [i ] == null ) { // find empty slot to put the topic-partition
255- tps [i ] = new KafkaTridentSpoutTopicPartition (tp );
256- LOG .debug ("Added to position [{}] topic-partition [{}], which is assigned to a consumer " +
257- "running on a task other than task with index [{}] " , i , tp , taskIndex );
258- i ++;
259- break ;
260- }
261- }
262- }
219+ @ Override
220+ public List <KafkaTridentSpoutTopicPartition > getPartitionsForTask (int taskId , int numTasks , List <TopicPartition > allPartitionInfo ) {
221+ final Set <TopicPartition > assignedTps = kafkaConsumer .assignment ();
222+ LOG .debug ("Consumer [{}], running on task with index [{}], has assigned topic-partitions {}" , kafkaConsumer , taskId , assignedTps );
223+ final List <KafkaTridentSpoutTopicPartition > taskTps = newKafkaTridentSpoutTopicPartitions (assignedTps );
224+ LOG .debug ("Returning topic-partitions {} for task with index [{}]" , taskTps , taskId );
225+ return taskTps ;
226+ }
227+
228+ private List <KafkaTridentSpoutTopicPartition > newKafkaTridentSpoutTopicPartitions (Collection <TopicPartition > tps ) {
229+ final List <KafkaTridentSpoutTopicPartition > kttp = new ArrayList <>(tps == null ? 0 : tps .size ());
230+ if (tps != null ) {
231+ for (TopicPartition tp : tps ) {
232+ LOG .trace ("Added topic-partition [{}]" , tp );
233+ kttp .add (new KafkaTridentSpoutTopicPartition (tp ));
263234 }
264- taskOrderedTps = Arrays .asList (tps );
265235 }
266- LOG .debug ("Returning ordered list of topic-partitions {} for task with index [{}], of total tasks [{}] " ,
267- taskOrderedTps , taskIndex , numTasks );
268- return taskOrderedTps ;
236+ return kttp ;
237+ }
238+
239+ private int getNumTasks () {
240+ return topologyContext .getComponentTasks (topologyContext .getThisComponentId ()).size ();
269241 }
270242
271243 @ Override
0 commit comments