diff --git a/examples/storm-starter/pom.xml b/examples/storm-starter/pom.xml
index 929c8ea48b7..a7d7b23544c 100644
--- a/examples/storm-starter/pom.xml
+++ b/examples/storm-starter/pom.xml
@@ -151,26 +151,12 @@
org.apache.kafka
- kafka_2.10
- 0.8.2.1
-
+ ${kafka.artifact.id}
provided
-
-
- org.apache.zookeeper
- zookeeper
-
-
- log4j
- log4j
-
-
org.apache.kafka
kafka-clients
- 0.8.2.1
- provided
org.apache.storm
diff --git a/external/flux/flux-examples/pom.xml b/external/flux/flux-examples/pom.xml
index 28d72392489..48cc1519057 100644
--- a/external/flux/flux-examples/pom.xml
+++ b/external/flux/flux-examples/pom.xml
@@ -95,18 +95,7 @@
org.apache.kafka
- kafka_2.10
- 0.8.1.1
-
-
- org.apache.zookeeper
- zookeeper
-
-
- log4j
- log4j
-
-
+ ${kafka.artifact.id}
diff --git a/external/flux/pom.xml b/external/flux/pom.xml
index 1fd1683c6ed..56d9babc076 100644
--- a/external/flux/pom.xml
+++ b/external/flux/pom.xml
@@ -78,19 +78,8 @@
org.apache.kafka
- kafka_2.10
- 0.8.1.1
+ ${kafka.artifact.id}
test
-
-
- org.apache.zookeeper
- zookeeper
-
-
- log4j
- log4j
-
-
junit
diff --git a/external/sql/storm-sql-kafka/pom.xml b/external/sql/storm-sql-kafka/pom.xml
index 450611ec837..0642d179d0b 100644
--- a/external/sql/storm-sql-kafka/pom.xml
+++ b/external/sql/storm-sql-kafka/pom.xml
@@ -63,26 +63,12 @@
org.apache.kafka
- kafka_2.10
- 0.8.2.1
-
+ ${kafka.artifact.id}
provided
-
-
- org.apache.zookeeper
- zookeeper
-
-
- log4j
- log4j
-
-
org.apache.kafka
kafka-clients
- 0.8.2.1
- provided
com.fasterxml.jackson.core
diff --git a/external/storm-kafka-client/README.md b/external/storm-kafka-client/README.md
new file mode 100644
index 00000000000..8ac15f524ba
--- /dev/null
+++ b/external/storm-kafka-client/README.md
@@ -0,0 +1,9 @@
+# Storm Kafka Spout New Consumer API
+
+This patch is still under development and it comes with no warranties at this moment.
+
+It has not been thoroughly tested, so it may contain bugs and is not yet ready for production use.
+
+The documentation will be uploaded soon.
+
+To see how to use the new Kafka Spout, please refer to the example under tests. Thank you!
\ No newline at end of file
diff --git a/external/storm-kafka-client/pom.xml b/external/storm-kafka-client/pom.xml
new file mode 100644
index 00000000000..6c82b6a57b7
--- /dev/null
+++ b/external/storm-kafka-client/pom.xml
@@ -0,0 +1,86 @@
+
+
+
+ 4.0.0
+
+
+ storm
+ org.apache.storm
+ 2.0.0-SNAPSHOT
+ ../../pom.xml
+
+
+ storm-kafka-client
+ storm-kafka-client
+
+ jar
+
+
+
+ hmcl
+ Hugo Louro
+ hmclouro@gmail.com
+
+
+
+
+
+
+ org.apache.storm
+ storm-core
+ ${project.version}
+ provided
+
+
+
+ org.apache.kafka
+ kafka-clients
+
+
+
+ org.mockito
+ mockito-all
+ test
+
+
+ junit
+ junit
+ 4.11
+ test
+
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-jar-plugin
+ 2.5
+
+
+
+ test-jar
+
+
+
+
+
+
+
diff --git a/external/storm-kafka-client/src/main/java/org/apache/storm/kafka/spout/KafkaSpout.java b/external/storm-kafka-client/src/main/java/org/apache/storm/kafka/spout/KafkaSpout.java
new file mode 100644
index 00000000000..d211ae915a1
--- /dev/null
+++ b/external/storm-kafka-client/src/main/java/org/apache/storm/kafka/spout/KafkaSpout.java
@@ -0,0 +1,547 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.storm.kafka.spout;
+
+import org.apache.kafka.clients.consumer.ConsumerRebalanceListener;
+import org.apache.kafka.clients.consumer.ConsumerRecord;
+import org.apache.kafka.clients.consumer.ConsumerRecords;
+import org.apache.kafka.clients.consumer.KafkaConsumer;
+import org.apache.kafka.clients.consumer.OffsetAndMetadata;
+import org.apache.kafka.common.TopicPartition;
+import org.apache.storm.kafka.spout.KafkaSpoutConfig.FirstPollOffsetStrategy;
+import org.apache.storm.spout.SpoutOutputCollector;
+import org.apache.storm.task.TopologyContext;
+import org.apache.storm.topology.OutputFieldsDeclarer;
+import org.apache.storm.topology.base.BaseRichSpout;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.NavigableSet;
+import java.util.Set;
+import java.util.TreeSet;
+import java.util.concurrent.TimeUnit;
+
+import static org.apache.storm.kafka.spout.KafkaSpoutConfig.FirstPollOffsetStrategy.EARLIEST;
+import static org.apache.storm.kafka.spout.KafkaSpoutConfig.FirstPollOffsetStrategy.LATEST;
+import static org.apache.storm.kafka.spout.KafkaSpoutConfig.FirstPollOffsetStrategy.UNCOMMITTED_EARLIEST;
+import static org.apache.storm.kafka.spout.KafkaSpoutConfig.FirstPollOffsetStrategy.UNCOMMITTED_LATEST;
+
+public class KafkaSpout extends BaseRichSpout {
+ private static final Logger LOG = LoggerFactory.getLogger(KafkaSpout.class);
+ private static final Comparator OFFSET_COMPARATOR = new OffsetComparator();
+
+ // Storm
+ protected SpoutOutputCollector collector;
+
+ // Kafka
+ private final KafkaSpoutConfig kafkaSpoutConfig;
+ private transient KafkaConsumer kafkaConsumer;
+ private transient boolean consumerAutoCommitMode;
+
+
+ // Bookkeeping
+ private transient int maxRetries; // Max number of times a tuple is retried
+ private transient FirstPollOffsetStrategy firstPollOffsetStrategy; // Strategy to determine the fetch offset of the first poll realized by the spout upon activation
+ private transient KafkaSpoutRetryService retryService; // Class that has the logic to handle tuple failure
+ private transient Timer commitTimer; // timer == null for auto commit mode
+ private transient boolean initialized; // Flag that is true once initialization has completed. While it is false, nextTuple() does not commit, poll or emit.
+ // Set to true at the end of KafkaSpoutConsumerRebalanceListener.onPartitionsAssigned(); reset to false in onPartitionsRevoked() when not in auto commit mode
+
+ private KafkaSpoutStreams kafkaSpoutStreams; // Object that wraps all the logic to declare output fields and emit tuples
+ private transient KafkaSpoutTuplesBuilder tuplesBuilder; // Object that contains the logic to build tuples for each ConsumerRecord
+
+ private transient Map acked; // Tuples that were successfully acked. These tuples will be committed periodically when the commit timer expires, after consumer rebalance, or on close/deactivate
+ private transient Set emitted; // Tuples that have been emitted but that are "on the wire", i.e. pending being acked or failed
+ private transient Iterator> waitingToEmit; // Records that have been polled and are queued to be emitted in the nextTuple() call. One record is emitted per nextTuple()
+ private transient long numUncommittedOffsets; // Number of offsets that have been polled and emitted but not yet been committed
+
+
+ public KafkaSpout(KafkaSpoutConfig kafkaSpoutConfig) {
+ this.kafkaSpoutConfig = kafkaSpoutConfig; // Pass in configuration
+ this.kafkaSpoutStreams = kafkaSpoutConfig.getKafkaSpoutStreams();
+ }
+
+    /**
+     * Resets the spout's transient state and loads its settings and delegates from the spout configuration.
+     * The spout stays uninitialized after this call; {@code initialized} is only flipped by the consumer
+     * rebalance listener.
+     *
+     * @param conf      topology configuration (not read here)
+     * @param context   topology context (not read here)
+     * @param collector collector kept for emitting tuples
+     */
+    @Override
+    public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
+        // Fresh, empty bookkeeping state
+        this.initialized = false;
+        this.numUncommittedOffsets = 0;
+        this.acked = new HashMap<>();
+        this.emitted = new HashSet<>();
+        this.waitingToEmit = Collections.emptyListIterator();
+
+        // Storm side
+        this.collector = collector;
+
+        // Settings and delegates taken from the spout configuration
+        this.maxRetries = kafkaSpoutConfig.getMaxTupleRetries();
+        this.firstPollOffsetStrategy = kafkaSpoutConfig.getFirstPollOffsetStrategy();
+        this.consumerAutoCommitMode = kafkaSpoutConfig.isConsumerAutoCommitMode();
+        this.retryService = kafkaSpoutConfig.getRetryService();
+        this.tuplesBuilder = kafkaSpoutConfig.getTuplesBuilder();
+
+        // Offsets are committed manually only when the consumer does not auto commit
+        final boolean manualCommit = !consumerAutoCommitMode;
+        if (manualCommit) {
+            this.commitTimer = new Timer(500, kafkaSpoutConfig.getOffsetsCommitPeriodMs(), TimeUnit.MILLISECONDS);
+        }
+
+        LOG.info("Kafka Spout opened with the following configuration: {}", kafkaSpoutConfig);
+    }
+
+ // =========== Consumer Rebalance Listener - On the same thread as the caller ===========
+
+ /**
+ * Rebalance listener that commits acked offsets when partitions are revoked and (re)initializes the
+ * per-partition state of the enclosing spout when partitions are assigned.
+ */
+ private class KafkaSpoutConsumerRebalanceListener implements ConsumerRebalanceListener {
+ // In non auto commit mode, and only if initialization had completed, marks the spout as not initialized
+ // and commits the offsets of the acked tuples.
+ @Override
+ public void onPartitionsRevoked(Collection partitions) {
+ LOG.debug("Partitions revoked. [consumer-group={}, consumer={}, topic-partitions={}]",
+ kafkaSpoutConfig.getConsumerGroupId(), kafkaConsumer, partitions);
+ if (!consumerAutoCommitMode && initialized) {
+ initialized = false;
+ commitOffsetsForAckedTuples();
+ }
+ }
+
+ // Logs the assignment and delegates to initialize(), which sets initialized back to true.
+ @Override
+ public void onPartitionsAssigned(Collection partitions) {
+ LOG.debug("Partitions reassignment. [consumer-group={}, consumer={}, topic-partitions={}]",
+ kafkaSpoutConfig.getConsumerGroupId(), kafkaConsumer, partitions);
+
+ initialize(partitions);
+ }
+
+ // Drops state of partitions that are no longer assigned, then for every assigned partition positions the
+ // consumer (doSeek) and registers the resulting fetch offset (setAcked).
+ private void initialize(Collection partitions) {
+ if (!consumerAutoCommitMode) {
+ acked.keySet().retainAll(partitions); // remove from acked all partitions that are no longer assigned to this spout
+ }
+
+ retryService.retainAll(partitions);
+
+ for (TopicPartition tp : partitions) {
+ final OffsetAndMetadata committedOffset = kafkaConsumer.committed(tp);
+ final long fetchOffset = doSeek(tp, committedOffset);
+ setAcked(tp, fetchOffset);
+ }
+ initialized = true;
+ LOG.debug("Initialization complete");
+ }
+
+ /**
+ * Sets the cursor to the location dictated by the first poll strategy and returns the fetch offset.
+ *
+ * When an offset was committed: EARLIEST and LATEST ignore it and seek to the beginning or end; any other
+ * strategy resumes at the committed offset + 1. When nothing was committed: EARLIEST and UNCOMMITTED_EARLIEST
+ * seek to the beginning, LATEST and UNCOMMITTED_LATEST seek to the end.
+ */
+ private long doSeek(TopicPartition tp, OffsetAndMetadata committedOffset) {
+ long fetchOffset;
+ if (committedOffset != null) { // offset was committed for this TopicPartition
+ if (firstPollOffsetStrategy.equals(EARLIEST)) {
+ kafkaConsumer.seekToBeginning(tp);
+ fetchOffset = kafkaConsumer.position(tp); // position() is read after the seek to obtain the resulting offset
+ } else if (firstPollOffsetStrategy.equals(LATEST)) {
+ kafkaConsumer.seekToEnd(tp);
+ fetchOffset = kafkaConsumer.position(tp);
+ } else {
+ // By default polling starts at the last committed offset. +1 to point fetch to the first uncommitted offset.
+ fetchOffset = committedOffset.offset() + 1;
+ kafkaConsumer.seek(tp, fetchOffset);
+ }
+ } else { // no commits have ever been done, so start at the beginning or end depending on the strategy
+ if (firstPollOffsetStrategy.equals(EARLIEST) || firstPollOffsetStrategy.equals(UNCOMMITTED_EARLIEST)) {
+ kafkaConsumer.seekToBeginning(tp);
+ } else if (firstPollOffsetStrategy.equals(LATEST) || firstPollOffsetStrategy.equals(UNCOMMITTED_LATEST)) {
+ kafkaConsumer.seekToEnd(tp);
+ }
+ fetchOffset = kafkaConsumer.position(tp);
+ }
+ return fetchOffset;
+ }
+ }
+
+    /**
+     * Registers a new {@code OffsetEntry} starting at {@code fetchOffset} for the given partition.
+     * Nothing is registered in auto commit mode, nor when the partition already has an entry, so that a
+     * partition previously assigned to this spout resumes where it left off.
+     *
+     * @param tp          partition to track
+     * @param fetchOffset offset handed to the new {@code OffsetEntry}
+     */
+    private void setAcked(TopicPartition tp, long fetchOffset) {
+        if (consumerAutoCommitMode || acked.containsKey(tp)) {
+            return;     // nothing to track, or keep the existing entry untouched
+        }
+        acked.put(tp, new OffsetEntry(tp, fetchOffset));
+    }
+
+ // ======== Next Tuple =======
+
+    /**
+     * Runs one spout cycle: commits acked offsets if a commit is due, polls Kafka if a poll is allowed,
+     * and emits if records are waiting to be emitted. Does nothing but log while the spout is not initialized.
+     */
+    @Override
+    public void nextTuple() {
+        if (!initialized) {
+            LOG.debug("Spout not initialized. Not sending tuples until initialization completes");
+            return;
+        }
+
+        if (commit()) {
+            commitOffsetsForAckedTuples();
+        }
+        if (poll()) {
+            setWaitingToEmit(pollKafkaBroker());
+        }
+        if (waitingToEmit()) {
+            emit();
+        }
+    }
+
+    /**
+     * @return true when offsets must be committed now, i.e. the spout is not in auto commit mode and the
+     *         commit timer reports expiry (the timer is only consulted in non auto commit mode)
+     */
+    private boolean commit() {
+        if (consumerAutoCommitMode) {
+            return false;   // commitTimer is null in auto commit mode, so it must not be touched
+        }
+        return commitTimer.isExpiredResetOnTrue();
+    }
+
+    /**
+     * @return true when Kafka should be polled: no records are waiting to be emitted and the number of
+     *         uncommitted offsets is below the configured maximum
+     */
+    private boolean poll() {
+        if (waitingToEmit()) {
+            return false;
+        }
+        return numUncommittedOffsets < kafkaSpoutConfig.getMaxUncommittedOffsets();
+    }
+
+    /**
+     * @return true when the iterator of polled records exists and still has a record to emit
+     */
+    private boolean waitingToEmit() {
+        if (waitingToEmit == null) {
+            return false;
+        }
+        return waitingToEmit.hasNext();
+    }
+
+ /**
+ * Replaces the queue of records waiting to be emitted with the records of the given poll result.
+ * Records are appended partition by partition, in the order returned by consumerRecords.partitions().
+ */
+ public void setWaitingToEmit(ConsumerRecords consumerRecords) {
+ List> waitingToEmitList = new LinkedList<>();
+ for (TopicPartition tp : consumerRecords.partitions()) {
+ waitingToEmitList.addAll(consumerRecords.records(tp));
+ }
+ waitingToEmit = waitingToEmitList.iterator(); // emit() walks this iterator and removes each record after handling it
+ LOG.trace("Records waiting to be emitted {}", waitingToEmitList);
+ }
+
+ // ======== poll =========
+    /**
+     * Repositions the consumer on the partitions that have tuples to retry, then polls Kafka once using the
+     * configured poll timeout.
+     *
+     * @return the records returned by the poll
+     */
+    private ConsumerRecords pollKafkaBroker() {
+        doSeekRetriableTopicPartitions();
+
+        final ConsumerRecords polled = kafkaConsumer.poll(kafkaSpoutConfig.getPollTimeoutMs());
+        LOG.debug("Polled [{}] records from Kafka. NumUncommittedOffsets=[{}]", polled.count(), numUncommittedOffsets);
+        return polled;
+    }
+
+    /**
+     * Repositions the consumer on every partition that has tuples ready to be retried, so that the next
+     * poll fetches the failed records again.
+     *
+     * If the partition has an offset that is ready to be committed, fetching resumes right after it.
+     * Otherwise fetching resumes right after the last offset committed to Kafka, using the same
+     * "committed offset + 1" convention applied when partitions are assigned. The previous implementation
+     * called seekToEnd() in this case, which moves past every record already in the log and therefore never
+     * re-fetches the records waiting for retry.
+     */
+    private void doSeekRetriableTopicPartitions() {
+        for (TopicPartition rtp : retryService.retriableTopicPartitions()) {
+            final OffsetAndMetadata nextCommit = acked.get(rtp).findNextCommitOffset();
+            if (nextCommit != null) {
+                kafkaConsumer.seek(rtp, nextCommit.offset() + 1);  // seek to the next offset that is ready to commit in next commit cycle
+                continue;
+            }
+
+            final OffsetAndMetadata lastCommitted = kafkaConsumer.committed(rtp);
+            if (lastCommitted != null) {
+                kafkaConsumer.seek(rtp, lastCommitted.offset() + 1);   // first offset after the last committed one
+            } else {
+                // NOTE(review): nothing has been committed for this partition yet. The original behavior is kept
+                // here; seeking to the partition's initial fetch offset would presumably be more accurate - confirm.
+                kafkaConsumer.seekToEnd(rtp);
+            }
+        }
+    }
+
+ // ======== emit =========
+ // Takes the next record waiting to be emitted, emits a tuple for it unless one was already emitted,
+ // and then drops the record from the waiting list.
+ private void emit() {
+ emitTupleIfNotEmitted(waitingToEmit.next());
+ waitingToEmit.remove(); // must follow next(): removes the record that was just handled
+ }
+
+ // emits one tuple per record
+ private void emitTupleIfNotEmitted(ConsumerRecord record) {
+ final TopicPartition tp = new TopicPartition(record.topic(), record.partition());
+ final KafkaSpoutMessageId msgId = new KafkaSpoutMessageId(record);
+
+ if (acked.containsKey(tp) && acked.get(tp).contains(msgId)) { // has been acked
+ LOG.trace("Tuple for record [{}] has already been acked. Skipping", record);
+ } else if (emitted.contains(msgId)) { // has been emitted and it's pending ack or fail
+ LOG.trace("Tuple for record [{}] has already been emitted. Skipping", record);
+ } else if (!retryService.isScheduled(msgId) || retryService.isReady(msgId)) { // not scheduled <=> never failed (i.e. never emitted) or ready to be retried
+ final List
org.apache.kafka
- kafka_2.10
- 0.8.2.1
-
+ ${kafka.artifact.id}
provided
-
-
- org.apache.zookeeper
- zookeeper
-
-
- log4j
- log4j
-
-
org.apache.kafka
kafka-clients
- 0.8.2.1
- provided
org.apache.storm
diff --git a/external/storm-solr/pom.xml b/external/storm-solr/pom.xml
index d093ae8437f..ba79ddc686c 100644
--- a/external/storm-solr/pom.xml
+++ b/external/storm-solr/pom.xml
@@ -31,10 +31,10 @@
- Hugo-Louro
- Hugo Louro
- hmclouro@gmail.com
-
+ hmcl
+ Hugo Louro
+ hmclouro@gmail.com
+
diff --git a/pom.xml b/pom.xml
index 12e5a9f50fd..1a899b36946 100644
--- a/pom.xml
+++ b/pom.xml
@@ -82,7 +82,7 @@
Committer
-
+
afeng
@@ -109,7 +109,7 @@
Committer
-
+
jjackson
@@ -249,6 +249,8 @@
1.4.0-incubating
2.6.3
2.18.1
+ 0.9.0.1
+ kafka_2.11
org.apache.storm.testing.IntegrationTest
@@ -282,6 +284,7 @@
external/storm-mongodb
examples/storm-starter
storm-clojure
+ external/storm-kafka-client
@@ -673,14 +676,14 @@
${ring-json.version}
- org.eclipse.jetty
- jetty-servlet
- ${jetty.version}
+ org.eclipse.jetty
+ jetty-servlet
+ ${jetty.version}
- org.eclipse.jetty
- jetty-servlets
- ${jetty.version}
+ org.eclipse.jetty
+ jetty-servlets
+ ${jetty.version}
org.eclipse.jetty
@@ -831,7 +834,7 @@
${thrift.version}
compile
-
+
junit
junit
@@ -839,14 +842,38 @@
test
- org.apache.calcite
- calcite-core
- ${calcite.version}
+ org.apache.calcite
+ calcite-core
+ ${calcite.version}
+
+
+ com.fasterxml.jackson.core
+ jackson-databind
+ ${jackson.version}
+
+
+ org.apache.kafka
+ ${kafka.artifact.id}
+ ${kafka.version}
+
+
+ org.apache.zookeeper
+ zookeeper
+
+
+ log4j
+ log4j
+
+
+ org.slf4j
+ slf4j-log4j12
+
+
- com.fasterxml.jackson.core
- jackson-databind
- ${jackson.version}
+ org.apache.kafka
+ kafka-clients
+ ${kafka.version}
uk.org.lidalia
diff --git a/storm-dist/binary/src/main/assembly/binary.xml b/storm-dist/binary/src/main/assembly/binary.xml
index 7f0da6fc62e..648640ea691 100644
--- a/storm-dist/binary/src/main/assembly/binary.xml
+++ b/storm-dist/binary/src/main/assembly/binary.xml
@@ -317,6 +317,20 @@
README.*
+
+ ${project.basedir}/../../external/storm-kafka-client/target
+ external/storm-kafka-client
+
+ storm*jar
+
+
+
+ ${project.basedir}/../../external/storm-kafka-client
+ external/storm-kafka-client
+
+ README.*
+
+