diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/PairDStreamFunctions.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/PairDStreamFunctions.scala index f38c1e7996595..88d0421316009 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/PairDStreamFunctions.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/PairDStreamFunctions.scala @@ -45,6 +45,16 @@ class PairDStreamFunctions[K, V](self: DStream[(K, V)]) new HashPartitioner(numPartitions) } + /** + * Return a DStream with the keys of each tuple. + */ + def keys: DStream[K] = self.map(_._1) + + /** + * Return a DStream with the values of each tuple. + */ + def values: DStream[V] = self.map(_._2) + /** * Return a new DStream by applying `groupByKey` to each RDD. Hash partitioning is used to * generate the RDDs with Spark's default number of partitions.