diff --git a/.rat-excludes b/.rat-excludes
index bf071eba652b1..335772733deee 100644
--- a/.rat-excludes
+++ b/.rat-excludes
@@ -85,4 +85,7 @@ org.apache.spark.sql.sources.DataSourceRegister
 org.apache.spark.scheduler.SparkHistoryListenerFactory
 .*parquet
 LZ4BlockInputStream.java
+<<<<<<< HEAD
+=======
 spark-deps-.*
+>>>>>>> 15bd73627e04591fd13667b4838c9098342db965
diff --git a/R/pkg/R/column.R b/R/pkg/R/column.R
index 356bcee3cf5c6..54eb97f430898 100644
--- a/R/pkg/R/column.R
+++ b/R/pkg/R/column.R
@@ -215,7 +215,11 @@ setMethod("%in%",
 
 #' otherwise
 #'
+<<<<<<< HEAD
+#' If values in the specified column are null, returns the value.
+=======
 #' If values in the specified column are null, returns the value.
+>>>>>>> 15bd73627e04591fd13667b4838c9098342db965
 #' Can be used in conjunction with `when` to specify a default value for expressions.
 #'
 #' @rdname otherwise
@@ -225,7 +229,11 @@ setMethod("%in%",
 setMethod("otherwise",
           signature(x = "Column", value = "ANY"),
           function(x, value) {
+<<<<<<< HEAD
+            value <- ifelse(class(value) == "Column", value@jc, value)
+=======
             value <- if (class(value) == "Column") { value@jc } else { value }
+>>>>>>> 15bd73627e04591fd13667b4838c9098342db965
             jc <- callJMethod(x@jc, "otherwise", value)
             column(jc)
           })
diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index df36bc869acb4..8825d1ca8a111 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -37,7 +37,11 @@ setMethod("lit", signature("ANY"),
           function(x) {
             jc <- callJStatic("org.apache.spark.sql.functions",
                               "lit",
+<<<<<<< HEAD
+                              ifelse(class(x) == "Column", x@jc, x))
+=======
                               if (class(x) == "Column") { x@jc } else { x })
+>>>>>>> 15bd73627e04591fd13667b4838c9098342db965
             column(jc)
           })
 
@@ -2262,7 +2266,11 @@ setMethod("unix_timestamp", signature(x = "Column", format = "character"),
 setMethod("when", signature(condition = "Column", value = "ANY"),
           function(condition, value) {
               condition <- condition@jc
+<<<<<<< HEAD
+              value <- ifelse(class(value) == "Column", value@jc, value)
+=======
               value <- if (class(value) == "Column") { value@jc } else { value }
+>>>>>>> 15bd73627e04591fd13667b4838c9098342db965
               jc <- callJStatic("org.apache.spark.sql.functions", "when", condition, value)
               column(jc)
           })
@@ -2277,16 +2285,25 @@ setMethod("when", signature(condition = "Column", value = "ANY"),
 #' @name ifelse
 #' @seealso \link{when}
 #' @export
+<<<<<<< HEAD
+#' @examples \dontrun{ifelse(df$a > 1 & df$b > 2, 0, 1)}
+=======
 #' @examples \dontrun{
 #' ifelse(df$a > 1 & df$b > 2, 0, 1)
 #' ifelse(df$a > 1, df$a, 1)
 #' }
+>>>>>>> 15bd73627e04591fd13667b4838c9098342db965
 setMethod("ifelse",
           signature(test = "Column", yes = "ANY", no = "ANY"),
           function(test, yes, no) {
             test <- test@jc
+<<<<<<< HEAD
+            yes <- ifelse(class(yes) == "Column", yes@jc, yes)
+            no <- ifelse(class(no) == "Column", no@jc, no)
+=======
             yes <- if (class(yes) == "Column") { yes@jc } else { yes }
             no <- if (class(no) == "Column") { no@jc } else { no }
+>>>>>>> 15bd73627e04591fd13667b4838c9098342db965
             jc <- callJMethod(callJStatic("org.apache.spark.sql.functions",
                                           "when",
                                           test, yes),
diff --git a/R/pkg/inst/tests/testthat/test_sparkSQL.R b/R/pkg/inst/tests/testthat/test_sparkSQL.R
index 7b508b860efb2..931261f3632b7 100644
--- a/R/pkg/inst/tests/testthat/test_sparkSQL.R
+++ b/R/pkg/inst/tests/testthat/test_sparkSQL.R
@@ -62,10 +62,13 @@ mockLinesComplexType <-
 complexTypeJsonPath <- tempfile(pattern="sparkr-test", fileext=".tmp")
 writeLines(mockLinesComplexType, complexTypeJsonPath)
+<<<<<<< HEAD
+=======
 test_that("calling sparkRSQL.init returns existing
SQL context", { expect_equal(sparkRSQL.init(sc), sqlContext) }) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 test_that("infer types and check types", { expect_equal(infer_type(1L), "integer") expect_equal(infer_type(1.0), "double") @@ -1124,6 +1127,8 @@ test_that("when(), otherwise() and ifelse() on a DataFrame", { expect_equal(collect(select(df, ifelse(df$a > 1 & df$b > 2, 0, 1)))[, 1], c(1, 0)) }) +<<<<<<< HEAD +======= test_that("when(), otherwise() and ifelse() with column on a DataFrame", { l <- list(list(a = 1, b = 2), list(a = 3, b = 4)) df <- createDataFrame(sqlContext, l) @@ -1132,6 +1137,7 @@ test_that("when(), otherwise() and ifelse() with column on a DataFrame", { expect_equal(collect(select(df, ifelse(df$a > 1 & df$b > 2, lit(0), lit(1))))[, 1], c(1, 0)) }) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 test_that("group by, agg functions", { df <- read.json(sqlContext, jsonPath) df1 <- agg(df, name = "max", age = "sum") diff --git a/core/src/main/resources/org/apache/spark/ui/static/webui.css b/core/src/main/resources/org/apache/spark/ui/static/webui.css index 48f86d1536c99..b88ee12c75869 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/webui.css +++ b/core/src/main/resources/org/apache/spark/ui/static/webui.css @@ -225,6 +225,9 @@ a.expandbutton { background-color: #49535a !important; color: white; cursor:pointer; +<<<<<<< HEAD +} +======= } .table-head-clickable th a, .table-head-clickable th a:hover { @@ -235,3 +238,4 @@ a.expandbutton { color: #333; text-decoration: none; } +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 diff --git a/core/src/main/scala/org/apache/spark/SparkConf.scala b/core/src/main/scala/org/apache/spark/SparkConf.scala index ff2c4c34c0ca7..c7e9bc51333bd 100644 --- a/core/src/main/scala/org/apache/spark/SparkConf.scala +++ b/core/src/main/scala/org/apache/spark/SparkConf.scala @@ -544,8 +544,12 @@ private[spark] object SparkConf extends Logging { DeprecatedConfig("spark.kryoserializer.buffer.mb", "1.4", "Please use spark.kryoserializer.buffer instead. The default value for " + "spark.kryoserializer.buffer.mb was previously specified as '0.064'. Fractional values " + +<<<<<<< HEAD + "are no longer accepted. To specify the equivalent now, one may use '64k'.") +======= "are no longer accepted. 
To specify the equivalent now, one may use '64k'."), DeprecatedConfig("spark.rpc", "2.0", "Not used any more.") +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 ) Map(configs.map { cfg => (cfg.key -> cfg) } : _*) diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index 77e44ee0264af..4f3f93c3e9a67 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -836,7 +836,11 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli minPartitions: Int = defaultMinPartitions): RDD[String] = withScope { assertNotStopped() hadoopFile(path, classOf[TextInputFormat], classOf[LongWritable], classOf[Text], +<<<<<<< HEAD + minPartitions).map(pair => pair._2.toString) +======= minPartitions).map(pair => pair._2.toString).setName(path) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 } /** @@ -874,18 +878,30 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli path: String, minPartitions: Int = defaultMinPartitions): RDD[(String, String)] = withScope { assertNotStopped() +<<<<<<< HEAD + val job = new NewHadoopJob(hadoopConfiguration) + // Use setInputPaths so that wholeTextFiles aligns with hadoopFile/textFile in taking + // comma separated files as input. (see SPARK-7155) + NewFileInputFormat.setInputPaths(job, path) + val updateConf = SparkHadoopUtil.get.getConfigurationFromJobContext(job) +======= val job = NewHadoopJob.getInstance(hadoopConfiguration) // Use setInputPaths so that wholeTextFiles aligns with hadoopFile/textFile in taking // comma separated files as input. (see SPARK-7155) NewFileInputFormat.setInputPaths(job, path) val updateConf = job.getConfiguration +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 new WholeTextFileRDD( this, classOf[WholeTextFileInputFormat], classOf[Text], classOf[Text], updateConf, +<<<<<<< HEAD + minPartitions).setName(path).map(record => (record._1.toString, record._2.toString)) +======= minPartitions).map(record => (record._1.toString, record._2.toString)).setName(path) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 } /** @@ -923,11 +939,19 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli path: String, minPartitions: Int = defaultMinPartitions): RDD[(String, PortableDataStream)] = withScope { assertNotStopped() +<<<<<<< HEAD + val job = new NewHadoopJob(hadoopConfiguration) + // Use setInputPaths so that binaryFiles aligns with hadoopFile/textFile in taking + // comma separated files as input. (see SPARK-7155) + NewFileInputFormat.setInputPaths(job, path) + val updateConf = SparkHadoopUtil.get.getConfigurationFromJobContext(job) +======= val job = NewHadoopJob.getInstance(hadoopConfiguration) // Use setInputPaths so that binaryFiles aligns with hadoopFile/textFile in taking // comma separated files as input. 
(see SPARK-7155) NewFileInputFormat.setInputPaths(job, path) val updateConf = job.getConfiguration +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 new BinaryFileRDD( this, classOf[StreamInputFormat], @@ -1100,6 +1124,15 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli vClass: Class[V], conf: Configuration = hadoopConfiguration): RDD[(K, V)] = withScope { assertNotStopped() +<<<<<<< HEAD + // The call to new NewHadoopJob automatically adds security credentials to conf, + // so we don't need to explicitly add them ourselves + val job = new NewHadoopJob(conf) + // Use setInputPaths so that newAPIHadoopFile aligns with hadoopFile/textFile in taking + // comma separated files as input. (see SPARK-7155) + NewFileInputFormat.setInputPaths(job, path) + val updatedConf = SparkHadoopUtil.get.getConfigurationFromJobContext(job) +======= // The call to NewHadoopJob automatically adds security credentials to conf, // so we don't need to explicitly add them ourselves val job = NewHadoopJob.getInstance(conf) @@ -1107,6 +1140,7 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli // comma separated files as input. (see SPARK-7155) NewFileInputFormat.setInputPaths(job, path) val updatedConf = job.getConfiguration +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 new NewHadoopRDD(this, fClass, kClass, vClass, updatedConf).setName(path) } @@ -1369,7 +1403,11 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli if (!fs.exists(hadoopPath)) { throw new FileNotFoundException(s"Added file $hadoopPath does not exist.") } +<<<<<<< HEAD + val isDir = fs.getFileStatus(hadoopPath).isDir +======= val isDir = fs.getFileStatus(hadoopPath).isDirectory +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 if (!isLocal && scheme == "file" && isDir) { throw new SparkException(s"addFile does not support local directories when not running " + "local mode.") diff --git a/core/src/main/scala/org/apache/spark/SparkEnv.scala b/core/src/main/scala/org/apache/spark/SparkEnv.scala index b98cc964eda87..f2996216f5368 100644 --- a/core/src/main/scala/org/apache/spark/SparkEnv.scala +++ b/core/src/main/scala/org/apache/spark/SparkEnv.scala @@ -34,6 +34,10 @@ import org.apache.spark.memory.{MemoryManager, StaticMemoryManager, UnifiedMemor import org.apache.spark.network.BlockTransferService import org.apache.spark.network.netty.NettyBlockTransferService import org.apache.spark.rpc.{RpcEndpointRef, RpcEndpoint, RpcEnv} +<<<<<<< HEAD +import org.apache.spark.rpc.akka.AkkaRpcEnv +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.scheduler.{OutputCommitCoordinator, LiveListenerBus} import org.apache.spark.scheduler.OutputCommitCoordinator.OutputCommitCoordinatorEndpoint import org.apache.spark.serializer.Serializer @@ -96,7 +100,13 @@ class SparkEnv ( blockManager.master.stop() metricsSystem.stop() outputCommitCoordinator.stop() +<<<<<<< HEAD + if (!rpcEnv.isInstanceOf[AkkaRpcEnv]) { + actorSystem.shutdown() + } +======= actorSystem.shutdown() +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 rpcEnv.shutdown() // Unfortunately Akka's awaitTermination doesn't actually wait for the Netty server to shut @@ -245,11 +255,22 @@ object SparkEnv extends Logging { val securityManager = new SecurityManager(conf) +<<<<<<< HEAD + // Create the ActorSystem for Akka and get the port it binds to. 
+ val actorSystemName = if (isDriver) driverActorSystemName else executorActorSystemName + val rpcEnv = RpcEnv.create(actorSystemName, hostname, port, conf, securityManager, + clientMode = !isDriver) + val actorSystem: ActorSystem = + if (rpcEnv.isInstanceOf[AkkaRpcEnv]) { + rpcEnv.asInstanceOf[AkkaRpcEnv].actorSystem + } else { +======= val actorSystemName = if (isDriver) driverActorSystemName else executorActorSystemName // Create the ActorSystem for Akka and get the port it binds to. val rpcEnv = RpcEnv.create(actorSystemName, hostname, port, conf, securityManager, clientMode = !isDriver) val actorSystem: ActorSystem = { +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 val actorSystemPort = if (port == 0 || rpcEnv.address == null) { port diff --git a/core/src/main/scala/org/apache/spark/SparkHadoopWriter.scala b/core/src/main/scala/org/apache/spark/SparkHadoopWriter.scala index dd400b8ae8a16..1cdb7ac78a167 100644 --- a/core/src/main/scala/org/apache/spark/SparkHadoopWriter.scala +++ b/core/src/main/scala/org/apache/spark/SparkHadoopWriter.scala @@ -25,7 +25,10 @@ import java.util.Date import org.apache.hadoop.mapred._ import org.apache.hadoop.fs.FileSystem import org.apache.hadoop.fs.Path +<<<<<<< HEAD +======= import org.apache.hadoop.mapreduce.TaskType +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.mapred.SparkHadoopMapRedUtil import org.apache.spark.rdd.HadoopRDD @@ -38,7 +41,14 @@ import org.apache.spark.util.SerializableJobConf * a filename to write to, etc, exactly like in a Hadoop MapReduce job. */ private[spark] +<<<<<<< HEAD +class SparkHadoopWriter(jobConf: JobConf) + extends Logging + with SparkHadoopMapRedUtil + with Serializable { +======= class SparkHadoopWriter(jobConf: JobConf) extends Logging with Serializable { +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 private val now = new Date() private val conf = new SerializableJobConf(jobConf) @@ -129,7 +139,11 @@ class SparkHadoopWriter(jobConf: JobConf) extends Logging with Serializable { private def getJobContext(): JobContext = { if (jobContext == null) { +<<<<<<< HEAD + jobContext = newJobContext(conf.value, jID.value) +======= jobContext = new JobContextImpl(conf.value, jID.value) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 } jobContext } @@ -141,12 +155,15 @@ class SparkHadoopWriter(jobConf: JobConf) extends Logging with Serializable { taskContext } +<<<<<<< HEAD +======= protected def newTaskAttemptContext( conf: JobConf, attemptId: TaskAttemptID): TaskAttemptContext = { new TaskAttemptContextImpl(conf, attemptId) } +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 private def setIDs(jobid: Int, splitid: Int, attemptid: Int) { jobID = jobid splitID = splitid @@ -154,7 +171,11 @@ class SparkHadoopWriter(jobConf: JobConf) extends Logging with Serializable { jID = new SerializableWritable[JobID](SparkHadoopWriter.createJobID(now, jobid)) taID = new SerializableWritable[TaskAttemptID]( +<<<<<<< HEAD + new TaskAttemptID(new TaskID(jID.value, true, splitID), attemptID)) +======= new TaskAttemptID(new TaskID(jID.value, TaskType.MAP, splitID), attemptID)) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 } } @@ -172,9 +193,16 @@ object SparkHadoopWriter { } val outputPath = new Path(path) val fs = outputPath.getFileSystem(conf) +<<<<<<< HEAD + if (outputPath == null || fs == null) { + throw new IllegalArgumentException("Incorrectly formatted output path") + } + outputPath.makeQualified(fs) +======= if (fs == null) { throw new IllegalArgumentException("Incorrectly formatted output path") } 
outputPath.makeQualified(fs.getUri, fs.getWorkingDirectory) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 } } diff --git a/core/src/main/scala/org/apache/spark/broadcast/BroadcastFactory.scala b/core/src/main/scala/org/apache/spark/broadcast/BroadcastFactory.scala index 7f35ac47479b0..92a4963f84477 100644 --- a/core/src/main/scala/org/apache/spark/broadcast/BroadcastFactory.scala +++ b/core/src/main/scala/org/apache/spark/broadcast/BroadcastFactory.scala @@ -24,12 +24,21 @@ import org.apache.spark.SparkConf import org.apache.spark.annotation.DeveloperApi /** +<<<<<<< HEAD + * :: DeveloperApi :: +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 * An interface for all the broadcast implementations in Spark (to allow * multiple broadcast implementations). SparkContext uses a user-specified * BroadcastFactory implementation to instantiate a particular broadcast for the * entire Spark job. */ +<<<<<<< HEAD +@DeveloperApi +trait BroadcastFactory { +======= private[spark] trait BroadcastFactory { +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 def initialize(isDriver: Boolean, conf: SparkConf, securityMgr: SecurityManager): Unit diff --git a/core/src/main/scala/org/apache/spark/broadcast/BroadcastManager.scala b/core/src/main/scala/org/apache/spark/broadcast/BroadcastManager.scala index 61343607a13bc..542723e5dab23 100644 --- a/core/src/main/scala/org/apache/spark/broadcast/BroadcastManager.scala +++ b/core/src/main/scala/org/apache/spark/broadcast/BroadcastManager.scala @@ -21,8 +21,13 @@ import java.util.concurrent.atomic.AtomicLong import scala.reflect.ClassTag +<<<<<<< HEAD +import org.apache.spark._ +import org.apache.spark.util.Utils +======= import org.apache.spark.{Logging, SparkConf, SecurityManager} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 private[spark] class BroadcastManager( val isDriver: Boolean, @@ -39,8 +44,20 @@ private[spark] class BroadcastManager( private def initialize() { synchronized { if (!initialized) { +<<<<<<< HEAD + val broadcastFactoryClass = + conf.get("spark.broadcast.factory", "org.apache.spark.broadcast.TorrentBroadcastFactory") + + broadcastFactory = + Utils.classForName(broadcastFactoryClass).newInstance.asInstanceOf[BroadcastFactory] + + // Initialize appropriate BroadcastFactory and BroadcastObject + broadcastFactory.initialize(isDriver, conf, securityManager) + +======= broadcastFactory = new TorrentBroadcastFactory broadcastFactory.initialize(isDriver, conf, securityManager) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 initialized = true } } diff --git a/core/src/main/scala/org/apache/spark/broadcast/HttpBroadcast.scala b/core/src/main/scala/org/apache/spark/broadcast/HttpBroadcast.scala new file mode 100644 index 0000000000000..b69af639f7862 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/broadcast/HttpBroadcast.scala @@ -0,0 +1,269 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.broadcast + +import java.io.{File, FileOutputStream, ObjectInputStream, ObjectOutputStream, OutputStream} +import java.io.{BufferedInputStream, BufferedOutputStream} +import java.net.{URL, URLConnection, URI} +import java.util.concurrent.TimeUnit + +import scala.reflect.ClassTag + +import org.apache.spark.{HttpServer, Logging, SecurityManager, SparkConf, SparkEnv} +import org.apache.spark.io.CompressionCodec +import org.apache.spark.storage.{BroadcastBlockId, StorageLevel} +import org.apache.spark.util.{MetadataCleaner, MetadataCleanerType, TimeStampedHashSet, Utils} + +/** + * A [[org.apache.spark.broadcast.Broadcast]] implementation that uses HTTP server + * as a broadcast mechanism. The first time a HTTP broadcast variable (sent as part of a + * task) is deserialized in the executor, the broadcasted data is fetched from the driver + * (through a HTTP server running at the driver) and stored in the BlockManager of the + * executor to speed up future accesses. + */ +private[spark] class HttpBroadcast[T: ClassTag]( + @transient var value_ : T, isLocal: Boolean, id: Long) + extends Broadcast[T](id) with Logging with Serializable { + + override protected def getValue() = value_ + + private val blockId = BroadcastBlockId(id) + + /* + * Broadcasted data is also stored in the BlockManager of the driver. The BlockManagerMaster + * does not need to be told about this block as not only need to know about this data block. + */ + HttpBroadcast.synchronized { + SparkEnv.get.blockManager.putSingle( + blockId, value_, StorageLevel.MEMORY_AND_DISK, tellMaster = false) + } + + if (!isLocal) { + HttpBroadcast.write(id, value_) + } + + /** + * Remove all persisted state associated with this HTTP broadcast on the executors. + */ + override protected def doUnpersist(blocking: Boolean) { + HttpBroadcast.unpersist(id, removeFromDriver = false, blocking) + } + + /** + * Remove all persisted state associated with this HTTP broadcast on the executors and driver. + */ + override protected def doDestroy(blocking: Boolean) { + HttpBroadcast.unpersist(id, removeFromDriver = true, blocking) + } + + /** Used by the JVM when serializing this object. */ + private def writeObject(out: ObjectOutputStream): Unit = Utils.tryOrIOException { + assertValid() + out.defaultWriteObject() + } + + /** Used by the JVM when deserializing this object. */ + private def readObject(in: ObjectInputStream): Unit = Utils.tryOrIOException { + in.defaultReadObject() + HttpBroadcast.synchronized { + SparkEnv.get.blockManager.getSingle(blockId) match { + case Some(x) => value_ = x.asInstanceOf[T] + case None => { + logInfo("Started reading broadcast variable " + id) + val start = System.nanoTime + value_ = HttpBroadcast.read[T](id) + /* + * We cache broadcast data in the BlockManager so that subsequent tasks using it + * do not need to re-fetch. This data is only used locally and no other node + * needs to fetch this block, so we don't notify the master. 
+ */ + SparkEnv.get.blockManager.putSingle( + blockId, value_, StorageLevel.MEMORY_AND_DISK, tellMaster = false) + val time = (System.nanoTime - start) / 1e9 + logInfo("Reading broadcast variable " + id + " took " + time + " s") + } + } + } + } +} + +private[broadcast] object HttpBroadcast extends Logging { + private var initialized = false + private var broadcastDir: File = null + private var compress: Boolean = false + private var bufferSize: Int = 65536 + private var serverUri: String = null + private var server: HttpServer = null + private var securityManager: SecurityManager = null + + // TODO: This shouldn't be a global variable so that multiple SparkContexts can coexist + private val files = new TimeStampedHashSet[File] + private val httpReadTimeout = TimeUnit.MILLISECONDS.convert(5, TimeUnit.MINUTES).toInt + private var compressionCodec: CompressionCodec = null + private var cleaner: MetadataCleaner = null + + def initialize(isDriver: Boolean, conf: SparkConf, securityMgr: SecurityManager) { + synchronized { + if (!initialized) { + bufferSize = conf.getInt("spark.buffer.size", 65536) + compress = conf.getBoolean("spark.broadcast.compress", true) + securityManager = securityMgr + if (isDriver) { + createServer(conf) + conf.set("spark.httpBroadcast.uri", serverUri) + } + serverUri = conf.get("spark.httpBroadcast.uri") + cleaner = new MetadataCleaner(MetadataCleanerType.HTTP_BROADCAST, cleanup, conf) + compressionCodec = CompressionCodec.createCodec(conf) + initialized = true + } + } + } + + def stop() { + synchronized { + if (server != null) { + server.stop() + server = null + } + if (cleaner != null) { + cleaner.cancel() + cleaner = null + } + compressionCodec = null + initialized = false + } + } + + private def createServer(conf: SparkConf) { + broadcastDir = Utils.createTempDir(Utils.getLocalDir(conf), "broadcast") + val broadcastPort = conf.getInt("spark.broadcast.port", 0) + server = + new HttpServer(conf, broadcastDir, securityManager, broadcastPort, "HTTP broadcast server") + server.start() + serverUri = server.uri + logInfo("Broadcast server started at " + serverUri) + } + + def getFile(id: Long): File = new File(broadcastDir, BroadcastBlockId(id).name) + + private def write(id: Long, value: Any) { + val file = getFile(id) + val fileOutputStream = new FileOutputStream(file) + Utils.tryWithSafeFinally { + val out: OutputStream = { + if (compress) { + compressionCodec.compressedOutputStream(fileOutputStream) + } else { + new BufferedOutputStream(fileOutputStream, bufferSize) + } + } + val ser = SparkEnv.get.serializer.newInstance() + val serOut = ser.serializeStream(out) + Utils.tryWithSafeFinally { + serOut.writeObject(value) + } { + serOut.close() + } + files += file + } { + fileOutputStream.close() + } + } + + private def read[T: ClassTag](id: Long): T = { + logDebug("broadcast read server: " + serverUri + " id: broadcast-" + id) + val url = serverUri + "/" + BroadcastBlockId(id).name + + var uc: URLConnection = null + if (securityManager.isAuthenticationEnabled()) { + logDebug("broadcast security enabled") + val newuri = Utils.constructURIForAuthentication(new URI(url), securityManager) + uc = newuri.toURL.openConnection() + uc.setConnectTimeout(httpReadTimeout) + uc.setAllowUserInteraction(false) + } else { + logDebug("broadcast not using security") + uc = new URL(url).openConnection() + uc.setConnectTimeout(httpReadTimeout) + } + Utils.setupSecureURLConnection(uc, securityManager) + + val in = { + uc.setReadTimeout(httpReadTimeout) + val inputStream = uc.getInputStream + 
if (compress) { + compressionCodec.compressedInputStream(inputStream) + } else { + new BufferedInputStream(inputStream, bufferSize) + } + } + val ser = SparkEnv.get.serializer.newInstance() + val serIn = ser.deserializeStream(in) + Utils.tryWithSafeFinally { + serIn.readObject[T]() + } { + serIn.close() + } + } + + /** + * Remove all persisted blocks associated with this HTTP broadcast on the executors. + * If removeFromDriver is true, also remove these persisted blocks on the driver + * and delete the associated broadcast file. + */ + def unpersist(id: Long, removeFromDriver: Boolean, blocking: Boolean): Unit = synchronized { + SparkEnv.get.blockManager.master.removeBroadcast(id, removeFromDriver, blocking) + if (removeFromDriver) { + val file = getFile(id) + files.remove(file) + deleteBroadcastFile(file) + } + } + + /** + * Periodically clean up old broadcasts by removing the associated map entries and + * deleting the associated files. + */ + private def cleanup(cleanupTime: Long) { + val iterator = files.internalMap.entrySet().iterator() + while(iterator.hasNext) { + val entry = iterator.next() + val (file, time) = (entry.getKey, entry.getValue) + if (time < cleanupTime) { + iterator.remove() + deleteBroadcastFile(file) + } + } + } + + private def deleteBroadcastFile(file: File) { + try { + if (file.exists) { + if (file.delete()) { + logInfo("Deleted broadcast file: %s".format(file)) + } else { + logWarning("Could not delete broadcast file: %s".format(file)) + } + } + } catch { + case e: Exception => + logError("Exception while deleting broadcast file: %s".format(file), e) + } + } +} diff --git a/core/src/main/scala/org/apache/spark/broadcast/HttpBroadcastFactory.scala b/core/src/main/scala/org/apache/spark/broadcast/HttpBroadcastFactory.scala new file mode 100644 index 0000000000000..cf3ae36f27949 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/broadcast/HttpBroadcastFactory.scala @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.broadcast + +import scala.reflect.ClassTag + +import org.apache.spark.{SecurityManager, SparkConf} + +/** + * A [[org.apache.spark.broadcast.BroadcastFactory]] implementation that uses a + * HTTP server as the broadcast mechanism. Refer to + * [[org.apache.spark.broadcast.HttpBroadcast]] for more details about this mechanism. 
+ */ +class HttpBroadcastFactory extends BroadcastFactory { + override def initialize(isDriver: Boolean, conf: SparkConf, securityMgr: SecurityManager) { + HttpBroadcast.initialize(isDriver, conf, securityMgr) + } + + override def newBroadcast[T: ClassTag](value_ : T, isLocal: Boolean, id: Long): Broadcast[T] = + new HttpBroadcast[T](value_, isLocal, id) + + override def stop() { HttpBroadcast.stop() } + + /** + * Remove all persisted state associated with the HTTP broadcast with the given ID. + * @param removeFromDriver Whether to remove state from the driver + * @param blocking Whether to block until unbroadcasted + */ + override def unbroadcast(id: Long, removeFromDriver: Boolean, blocking: Boolean) { + HttpBroadcast.unpersist(id, removeFromDriver, blocking) + } +} diff --git a/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala b/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala index 9bd69727f6086..82556b816e7f8 100644 --- a/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala +++ b/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcast.scala @@ -45,7 +45,11 @@ import org.apache.spark.util.io.ByteArrayChunkOutputStream * BlockManager, ready for other executors to fetch from. * * This prevents the driver from being the bottleneck in sending out multiple copies of the +<<<<<<< HEAD + * broadcast data (one per executor) as done by the [[org.apache.spark.broadcast.HttpBroadcast]]. +======= * broadcast data (one per executor). +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 * * When initialized, TorrentBroadcast objects read SparkEnv.get.conf. * diff --git a/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcastFactory.scala b/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcastFactory.scala index b11f9ba171b84..80d66c2cdf5dc 100644 --- a/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcastFactory.scala +++ b/core/src/main/scala/org/apache/spark/broadcast/TorrentBroadcastFactory.scala @@ -26,7 +26,11 @@ import org.apache.spark.{SecurityManager, SparkConf} * protocol to do a distributed transfer of the broadcasted data to the executors. Refer to * [[org.apache.spark.broadcast.TorrentBroadcast]] for more details. */ +<<<<<<< HEAD +class TorrentBroadcastFactory extends BroadcastFactory { +======= private[spark] class TorrentBroadcastFactory extends BroadcastFactory { +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 override def initialize(isDriver: Boolean, conf: SparkConf, securityMgr: SecurityManager) { } diff --git a/core/src/main/scala/org/apache/spark/deploy/Client.scala b/core/src/main/scala/org/apache/spark/deploy/Client.scala index 328a1bb84f5fb..af1cd28306df1 100644 --- a/core/src/main/scala/org/apache/spark/deploy/Client.scala +++ b/core/src/main/scala/org/apache/spark/deploy/Client.scala @@ -230,7 +230,11 @@ object Client { RpcEnv.create("driverClient", Utils.localHostName(), 0, conf, new SecurityManager(conf)) val masterEndpoints = driverArgs.masters.map(RpcAddress.fromSparkURL). 
+<<<<<<< HEAD + map(rpcEnv.setupEndpointRef(Master.SYSTEM_NAME, _, Master.ENDPOINT_NAME)) +======= map(rpcEnv.setupEndpointRef(_, Master.ENDPOINT_NAME)) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 rpcEnv.setupEndpoint("client", new ClientEndpoint(rpcEnv, driverArgs, masterEndpoints, conf)) rpcEnv.awaitTermination() diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala index 4bd94f13e57e6..5ba75284bd839 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkHadoopUtil.scala @@ -33,6 +33,12 @@ import org.apache.hadoop.fs.FileSystem.Statistics import org.apache.hadoop.fs.{FileStatus, FileSystem, Path, PathFilter} import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier import org.apache.hadoop.mapred.JobConf +<<<<<<< HEAD +import org.apache.hadoop.mapreduce.JobContext +import org.apache.hadoop.mapreduce.{TaskAttemptContext => MapReduceTaskAttemptContext} +import org.apache.hadoop.mapreduce.{TaskAttemptID => MapReduceTaskAttemptID} +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.hadoop.security.{Credentials, UserGroupInformation} import org.apache.spark.annotation.DeveloperApi @@ -73,6 +79,12 @@ class SparkHadoopUtil extends Logging { } } +<<<<<<< HEAD + @deprecated("use newConfiguration with SparkConf argument", "1.2.0") + def newConfiguration(): Configuration = newConfiguration(null) + +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * Return an appropriate (subclass) of Configuration. Creating config can initializes some Hadoop * subsystems. @@ -185,6 +197,36 @@ class SparkHadoopUtil extends Logging { } /** +<<<<<<< HEAD + * Using reflection to get the Configuration from JobContext/TaskAttemptContext. If we directly + * call `JobContext/TaskAttemptContext.getConfiguration`, it will generate different byte codes + * for Hadoop 1.+ and Hadoop 2.+ because JobContext/TaskAttemptContext is class in Hadoop 1.+ + * while it's interface in Hadoop 2.+. + */ + def getConfigurationFromJobContext(context: JobContext): Configuration = { + // scalastyle:off jobconfig + val method = context.getClass.getMethod("getConfiguration") + // scalastyle:on jobconfig + method.invoke(context).asInstanceOf[Configuration] + } + + /** + * Using reflection to call `getTaskAttemptID` from TaskAttemptContext. If we directly + * call `TaskAttemptContext.getTaskAttemptID`, it will generate different byte codes + * for Hadoop 1.+ and Hadoop 2.+ because TaskAttemptContext is class in Hadoop 1.+ + * while it's interface in Hadoop 2.+. + */ + def getTaskAttemptIDFromTaskAttemptContext( + context: MapReduceTaskAttemptContext): MapReduceTaskAttemptID = { + // scalastyle:off jobconfig + val method = context.getClass.getMethod("getTaskAttemptID") + // scalastyle:on jobconfig + method.invoke(context).asInstanceOf[MapReduceTaskAttemptID] + } + + /** +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 * Get [[FileStatus]] objects for all leaf children (files) under the given base path. If the * given path points to a file, return a single-element collection containing [[FileStatus]] of * that file. 
@@ -200,11 +242,19 @@ class SparkHadoopUtil extends Logging { */ def listLeafStatuses(fs: FileSystem, baseStatus: FileStatus): Seq[FileStatus] = { def recurse(status: FileStatus): Seq[FileStatus] = { +<<<<<<< HEAD + val (directories, leaves) = fs.listStatus(status.getPath).partition(_.isDir) + leaves ++ directories.flatMap(f => listLeafStatuses(fs, f)) + } + + if (baseStatus.isDir) recurse(baseStatus) else Seq(baseStatus) +======= val (directories, leaves) = fs.listStatus(status.getPath).partition(_.isDirectory) leaves ++ directories.flatMap(f => listLeafStatuses(fs, f)) } if (baseStatus.isDirectory) recurse(baseStatus) else Seq(baseStatus) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 } def listLeafDirStatuses(fs: FileSystem, basePath: Path): Seq[FileStatus] = { @@ -213,12 +263,20 @@ class SparkHadoopUtil extends Logging { def listLeafDirStatuses(fs: FileSystem, baseStatus: FileStatus): Seq[FileStatus] = { def recurse(status: FileStatus): Seq[FileStatus] = { +<<<<<<< HEAD + val (directories, files) = fs.listStatus(status.getPath).partition(_.isDir) +======= val (directories, files) = fs.listStatus(status.getPath).partition(_.isDirectory) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 val leaves = if (directories.isEmpty) Seq(status) else Seq.empty[FileStatus] leaves ++ directories.flatMap(dir => listLeafDirStatuses(fs, dir)) } +<<<<<<< HEAD + assert(baseStatus.isDir) +======= assert(baseStatus.isDirectory) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 recurse(baseStatus) } diff --git a/core/src/main/scala/org/apache/spark/deploy/client/AppClient.scala b/core/src/main/scala/org/apache/spark/deploy/client/AppClient.scala index f7c33214c2406..aca377cf55d96 100644 --- a/core/src/main/scala/org/apache/spark/deploy/client/AppClient.scala +++ b/core/src/main/scala/org/apache/spark/deploy/client/AppClient.scala @@ -104,7 +104,12 @@ private[spark] class AppClient( return } logInfo("Connecting to master " + masterAddress.toSparkURL + "...") +<<<<<<< HEAD + val masterRef = + rpcEnv.setupEndpointRef(Master.SYSTEM_NAME, masterAddress, Master.ENDPOINT_NAME) +======= val masterRef = rpcEnv.setupEndpointRef(masterAddress, Master.ENDPOINT_NAME) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 masterRef.send(RegisterApplication(appDescription, self)) } catch { case ie: InterruptedException => // Cancelled diff --git a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala index c93bc8c127f58..b5877f283baeb 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala @@ -28,7 +28,10 @@ import com.google.common.io.ByteStreams import com.google.common.util.concurrent.{MoreExecutors, ThreadFactoryBuilder} import org.apache.hadoop.fs.{FileStatus, FileSystem, Path} import org.apache.hadoop.hdfs.DistributedFileSystem +<<<<<<< HEAD +======= import org.apache.hadoop.hdfs.protocol.HdfsConstants +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.hadoop.security.AccessControlException import org.apache.spark.{Logging, SecurityManager, SparkConf, SparkException} @@ -168,7 +171,11 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) } throw new IllegalArgumentException(msg) } +<<<<<<< HEAD + if (!fs.getFileStatus(path).isDir) { +======= if (!fs.getFileStatus(path).isDirectory) { +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 throw new IllegalArgumentException( 
"Logging directory specified is not a directory: %s".format(logDir)) } @@ -305,7 +312,11 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) logError("Exception encountered when attempting to update last scan time", e) lastScanTime } finally { +<<<<<<< HEAD + if (!fs.delete(path)) { +======= if (!fs.delete(path, true)) { +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 logWarning(s"Error deleting ${path}") } } @@ -604,7 +615,11 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) * As of Spark 1.3, these files are consolidated into a single one that replaces the directory. * See SPARK-2261 for more detail. */ +<<<<<<< HEAD + private def isLegacyLogDirectory(entry: FileStatus): Boolean = entry.isDir() +======= private def isLegacyLogDirectory(entry: FileStatus): Boolean = entry.isDirectory +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * Returns the modification time of the given event log. If the status points at an empty @@ -649,7 +664,12 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) } /** +<<<<<<< HEAD + * Checks whether HDFS is in safe mode. The API is slightly different between hadoop 1 and 2, + * so we have to resort to ugly reflection (as usual...). +======= * Checks whether HDFS is in safe mode. +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 * * Note that DistributedFileSystem is a `@LimitedPrivate` class, which for all practical reasons * makes it more public than not. @@ -663,7 +683,15 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) // For testing. private[history] def isFsInSafeMode(dfs: DistributedFileSystem): Boolean = { +<<<<<<< HEAD + val hadoop2Class = "org.apache.hadoop.hdfs.protocol.HdfsConstants$SafeModeAction" + val actionClass: Class[_] = getClass().getClassLoader().loadClass(hadoop2Class) + val action = actionClass.getField("SAFEMODE_GET").get(null) + val method = dfs.getClass().getMethod("setSafeMode", action.getClass()) + method.invoke(dfs, action).asInstanceOf[Boolean] +======= dfs.setSafeMode(HdfsConstants.SafeModeAction.SAFEMODE_GET) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 } } diff --git a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala index 6143a33b69344..65c243dd6aef4 100644 --- a/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala +++ b/core/src/main/scala/org/apache/spark/deploy/history/HistoryServer.scala @@ -21,8 +21,11 @@ import java.util.NoSuchElementException import java.util.zip.ZipOutputStream import javax.servlet.http.{HttpServlet, HttpServletRequest, HttpServletResponse} +<<<<<<< HEAD +======= import scala.util.control.NonFatal +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import com.google.common.cache._ import org.eclipse.jetty.servlet.{ServletContextHandler, ServletHolder} import org.apache.spark.{Logging, SecurityManager, SparkConf} @@ -115,6 +118,9 @@ class HistoryServer( } def getSparkUI(appKey: String): Option[SparkUI] = { +<<<<<<< HEAD + Option(appCache.get(appKey)) +======= try { val ui = appCache.get(appKey) Some(ui) @@ -126,6 +132,7 @@ class HistoryServer( case cause: Exception => throw cause } } +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 } initialize() @@ -205,7 +212,11 @@ class HistoryServer( appCache.get(appId + attemptId.map { id => s"/$id" }.getOrElse("")) true } catch { +<<<<<<< HEAD + case e: Exception => e.getCause() match { +======= case NonFatal(e) => e.getCause() match { +>>>>>>> 
15bd73627e04591fd13667b4838c9098342db965 case nsee: NoSuchElementException => false diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala index 37b94e02cc9d1..80d4c96eeb4e4 100755 --- a/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/Worker.scala @@ -45,6 +45,10 @@ private[deploy] class Worker( cores: Int, memory: Int, masterRpcAddresses: Array[RpcAddress], +<<<<<<< HEAD + systemName: String, +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 endpointName: String, workDirPath: String = null, val conf: SparkConf, @@ -100,7 +104,11 @@ private[deploy] class Worker( private var master: Option[RpcEndpointRef] = None private var activeMasterUrl: String = "" private[worker] var activeMasterWebUiUrl : String = "" +<<<<<<< HEAD + private val workerUri = rpcEnv.uriOf(systemName, rpcEnv.address, endpointName) +======= private val workerUri = RpcEndpointAddress(rpcEnv.address, endpointName).toString +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 private var registered = false private var connected = false private val workerId = generateWorkerId() @@ -208,7 +216,12 @@ private[deploy] class Worker( override def run(): Unit = { try { logInfo("Connecting to master " + masterAddress + "...") +<<<<<<< HEAD + val masterEndpoint = + rpcEnv.setupEndpointRef(Master.SYSTEM_NAME, masterAddress, Master.ENDPOINT_NAME) +======= val masterEndpoint = rpcEnv.setupEndpointRef(masterAddress, Master.ENDPOINT_NAME) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 registerWithMaster(masterEndpoint) } catch { case ie: InterruptedException => // Cancelled @@ -264,7 +277,12 @@ private[deploy] class Worker( override def run(): Unit = { try { logInfo("Connecting to master " + masterAddress + "...") +<<<<<<< HEAD + val masterEndpoint = + rpcEnv.setupEndpointRef(Master.SYSTEM_NAME, masterAddress, Master.ENDPOINT_NAME) +======= val masterEndpoint = rpcEnv.setupEndpointRef(masterAddress, Master.ENDPOINT_NAME) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 registerWithMaster(masterEndpoint) } catch { case ie: InterruptedException => // Cancelled @@ -708,7 +726,11 @@ private[deploy] object Worker extends Logging { val rpcEnv = RpcEnv.create(systemName, host, port, conf, securityMgr) val masterAddresses = masterUrls.map(RpcAddress.fromSparkURL(_)) rpcEnv.setupEndpoint(ENDPOINT_NAME, new Worker(rpcEnv, webUiPort, cores, memory, +<<<<<<< HEAD + masterAddresses, systemName, ENDPOINT_NAME, workDir, conf, securityMgr)) +======= masterAddresses, ENDPOINT_NAME, workDir, conf, securityMgr)) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 rpcEnv } diff --git a/core/src/main/scala/org/apache/spark/deploy/worker/WorkerArguments.scala b/core/src/main/scala/org/apache/spark/deploy/worker/WorkerArguments.scala index de3c7cd265d2d..893c87121769f 100644 --- a/core/src/main/scala/org/apache/spark/deploy/worker/WorkerArguments.scala +++ b/core/src/main/scala/org/apache/spark/deploy/worker/WorkerArguments.scala @@ -175,7 +175,11 @@ private[worker] class WorkerArguments(args: Array[String], conf: SparkConf) { def checkWorkerMemory(): Unit = { if (memory <= 0) { +<<<<<<< HEAD + val message = "Memory can't be 0, missing a M or G on the end of the memory specification?" +======= val message = "Memory is below 1MB, or missing a M/G at the end of the memory specification?" 
+>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 throw new IllegalStateException(message) } } diff --git a/core/src/main/scala/org/apache/spark/input/FixedLengthBinaryInputFormat.scala b/core/src/main/scala/org/apache/spark/input/FixedLengthBinaryInputFormat.scala index 30431a9b986bb..787ff8a4c9be5 100644 --- a/core/src/main/scala/org/apache/spark/input/FixedLengthBinaryInputFormat.scala +++ b/core/src/main/scala/org/apache/spark/input/FixedLengthBinaryInputFormat.scala @@ -23,6 +23,10 @@ import org.apache.hadoop.mapreduce.lib.input.FileInputFormat import org.apache.hadoop.mapreduce.{InputSplit, JobContext, RecordReader, TaskAttemptContext} import org.apache.spark.Logging +<<<<<<< HEAD +import org.apache.spark.deploy.SparkHadoopUtil +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * Custom Input Format for reading and splitting flat binary files that contain records, @@ -35,7 +39,11 @@ private[spark] object FixedLengthBinaryInputFormat { /** Retrieves the record length property from a Hadoop configuration */ def getRecordLength(context: JobContext): Int = { +<<<<<<< HEAD + SparkHadoopUtil.get.getConfigurationFromJobContext(context).get(RECORD_LENGTH_PROPERTY).toInt +======= context.getConfiguration.get(RECORD_LENGTH_PROPERTY).toInt +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 } } diff --git a/core/src/main/scala/org/apache/spark/input/FixedLengthBinaryRecordReader.scala b/core/src/main/scala/org/apache/spark/input/FixedLengthBinaryRecordReader.scala index 25596a15d93c0..7be51171384cf 100644 --- a/core/src/main/scala/org/apache/spark/input/FixedLengthBinaryRecordReader.scala +++ b/core/src/main/scala/org/apache/spark/input/FixedLengthBinaryRecordReader.scala @@ -24,6 +24,10 @@ import org.apache.hadoop.io.compress.CompressionCodecFactory import org.apache.hadoop.io.{BytesWritable, LongWritable} import org.apache.hadoop.mapreduce.{InputSplit, RecordReader, TaskAttemptContext} import org.apache.hadoop.mapreduce.lib.input.FileSplit +<<<<<<< HEAD +import org.apache.spark.deploy.SparkHadoopUtil +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * FixedLengthBinaryRecordReader is returned by FixedLengthBinaryInputFormat. 
@@ -82,16 +86,26 @@ private[spark] class FixedLengthBinaryRecordReader // the actual file we will be reading from val file = fileSplit.getPath // job configuration +<<<<<<< HEAD + val job = SparkHadoopUtil.get.getConfigurationFromJobContext(context) + // check compression + val codec = new CompressionCodecFactory(job).getCodec(file) +======= val conf = context.getConfiguration // check compression val codec = new CompressionCodecFactory(conf).getCodec(file) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 if (codec != null) { throw new IOException("FixedLengthRecordReader does not support reading compressed files") } // get the record length recordLength = FixedLengthBinaryInputFormat.getRecordLength(context) // get the filesystem +<<<<<<< HEAD + val fs = file.getFileSystem(job) +======= val fs = file.getFileSystem(conf) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 // open the File fileInputStream = fs.open(file) // seek to the splitStart position diff --git a/core/src/main/scala/org/apache/spark/input/PortableDataStream.scala b/core/src/main/scala/org/apache/spark/input/PortableDataStream.scala index cb76e3c344fca..463b2f1a02a2b 100644 --- a/core/src/main/scala/org/apache/spark/input/PortableDataStream.scala +++ b/core/src/main/scala/org/apache/spark/input/PortableDataStream.scala @@ -27,6 +27,11 @@ import org.apache.hadoop.fs.Path import org.apache.hadoop.mapreduce.{InputSplit, JobContext, RecordReader, TaskAttemptContext} import org.apache.hadoop.mapreduce.lib.input.{CombineFileInputFormat, CombineFileRecordReader, CombineFileSplit} +<<<<<<< HEAD +import org.apache.spark.deploy.SparkHadoopUtil + +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * A general format for reading whole files in as streams, byte arrays, * or other functions to be added @@ -42,7 +47,11 @@ private[spark] abstract class StreamFileInputFormat[T] */ def setMinPartitions(context: JobContext, minPartitions: Int) { val files = listStatus(context).asScala +<<<<<<< HEAD + val totalLen = files.map(file => if (file.isDir) 0L else file.getLen).sum +======= val totalLen = files.map(file => if (file.isDirectory) 0L else file.getLen).sum +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 val maxSplitSize = Math.ceil(totalLen * 1.0 / files.size).toLong super.setMaxSplitSize(maxSplitSize) } @@ -133,7 +142,12 @@ class PortableDataStream( private val confBytes = { val baos = new ByteArrayOutputStream() +<<<<<<< HEAD + SparkHadoopUtil.get.getConfigurationFromJobContext(context). 
+ write(new DataOutputStream(baos)) +======= context.getConfiguration.write(new DataOutputStream(baos)) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 baos.toByteArray } diff --git a/core/src/main/scala/org/apache/spark/input/WholeTextFileInputFormat.scala b/core/src/main/scala/org/apache/spark/input/WholeTextFileInputFormat.scala index fa34f1e886c72..d29001db61848 100644 --- a/core/src/main/scala/org/apache/spark/input/WholeTextFileInputFormat.scala +++ b/core/src/main/scala/org/apache/spark/input/WholeTextFileInputFormat.scala @@ -53,7 +53,11 @@ private[spark] class WholeTextFileInputFormat */ def setMinPartitions(context: JobContext, minPartitions: Int) { val files = listStatus(context).asScala +<<<<<<< HEAD + val totalLen = files.map(file => if (file.isDir) 0L else file.getLen).sum +======= val totalLen = files.map(file => if (file.isDirectory) 0L else file.getLen).sum +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 val maxSplitSize = Math.ceil(totalLen * 1.0 / (if (minPartitions == 0) 1 else minPartitions)).toLong super.setMaxSplitSize(maxSplitSize) diff --git a/core/src/main/scala/org/apache/spark/input/WholeTextFileRecordReader.scala b/core/src/main/scala/org/apache/spark/input/WholeTextFileRecordReader.scala index 998c898a3fc25..348fbdfe3ed10 100644 --- a/core/src/main/scala/org/apache/spark/input/WholeTextFileRecordReader.scala +++ b/core/src/main/scala/org/apache/spark/input/WholeTextFileRecordReader.scala @@ -26,6 +26,11 @@ import org.apache.hadoop.mapreduce.InputSplit import org.apache.hadoop.mapreduce.lib.input.{CombineFileSplit, CombineFileRecordReader} import org.apache.hadoop.mapreduce.RecordReader import org.apache.hadoop.mapreduce.TaskAttemptContext +<<<<<<< HEAD +import org.apache.spark.deploy.SparkHadoopUtil + +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * A trait to implement [[org.apache.hadoop.conf.Configurable Configurable]] interface. @@ -50,7 +55,12 @@ private[spark] class WholeTextFileRecordReader( extends RecordReader[Text, Text] with Configurable { private[this] val path = split.getPath(index) +<<<<<<< HEAD + private[this] val fs = path.getFileSystem( + SparkHadoopUtil.get.getConfigurationFromJobContext(context)) +======= private[this] val fs = path.getFileSystem(context.getConfiguration) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 // True means the current file has been processed, then skip it. 
private[this] var processed = false
diff --git a/core/src/main/scala/org/apache/spark/mapred/SparkHadoopMapRedUtil.scala b/core/src/main/scala/org/apache/spark/mapred/SparkHadoopMapRedUtil.scala
index 249bdf5994f8f..b8a16955ca0f2 100644
--- a/core/src/main/scala/org/apache/spark/mapred/SparkHadoopMapRedUtil.scala
+++ b/core/src/main/scala/org/apache/spark/mapred/SparkHadoopMapRedUtil.scala
@@ -18,12 +18,70 @@ package org.apache.spark.mapred
 import java.io.IOException
+<<<<<<< HEAD
+import java.lang.reflect.Modifier
+import org.apache.hadoop.mapred._
 import org.apache.hadoop.mapreduce.{TaskAttemptContext => MapReduceTaskAttemptContext}
 import org.apache.hadoop.mapreduce.{OutputCommitter => MapReduceOutputCommitter}
+import org.apache.spark.deploy.SparkHadoopUtil
 import org.apache.spark.executor.CommitDeniedException
 import org.apache.spark.{Logging, SparkEnv, TaskContext}
+import org.apache.spark.util.{Utils => SparkUtils}
+
+private[spark]
+trait SparkHadoopMapRedUtil {
+  def newJobContext(conf: JobConf, jobId: JobID): JobContext = {
+    val klass = firstAvailableClass("org.apache.hadoop.mapred.JobContextImpl",
+      "org.apache.hadoop.mapred.JobContext")
+    val ctor = klass.getDeclaredConstructor(classOf[JobConf],
+      classOf[org.apache.hadoop.mapreduce.JobID])
+    // In Hadoop 1.0.x, JobContext is an interface, and JobContextImpl is package private.
+    // Make it accessible if it's not in order to access it.
+    if (!Modifier.isPublic(ctor.getModifiers)) {
+      ctor.setAccessible(true)
+    }
+    ctor.newInstance(conf, jobId).asInstanceOf[JobContext]
+  }
+
+  def newTaskAttemptContext(conf: JobConf, attemptId: TaskAttemptID): TaskAttemptContext = {
+    val klass = firstAvailableClass("org.apache.hadoop.mapred.TaskAttemptContextImpl",
+      "org.apache.hadoop.mapred.TaskAttemptContext")
+    val ctor = klass.getDeclaredConstructor(classOf[JobConf], classOf[TaskAttemptID])
+    // See above
+    if (!Modifier.isPublic(ctor.getModifiers)) {
+      ctor.setAccessible(true)
+    }
+    ctor.newInstance(conf, attemptId).asInstanceOf[TaskAttemptContext]
+  }
+
+  def newTaskAttemptID(
+      jtIdentifier: String,
+      jobId: Int,
+      isMap: Boolean,
+      taskId: Int,
+      attemptId: Int): TaskAttemptID = {
+    new TaskAttemptID(jtIdentifier, jobId, isMap, taskId, attemptId)
+  }
+
+  private def firstAvailableClass(first: String, second: String): Class[_] = {
+    try {
+      SparkUtils.classForName(first)
+    } catch {
+      case e: ClassNotFoundException =>
+        SparkUtils.classForName(second)
+    }
+  }
+}
+=======
+
+import org.apache.hadoop.mapreduce.{TaskAttemptContext => MapReduceTaskAttemptContext}
+import org.apache.hadoop.mapreduce.{OutputCommitter => MapReduceOutputCommitter}
+
+import org.apache.spark.executor.CommitDeniedException
+import org.apache.spark.{Logging, SparkEnv, TaskContext}
+>>>>>>> 15bd73627e04591fd13667b4838c9098342db965
 object SparkHadoopMapRedUtil extends Logging {
   /**
@@ -44,7 +102,11 @@ object SparkHadoopMapRedUtil extends Logging {
       jobId: Int,
       splitId: Int): Unit = {
+<<<<<<< HEAD
+    val mrTaskAttemptID = SparkHadoopUtil.get.getTaskAttemptIDFromTaskAttemptContext(mrTaskContext)
+=======
     val mrTaskAttemptID = mrTaskContext.getTaskAttemptID
+>>>>>>> 15bd73627e04591fd13667b4838c9098342db965
     // Called after we have decided to commit
     def performCommit(): Unit = {
diff --git a/core/src/main/scala/org/apache/spark/mapreduce/SparkHadoopMapReduceUtil.scala b/core/src/main/scala/org/apache/spark/mapreduce/SparkHadoopMapReduceUtil.scala
new file mode 100644
index 0000000000000..82d807fad8938
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/mapreduce/SparkHadoopMapReduceUtil.scala
@@ -0,0 +1,68 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.mapreduce
+
+import java.lang.{Boolean => JBoolean, Integer => JInteger}
+
+import org.apache.hadoop.conf.Configuration
+import org.apache.hadoop.mapreduce.{JobContext, JobID, TaskAttemptContext, TaskAttemptID}
+import org.apache.spark.util.Utils
+
+private[spark]
+trait SparkHadoopMapReduceUtil {
+  def newJobContext(conf: Configuration, jobId: JobID): JobContext = {
+    val klass = Utils.classForName("org.apache.hadoop.mapreduce.task.JobContextImpl")
+    val ctor = klass.getDeclaredConstructor(classOf[Configuration], classOf[JobID])
+    ctor.newInstance(conf, jobId).asInstanceOf[JobContext]
+  }
+
+  def newTaskAttemptContext(conf: Configuration, attemptId: TaskAttemptID): TaskAttemptContext = {
+    val klass = Utils.classForName("org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl")
+    val ctor = klass.getDeclaredConstructor(classOf[Configuration], classOf[TaskAttemptID])
+    ctor.newInstance(conf, attemptId).asInstanceOf[TaskAttemptContext]
+  }
+
+  def newTaskAttemptID(
+      jtIdentifier: String,
+      jobId: Int,
+      isMap: Boolean,
+      taskId: Int,
+      attemptId: Int): TaskAttemptID = {
+    val klass = Utils.classForName("org.apache.hadoop.mapreduce.TaskAttemptID")
+    try {
+      // First, attempt to use the old-style constructor that takes a boolean isMap
+      // (not available in YARN)
+      val ctor = klass.getDeclaredConstructor(classOf[String], classOf[Int], classOf[Boolean],
+        classOf[Int], classOf[Int])
+      ctor.newInstance(jtIdentifier, new JInteger(jobId), new JBoolean(isMap), new JInteger(taskId),
+        new JInteger(attemptId)).asInstanceOf[TaskAttemptID]
+    } catch {
+      case exc: NoSuchMethodException => {
+        // If that failed, look for the new constructor that takes a TaskType (not available in 1.x)
+        val taskTypeClass = Utils.classForName("org.apache.hadoop.mapreduce.TaskType")
+          .asInstanceOf[Class[Enum[_]]]
+        val taskType = taskTypeClass.getMethod("valueOf", classOf[String]).invoke(
+          taskTypeClass, if (isMap) "MAP" else "REDUCE")
+        val ctor = klass.getDeclaredConstructor(classOf[String], classOf[Int], taskTypeClass,
+          classOf[Int], classOf[Int])
+        ctor.newInstance(jtIdentifier, new JInteger(jobId), taskType, new JInteger(taskId),
+          new JInteger(attemptId)).asInstanceOf[TaskAttemptID]
+      }
+    }
+  }
+}
diff --git a/core/src/main/scala/org/apache/spark/rdd/AsyncRDDActions.scala b/core/src/main/scala/org/apache/spark/rdd/AsyncRDDActions.scala
index 14f541f937b4c..3bd4a6c91bfd0 100644
--- a/core/src/main/scala/org/apache/spark/rdd/AsyncRDDActions.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/AsyncRDDActions.scala
@@ -77,13 +77,21 @@ class AsyncRDDActions[T: ClassTag](self: RDD[T]) extends Serializable with Loggi
       This implementation is non-blocking, asynchronously handling the results of each job and triggering the next job using callbacks on futures.
      */
+<<<<<<< HEAD
+    def continue(partsScanned: Long)(implicit jobSubmitter: JobSubmitter) : Future[Seq[T]] =
+=======
     def continue(partsScanned: Int)(implicit jobSubmitter: JobSubmitter) : Future[Seq[T]] =
+>>>>>>> 15bd73627e04591fd13667b4838c9098342db965
       if (results.size >= num || partsScanned >= totalParts) {
         Future.successful(results.toSeq)
       } else {
         // The number of partitions to try in this iteration. It is ok for this number to be
         // greater than totalParts because we actually cap it at totalParts in runJob.
+<<<<<<< HEAD
+        var numPartsToTry = 1L
+=======
        var numPartsToTry = 1
+>>>>>>> 15bd73627e04591fd13667b4838c9098342db965
        if (partsScanned > 0) {
          // If we didn't find any rows after the previous iteration, quadruple and retry.
          // Otherwise, interpolate the number of partitions we need to try, but overestimate it
@@ -99,7 +107,11 @@ class AsyncRDDActions[T: ClassTag](self: RDD[T]) extends Serializable with Loggi
        }
        val left = num - results.size
+<<<<<<< HEAD
+        val p = partsScanned.toInt until math.min(partsScanned + numPartsToTry, totalParts).toInt
+=======
        val p = partsScanned until math.min(partsScanned + numPartsToTry, totalParts)
+>>>>>>> 15bd73627e04591fd13667b4838c9098342db965
        val buf = new Array[Array[T]](p.size)
        self.context.setCallSite(callSite)
@@ -111,11 +123,19 @@ class AsyncRDDActions[T: ClassTag](self: RDD[T]) extends Serializable with Loggi
          Unit)
        job.flatMap {_ =>
          buf.foreach(results ++= _.take(num - results.size))
+<<<<<<< HEAD
+          continue(partsScanned + p.size)
+        }
+      }
+
+    new ComplexFutureAction[Seq[T]](continue(0L)(_))
+=======
          continue(partsScanned + numPartsToTry)
        }
      }
    new ComplexFutureAction[Seq[T]](continue(0)(_))
+>>>>>>> 15bd73627e04591fd13667b4838c9098342db965
  }
  /**
diff --git a/core/src/main/scala/org/apache/spark/rdd/BinaryFileRDD.scala b/core/src/main/scala/org/apache/spark/rdd/BinaryFileRDD.scala
index 2bf2337d49fef..989d5a707e62f 100644
--- a/core/src/main/scala/org/apache/spark/rdd/BinaryFileRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/BinaryFileRDD.scala
@@ -20,8 +20,11 @@ package org.apache.spark.rdd
 import org.apache.hadoop.conf.{ Configurable, Configuration }
 import org.apache.hadoop.io.Writable
 import org.apache.hadoop.mapreduce._
+<<<<<<< HEAD
+=======
 import org.apache.hadoop.mapreduce.task.JobContextImpl
+>>>>>>> 15bd73627e04591fd13667b4838c9098342db965
 import org.apache.spark.input.StreamFileInputFormat
 import org.apache.spark.{ Partition, SparkContext }
@@ -42,7 +45,11 @@ private[spark] class BinaryFileRDD[T](
        configurable.setConf(conf)
      case _ =>
    }
+<<<<<<< HEAD
+    val jobContext = newJobContext(conf, jobId)
+=======
    val jobContext = new JobContextImpl(conf, jobId)
+>>>>>>> 15bd73627e04591fd13667b4838c9098342db965
    inputFormat.setMinPartitions(jobContext, minPartitions)
    val rawSplits = inputFormat.getSplits(jobContext).toArray
    val result = new Array[Partition](rawSplits.size)
diff --git a/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala
index 920d3bf219ff5..b6101a53345c8 100644
--- a/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala
@@ -36,7 +36,10 @@ import org.apache.hadoop.mapred.JobID
 import org.apache.hadoop.mapred.TaskAttemptID
 import org.apache.hadoop.mapred.TaskID
 import org.apache.hadoop.mapred.lib.CombineFileSplit
+<<<<<<< HEAD
+======= import org.apache.hadoop.mapreduce.TaskType +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.hadoop.util.ReflectionUtils import org.apache.spark._ @@ -358,7 +361,11 @@ private[spark] object HadoopRDD extends Logging { def addLocalConfiguration(jobTrackerId: String, jobId: Int, splitId: Int, attemptId: Int, conf: JobConf) { val jobID = new JobID(jobTrackerId, jobId) +<<<<<<< HEAD + val taId = new TaskAttemptID(new TaskID(jobID, true, splitId), attemptId) +======= val taId = new TaskAttemptID(new TaskID(jobID, TaskType.MAP, splitId), attemptId) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 conf.set("mapred.tip.id", taId.getTaskID.toString) conf.set("mapred.task.id", taId.toString) diff --git a/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala b/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala index 8b330a34c3d3a..217df45dac9cb 100644 --- a/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala @@ -26,11 +26,18 @@ import org.apache.hadoop.conf.{Configurable, Configuration} import org.apache.hadoop.io.Writable import org.apache.hadoop.mapreduce._ import org.apache.hadoop.mapreduce.lib.input.{CombineFileSplit, FileSplit} +<<<<<<< HEAD +======= import org.apache.hadoop.mapreduce.task.{JobContextImpl, TaskAttemptContextImpl} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.annotation.DeveloperApi import org.apache.spark._ import org.apache.spark.executor.DataReadMethod +<<<<<<< HEAD +import org.apache.spark.mapreduce.SparkHadoopMapReduceUtil +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.rdd.NewHadoopRDD.NewHadoopMapPartitionsWithSplitRDD import org.apache.spark.util.{SerializableConfiguration, ShutdownHookManager} import org.apache.spark.deploy.SparkHadoopUtil @@ -66,7 +73,13 @@ class NewHadoopRDD[K, V]( keyClass: Class[K], valueClass: Class[V], @transient private val _conf: Configuration) +<<<<<<< HEAD + extends RDD[(K, V)](sc, Nil) + with SparkHadoopMapReduceUtil + with Logging { +======= extends RDD[(K, V)](sc, Nil) with Logging { +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 // A Hadoop Configuration can be about 10 KB, which is pretty big, so broadcast it private val confBroadcast = sc.broadcast(new SerializableConfiguration(_conf)) @@ -107,7 +120,11 @@ class NewHadoopRDD[K, V]( configurable.setConf(_conf) case _ => } +<<<<<<< HEAD + val jobContext = newJobContext(_conf, jobId) +======= val jobContext = new JobContextImpl(_conf, jobId) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 val rawSplits = inputFormat.getSplits(jobContext).toArray val result = new Array[Partition](rawSplits.size) for (i <- 0 until rawSplits.size) { @@ -142,8 +159,13 @@ class NewHadoopRDD[K, V]( configurable.setConf(conf) case _ => } +<<<<<<< HEAD + val attemptId = newTaskAttemptID(jobTrackerId, id, isMap = true, split.index, 0) + val hadoopAttemptContext = newTaskAttemptContext(conf, attemptId) +======= val attemptId = new TaskAttemptID(jobTrackerId, id, TaskType.MAP, split.index, 0) val hadoopAttemptContext = new TaskAttemptContextImpl(conf, attemptId) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 private var reader = format.createRecordReader( split.serializableHadoopSplit.value, hadoopAttemptContext) reader.initialize(split.serializableHadoopSplit.value, hadoopAttemptContext) diff --git a/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala 
index b87230142532b..1108568e7eebb 100644 --- a/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala +++ b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala @@ -33,14 +33,23 @@ import org.apache.hadoop.fs.FileSystem import org.apache.hadoop.io.SequenceFile.CompressionType import org.apache.hadoop.io.compress.CompressionCodec import org.apache.hadoop.mapred.{FileOutputCommitter, FileOutputFormat, JobConf, OutputFormat} +<<<<<<< HEAD +import org.apache.hadoop.mapreduce.{Job => NewAPIHadoopJob, OutputFormat => NewOutputFormat, + RecordWriter => NewRecordWriter} +======= import org.apache.hadoop.mapreduce.{Job => NewAPIHadoopJob, OutputFormat => NewOutputFormat, RecordWriter => NewRecordWriter, TaskType, TaskAttemptID} import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark._ import org.apache.spark.Partitioner.defaultPartitioner import org.apache.spark.annotation.Experimental import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.executor.{DataWriteMethod, OutputMetrics} +<<<<<<< HEAD +import org.apache.spark.mapreduce.SparkHadoopMapReduceUtil +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.partial.{BoundedDouble, PartialResult} import org.apache.spark.serializer.Serializer import org.apache.spark.util.{SerializableConfiguration, Utils} @@ -52,7 +61,14 @@ import org.apache.spark.util.random.StratifiedSamplingUtils */ class PairRDDFunctions[K, V](self: RDD[(K, V)]) (implicit kt: ClassTag[K], vt: ClassTag[V], ord: Ordering[K] = null) +<<<<<<< HEAD + extends Logging + with SparkHadoopMapReduceUtil + with Serializable +{ +======= extends Logging with Serializable { +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * :: Experimental :: @@ -981,11 +997,19 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) conf: Configuration = self.context.hadoopConfiguration): Unit = self.withScope { // Rename this as hadoopConf internally to avoid shadowing (see SPARK-2038). val hadoopConf = conf +<<<<<<< HEAD + val job = new NewAPIHadoopJob(hadoopConf) + job.setOutputKeyClass(keyClass) + job.setOutputValueClass(valueClass) + job.setOutputFormatClass(outputFormatClass) + val jobConfiguration = SparkHadoopUtil.get.getConfigurationFromJobContext(job) +======= val job = NewAPIHadoopJob.getInstance(hadoopConf) job.setOutputKeyClass(keyClass) job.setOutputValueClass(valueClass) job.setOutputFormatClass(outputFormatClass) val jobConfiguration = job.getConfiguration +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 jobConfiguration.set("mapred.output.dir", path) saveAsNewAPIHadoopDataset(jobConfiguration) } @@ -1070,11 +1094,19 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) def saveAsNewAPIHadoopDataset(conf: Configuration): Unit = self.withScope { // Rename this as hadoopConf internally to avoid shadowing (see SPARK-2038). 
val hadoopConf = conf +<<<<<<< HEAD + val job = new NewAPIHadoopJob(hadoopConf) + val formatter = new SimpleDateFormat("yyyyMMddHHmm") + val jobtrackerID = formatter.format(new Date()) + val stageId = self.id + val jobConfiguration = SparkHadoopUtil.get.getConfigurationFromJobContext(job) +======= val job = NewAPIHadoopJob.getInstance(hadoopConf) val formatter = new SimpleDateFormat("yyyyMMddHHmm") val jobtrackerID = formatter.format(new Date()) val stageId = self.id val jobConfiguration = job.getConfiguration +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 val wrappedConf = new SerializableConfiguration(jobConfiguration) val outfmt = job.getOutputFormatClass val jobFormat = outfmt.newInstance @@ -1087,9 +1119,15 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) val writeShard = (context: TaskContext, iter: Iterator[(K, V)]) => { val config = wrappedConf.value /* "reduce task" */ +<<<<<<< HEAD + val attemptId = newTaskAttemptID(jobtrackerID, stageId, isMap = false, context.partitionId, + context.attemptNumber) + val hadoopContext = newTaskAttemptContext(config, attemptId) +======= val attemptId = new TaskAttemptID(jobtrackerID, stageId, TaskType.REDUCE, context.partitionId, context.attemptNumber) val hadoopContext = new TaskAttemptContextImpl(config, attemptId) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 val format = outfmt.newInstance format match { case c: Configurable => c.setConf(config) @@ -1121,8 +1159,13 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) 1 } : Int +<<<<<<< HEAD + val jobAttemptId = newTaskAttemptID(jobtrackerID, stageId, isMap = true, 0, 0) + val jobTaskContext = newTaskAttemptContext(wrappedConf.value, jobAttemptId) +======= val jobAttemptId = new TaskAttemptID(jobtrackerID, stageId, TaskType.MAP, 0, 0) val jobTaskContext = new TaskAttemptContextImpl(wrappedConf.value, jobAttemptId) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 val jobCommitter = jobFormat.getOutputCommitter(jobTaskContext) // When speculation is on and output committer class name contains "Direct", we should warn diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala index 9fe9d83a705b2..6cc511ed88f18 100644 --- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala @@ -1291,11 +1291,19 @@ abstract class RDD[T: ClassTag]( } else { val buf = new ArrayBuffer[T] val totalParts = this.partitions.length +<<<<<<< HEAD + var partsScanned = 0L + while (buf.size < num && partsScanned < totalParts) { + // The number of partitions to try in this iteration. It is ok for this number to be + // greater than totalParts because we actually cap it at totalParts in runJob. + var numPartsToTry = 1L +======= var partsScanned = 0 while (buf.size < num && partsScanned < totalParts) { // The number of partitions to try in this iteration. It is ok for this number to be // greater than totalParts because we actually cap it at totalParts in runJob. var numPartsToTry = 1 +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 if (partsScanned > 0) { // If we didn't find any rows after the previous iteration, quadruple and retry. 
// Otherwise, interpolate the number of partitions we need to try, but overestimate @@ -1310,11 +1318,19 @@ abstract class RDD[T: ClassTag]( } val left = num - buf.size +<<<<<<< HEAD + val p = partsScanned.toInt until math.min(partsScanned + numPartsToTry, totalParts).toInt + val res = sc.runJob(this, (it: Iterator[T]) => it.take(left).toArray, p) + + res.foreach(buf ++= _.take(num - buf.size)) + partsScanned += p.size +======= val p = partsScanned until math.min(partsScanned + numPartsToTry, totalParts) val res = sc.runJob(this, (it: Iterator[T]) => it.take(left).toArray, p) res.foreach(buf ++= _.take(num - buf.size)) partsScanned += numPartsToTry +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 } buf.toArray diff --git a/core/src/main/scala/org/apache/spark/rdd/ReliableCheckpointRDD.scala b/core/src/main/scala/org/apache/spark/rdd/ReliableCheckpointRDD.scala index a9b3d52bbee02..252132d78414b 100644 --- a/core/src/main/scala/org/apache/spark/rdd/ReliableCheckpointRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/ReliableCheckpointRDD.scala @@ -174,8 +174,12 @@ private[spark] object ReliableCheckpointRDD extends Logging { fs.create(tempOutputPath, false, bufferSize) } else { // This is mainly for testing purpose +<<<<<<< HEAD + fs.create(tempOutputPath, false, bufferSize, fs.getDefaultReplication, blockSize) +======= fs.create(tempOutputPath, false, bufferSize, fs.getDefaultReplication(fs.getWorkingDirectory), blockSize) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 } val serializer = env.serializer.newInstance() val serializeStream = serializer.serializeStream(fileOutputStream) diff --git a/core/src/main/scala/org/apache/spark/rdd/WholeTextFileRDD.scala b/core/src/main/scala/org/apache/spark/rdd/WholeTextFileRDD.scala index 8e1baae796fc5..da7c347d6cd03 100644 --- a/core/src/main/scala/org/apache/spark/rdd/WholeTextFileRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/WholeTextFileRDD.scala @@ -20,7 +20,10 @@ package org.apache.spark.rdd import org.apache.hadoop.conf.{Configurable, Configuration} import org.apache.hadoop.io.{Text, Writable} import org.apache.hadoop.mapreduce.InputSplit +<<<<<<< HEAD +======= import org.apache.hadoop.mapreduce.task.JobContextImpl +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.{Partition, SparkContext} import org.apache.spark.input.WholeTextFileInputFormat @@ -45,7 +48,11 @@ private[spark] class WholeTextFileRDD( configurable.setConf(conf) case _ => } +<<<<<<< HEAD + val jobContext = newJobContext(conf, jobId) +======= val jobContext = new JobContextImpl(conf, jobId) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 inputFormat.setMinPartitions(jobContext, minPartitions) val rawSplits = inputFormat.getSplits(jobContext).toArray val result = new Array[Partition](rawSplits.size) diff --git a/core/src/main/scala/org/apache/spark/rpc/RpcEnv.scala b/core/src/main/scala/org/apache/spark/rpc/RpcEnv.scala index 56683771335a6..ee632aee27773 100644 --- a/core/src/main/scala/org/apache/spark/rpc/RpcEnv.scala +++ b/core/src/main/scala/org/apache/spark/rpc/RpcEnv.scala @@ -23,8 +23,12 @@ import java.nio.channels.ReadableByteChannel import scala.concurrent.Future import org.apache.spark.{SecurityManager, SparkConf} +<<<<<<< HEAD +import org.apache.spark.util.{RpcUtils, Utils} +======= import org.apache.spark.rpc.netty.NettyRpcEnvFactory import org.apache.spark.util.RpcUtils +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** @@ -33,6 +37,18 @@ import org.apache.spark.util.RpcUtils */ private[spark] object RpcEnv { +<<<<<<< 
HEAD + private def getRpcEnvFactory(conf: SparkConf): RpcEnvFactory = { + val rpcEnvNames = Map( + "akka" -> "org.apache.spark.rpc.akka.AkkaRpcEnvFactory", + "netty" -> "org.apache.spark.rpc.netty.NettyRpcEnvFactory") + val rpcEnvName = conf.get("spark.rpc", "netty") + val rpcEnvFactoryClassName = rpcEnvNames.getOrElse(rpcEnvName.toLowerCase, rpcEnvName) + Utils.classForName(rpcEnvFactoryClassName).newInstance().asInstanceOf[RpcEnvFactory] + } + +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 def create( name: String, host: String, @@ -40,8 +56,14 @@ private[spark] object RpcEnv { conf: SparkConf, securityManager: SecurityManager, clientMode: Boolean = false): RpcEnv = { +<<<<<<< HEAD + // Using Reflection to create the RpcEnv to avoid to depend on Akka directly + val config = RpcEnvConfig(conf, name, host, port, securityManager, clientMode) + getRpcEnvFactory(conf).create(config) +======= val config = RpcEnvConfig(conf, name, host, port, securityManager, clientMode) new NettyRpcEnvFactory().create(config) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 } } @@ -89,11 +111,20 @@ private[spark] abstract class RpcEnv(conf: SparkConf) { } /** +<<<<<<< HEAD + * Retrieve the [[RpcEndpointRef]] represented by `systemName`, `address` and `endpointName`. + * This is a blocking action. + */ + def setupEndpointRef( + systemName: String, address: RpcAddress, endpointName: String): RpcEndpointRef = { + setupEndpointRefByURI(uriOf(systemName, address, endpointName)) +======= * Retrieve the [[RpcEndpointRef]] represented by `address` and `endpointName`. * This is a blocking action. */ def setupEndpointRef(address: RpcAddress, endpointName: String): RpcEndpointRef = { setupEndpointRefByURI(RpcEndpointAddress(address, endpointName).toString) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 } /** @@ -115,6 +146,15 @@ private[spark] abstract class RpcEnv(conf: SparkConf) { def awaitTermination(): Unit /** +<<<<<<< HEAD + * Create a URI used to create a [[RpcEndpointRef]]. Use this one to create the URI instead of + * creating it manually because different [[RpcEnv]] may have different formats. + */ + def uriOf(systemName: String, address: RpcAddress, endpointName: String): String + + /** +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 * [[RpcEndpointRef]] cannot be deserialized without [[RpcEnv]]. So when deserializing any object * that contains [[RpcEndpointRef]]s, the deserialization codes should be wrapped by this method. */ diff --git a/core/src/main/scala/org/apache/spark/rpc/akka/AkkaRpcEnv.scala b/core/src/main/scala/org/apache/spark/rpc/akka/AkkaRpcEnv.scala new file mode 100644 index 0000000000000..9d098154f7190 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/rpc/akka/AkkaRpcEnv.scala @@ -0,0 +1,404 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.rpc.akka + +import java.io.File +import java.nio.channels.ReadableByteChannel +import java.util.concurrent.ConcurrentHashMap + +import scala.concurrent.Future +import scala.language.postfixOps +import scala.reflect.ClassTag +import scala.util.control.NonFatal + +import akka.actor.{ActorSystem, ExtendedActorSystem, Actor, ActorRef, Props, Address} +import akka.event.Logging.Error +import akka.pattern.{ask => akkaAsk} +import akka.remote.{AssociationEvent, AssociatedEvent, DisassociatedEvent, AssociationErrorEvent} +import akka.serialization.JavaSerializer + +import org.apache.spark.{HttpFileServer, Logging, SecurityManager, SparkConf, SparkException} +import org.apache.spark.rpc._ +import org.apache.spark.util.{ActorLogReceive, AkkaUtils, ThreadUtils} + +/** + * A RpcEnv implementation based on Akka. + * + * TODO Once we remove all usages of Akka in other place, we can move this file to a new project and + * remove Akka from the dependencies. + */ +private[spark] class AkkaRpcEnv private[akka] ( + val actorSystem: ActorSystem, + val securityManager: SecurityManager, + conf: SparkConf, + boundPort: Int) + extends RpcEnv(conf) with Logging { + + private val defaultAddress: RpcAddress = { + val address = actorSystem.asInstanceOf[ExtendedActorSystem].provider.getDefaultAddress + // In some test case, ActorSystem doesn't bind to any address. + // So just use some default value since they are only some unit tests + RpcAddress(address.host.getOrElse("localhost"), address.port.getOrElse(boundPort)) + } + + override val address: RpcAddress = defaultAddress + + /** + * A lookup table to search a [[RpcEndpointRef]] for a [[RpcEndpoint]]. We need it to make + * [[RpcEndpoint.self]] work. + */ + private val endpointToRef = new ConcurrentHashMap[RpcEndpoint, RpcEndpointRef]() + + /** + * Need this map to remove `RpcEndpoint` from `endpointToRef` via a `RpcEndpointRef` + */ + private val refToEndpoint = new ConcurrentHashMap[RpcEndpointRef, RpcEndpoint]() + + private val _fileServer = new AkkaFileServer(conf, securityManager) + + private def registerEndpoint(endpoint: RpcEndpoint, endpointRef: RpcEndpointRef): Unit = { + endpointToRef.put(endpoint, endpointRef) + refToEndpoint.put(endpointRef, endpoint) + } + + private def unregisterEndpoint(endpointRef: RpcEndpointRef): Unit = { + val endpoint = refToEndpoint.remove(endpointRef) + if (endpoint != null) { + endpointToRef.remove(endpoint) + } + } + + /** + * Retrieve the [[RpcEndpointRef]] of `endpoint`. + */ + override def endpointRef(endpoint: RpcEndpoint): RpcEndpointRef = endpointToRef.get(endpoint) + + override def setupEndpoint(name: String, endpoint: RpcEndpoint): RpcEndpointRef = { + @volatile var endpointRef: AkkaRpcEndpointRef = null + // Use defered function because the Actor needs to use `endpointRef`. + // So `actorRef` should be created after assigning `endpointRef`. 
+ val actorRef = () => actorSystem.actorOf(Props(new Actor with ActorLogReceive with Logging { + + assert(endpointRef != null) + + override def preStart(): Unit = { + // Listen for remote client network events + context.system.eventStream.subscribe(self, classOf[AssociationEvent]) + safelyCall(endpoint) { + endpoint.onStart() + } + } + + override def receiveWithLogging: Receive = { + case AssociatedEvent(_, remoteAddress, _) => + safelyCall(endpoint) { + endpoint.onConnected(akkaAddressToRpcAddress(remoteAddress)) + } + + case DisassociatedEvent(_, remoteAddress, _) => + safelyCall(endpoint) { + endpoint.onDisconnected(akkaAddressToRpcAddress(remoteAddress)) + } + + case AssociationErrorEvent(cause, localAddress, remoteAddress, inbound, _) => + safelyCall(endpoint) { + endpoint.onNetworkError(cause, akkaAddressToRpcAddress(remoteAddress)) + } + + case e: AssociationEvent => + // TODO ignore? + + case m: AkkaMessage => + logDebug(s"Received RPC message: $m") + safelyCall(endpoint) { + processMessage(endpoint, m, sender) + } + + case AkkaFailure(e) => + safelyCall(endpoint) { + throw e + } + + case message: Any => { + logWarning(s"Unknown message: $message") + } + + } + + override def postStop(): Unit = { + unregisterEndpoint(endpoint.self) + safelyCall(endpoint) { + endpoint.onStop() + } + } + + }), name = name) + endpointRef = new AkkaRpcEndpointRef(defaultAddress, actorRef, conf, initInConstructor = false) + registerEndpoint(endpoint, endpointRef) + // Now actorRef can be created safely + endpointRef.init() + endpointRef + } + + private def processMessage(endpoint: RpcEndpoint, m: AkkaMessage, _sender: ActorRef): Unit = { + val message = m.message + val needReply = m.needReply + val pf: PartialFunction[Any, Unit] = + if (needReply) { + endpoint.receiveAndReply(new RpcCallContext { + override def sendFailure(e: Throwable): Unit = { + _sender ! AkkaFailure(e) + } + + override def reply(response: Any): Unit = { + _sender ! AkkaMessage(response, false) + } + + // Use "lazy" because most of RpcEndpoints don't need "senderAddress" + override lazy val senderAddress: RpcAddress = + new AkkaRpcEndpointRef(defaultAddress, _sender, conf).address + }) + } else { + endpoint.receive + } + try { + pf.applyOrElse[Any, Unit](message, { message => + throw new SparkException(s"Unmatched message $message from ${_sender}") + }) + } catch { + case NonFatal(e) => + _sender ! AkkaFailure(e) + if (!needReply) { + // If the sender does not require a reply, it may not handle the exception. So we rethrow + // "e" to make sure it will be processed. + throw e + } + } + } + + /** + * Run `action` safely to avoid to crash the thread. If any non-fatal exception happens, it will + * call `endpoint.onError`. If `endpoint.onError` throws any non-fatal exception, just log it. + */ + private def safelyCall(endpoint: RpcEndpoint)(action: => Unit): Unit = { + try { + action + } catch { + case NonFatal(e) => { + try { + endpoint.onError(e) + } catch { + case NonFatal(e) => logError(s"Ignore error: ${e.getMessage}", e) + } + } + } + } + + private def akkaAddressToRpcAddress(address: Address): RpcAddress = { + RpcAddress(address.host.getOrElse(defaultAddress.host), + address.port.getOrElse(defaultAddress.port)) + } + + override def asyncSetupEndpointRefByURI(uri: String): Future[RpcEndpointRef] = { + import actorSystem.dispatcher + actorSystem.actorSelection(uri).resolveOne(defaultLookupTimeout.duration). + map(new AkkaRpcEndpointRef(defaultAddress, _, conf)). 
+ // this is just in case there is a timeout from creating the future in resolveOne, we want the + // exception to indicate the conf that determines the timeout + recover(defaultLookupTimeout.addMessageIfTimeout) + } + + override def uriOf(systemName: String, address: RpcAddress, endpointName: String): String = { + AkkaUtils.address( + AkkaUtils.protocol(actorSystem), systemName, address.host, address.port, endpointName) + } + + override def shutdown(): Unit = { + actorSystem.shutdown() + _fileServer.shutdown() + } + + override def stop(endpoint: RpcEndpointRef): Unit = { + require(endpoint.isInstanceOf[AkkaRpcEndpointRef]) + actorSystem.stop(endpoint.asInstanceOf[AkkaRpcEndpointRef].actorRef) + } + + override def awaitTermination(): Unit = { + actorSystem.awaitTermination() + } + + override def toString: String = s"${getClass.getSimpleName}($actorSystem)" + + override def deserialize[T](deserializationAction: () => T): T = { + JavaSerializer.currentSystem.withValue(actorSystem.asInstanceOf[ExtendedActorSystem]) { + deserializationAction() + } + } + + override def openChannel(uri: String): ReadableByteChannel = { + throw new UnsupportedOperationException( + "AkkaRpcEnv's files should be retrieved using an HTTP client.") + } + + override def fileServer: RpcEnvFileServer = _fileServer + +} + +private[akka] class AkkaFileServer( + conf: SparkConf, + securityManager: SecurityManager) extends RpcEnvFileServer { + + @volatile private var httpFileServer: HttpFileServer = _ + + override def addFile(file: File): String = { + getFileServer().addFile(file) + } + + override def addJar(file: File): String = { + getFileServer().addJar(file) + } + + override def addDirectory(baseUri: String, path: File): String = { + val fixedBaseUri = validateDirectoryUri(baseUri) + getFileServer().addDirectory(fixedBaseUri, path.getAbsolutePath()) + } + + def shutdown(): Unit = { + if (httpFileServer != null) { + httpFileServer.stop() + } + } + + private def getFileServer(): HttpFileServer = { + if (httpFileServer == null) synchronized { + if (httpFileServer == null) { + httpFileServer = startFileServer() + } + } + httpFileServer + } + + private def startFileServer(): HttpFileServer = { + val fileServerPort = conf.getInt("spark.fileserver.port", 0) + val server = new HttpFileServer(conf, securityManager, fileServerPort) + server.initialize() + server + } + +} + +private[spark] class AkkaRpcEnvFactory extends RpcEnvFactory { + + def create(config: RpcEnvConfig): RpcEnv = { + val (actorSystem, boundPort) = AkkaUtils.createActorSystem( + config.name, config.host, config.port, config.conf, config.securityManager) + actorSystem.actorOf(Props(classOf[ErrorMonitor]), "ErrorMonitor") + new AkkaRpcEnv(actorSystem, config.securityManager, config.conf, boundPort) + } +} + +/** + * Monitor errors reported by Akka and log them. 
+ */ +private[akka] class ErrorMonitor extends Actor with ActorLogReceive with Logging { + + override def preStart(): Unit = { + context.system.eventStream.subscribe(self, classOf[Error]) + } + + override def receiveWithLogging: Actor.Receive = { + case Error(cause: Throwable, _, _, message: String) => logDebug(message, cause) + } +} + +private[akka] class AkkaRpcEndpointRef( + @transient private val defaultAddress: RpcAddress, + @transient private val _actorRef: () => ActorRef, + conf: SparkConf, + initInConstructor: Boolean) + extends RpcEndpointRef(conf) with Logging { + + def this( + defaultAddress: RpcAddress, + _actorRef: ActorRef, + conf: SparkConf) = { + this(defaultAddress, () => _actorRef, conf, true) + } + + lazy val actorRef = _actorRef() + + override lazy val address: RpcAddress = { + val akkaAddress = actorRef.path.address + RpcAddress(akkaAddress.host.getOrElse(defaultAddress.host), + akkaAddress.port.getOrElse(defaultAddress.port)) + } + + override lazy val name: String = actorRef.path.name + + private[akka] def init(): Unit = { + // Initialize the lazy vals + actorRef + address + name + } + + if (initInConstructor) { + init() + } + + override def send(message: Any): Unit = { + actorRef ! AkkaMessage(message, false) + } + + override def ask[T: ClassTag](message: Any, timeout: RpcTimeout): Future[T] = { + actorRef.ask(AkkaMessage(message, true))(timeout.duration).flatMap { + // The function will run in the calling thread, so it should be short and never block. + case msg @ AkkaMessage(message, reply) => + if (reply) { + logError(s"Receive $msg but the sender cannot reply") + Future.failed(new SparkException(s"Receive $msg but the sender cannot reply")) + } else { + Future.successful(message) + } + case AkkaFailure(e) => + Future.failed(e) + }(ThreadUtils.sameThread).mapTo[T]. + recover(timeout.addMessageIfTimeout)(ThreadUtils.sameThread) + } + + override def toString: String = s"${getClass.getSimpleName}($actorRef)" + + final override def equals(that: Any): Boolean = that match { + case other: AkkaRpcEndpointRef => actorRef == other.actorRef + case _ => false + } + + final override def hashCode(): Int = if (actorRef == null) 0 else actorRef.hashCode() +} + +/** + * A wrapper to `message` so that the receiver knows if the sender expects a reply. 
+ * @param message + * @param needReply if the sender expects a reply message + */ +private[akka] case class AkkaMessage(message: Any, needReply: Boolean) + +/** + * A reply with the failure error from the receiver to the sender + */ +private[akka] case class AkkaFailure(e: Throwable) diff --git a/core/src/main/scala/org/apache/spark/rpc/netty/NettyRpcEnv.scala b/core/src/main/scala/org/apache/spark/rpc/netty/NettyRpcEnv.scala index ef876b1d8c15a..bad1444ad600e 100644 --- a/core/src/main/scala/org/apache/spark/rpc/netty/NettyRpcEnv.scala +++ b/core/src/main/scala/org/apache/spark/rpc/netty/NettyRpcEnv.scala @@ -257,6 +257,12 @@ private[netty] class NettyRpcEnv( dispatcher.getRpcEndpointRef(endpoint) } +<<<<<<< HEAD + override def uriOf(systemName: String, address: RpcAddress, endpointName: String): String = + new RpcEndpointAddress(address, endpointName).toString + +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 override def shutdown(): Unit = { cleanup() } @@ -424,7 +430,11 @@ private[netty] object NettyRpcEnv extends Logging { } +<<<<<<< HEAD +private[netty] class NettyRpcEnvFactory extends RpcEnvFactory with Logging { +======= private[rpc] class NettyRpcEnvFactory extends RpcEnvFactory with Logging { +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 def create(config: RpcEnvConfig): RpcEnv = { val sparkConf = config.conf diff --git a/core/src/main/scala/org/apache/spark/rpc/netty/RpcEndpointAddress.scala b/core/src/main/scala/org/apache/spark/rpc/netty/RpcEndpointAddress.scala new file mode 100644 index 0000000000000..cd6f00cc08e6c --- /dev/null +++ b/core/src/main/scala/org/apache/spark/rpc/netty/RpcEndpointAddress.scala @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.rpc.netty + +import org.apache.spark.SparkException +import org.apache.spark.rpc.RpcAddress + +/** + * An address identifier for an RPC endpoint. + * + * The `rpcAddress` may be null, in which case the endpoint is registered via a client-only + * connection and can only be reached via the client that sent the endpoint reference. + * + * @param rpcAddress The socket address of the endpoint. + * @param name Name of the endpoint. 
+ */ +private[netty] case class RpcEndpointAddress(val rpcAddress: RpcAddress, val name: String) { + + require(name != null, "RpcEndpoint name must be provided.") + + def this(host: String, port: Int, name: String) = { + this(RpcAddress(host, port), name) + } + + override val toString = if (rpcAddress != null) { + s"spark://$name@${rpcAddress.host}:${rpcAddress.port}" + } else { + s"spark-client://$name" + } +} + +private[netty] object RpcEndpointAddress { + + def apply(sparkUrl: String): RpcEndpointAddress = { + try { + val uri = new java.net.URI(sparkUrl) + val host = uri.getHost + val port = uri.getPort + val name = uri.getUserInfo + if (uri.getScheme != "spark" || + host == null || + port < 0 || + name == null || + (uri.getPath != null && !uri.getPath.isEmpty) || // uri.getPath returns "" instead of null + uri.getFragment != null || + uri.getQuery != null) { + throw new SparkException("Invalid Spark URL: " + sparkUrl) + } + new RpcEndpointAddress(host, port, name) + } catch { + case e: java.net.URISyntaxException => + throw new SparkException("Invalid Spark URL: " + sparkUrl, e) + } + } +} diff --git a/core/src/main/scala/org/apache/spark/scheduler/EventLoggingListener.scala b/core/src/main/scala/org/apache/spark/scheduler/EventLoggingListener.scala index 68792c58c9b4e..c6f55198e6776 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/EventLoggingListener.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/EventLoggingListener.scala @@ -77,6 +77,17 @@ private[spark] class EventLoggingListener( // Only defined if the file system scheme is not local private var hadoopDataStream: Option[FSDataOutputStream] = None +<<<<<<< HEAD + // The Hadoop APIs have changed over time, so we use reflection to figure out + // the correct method to use to flush a hadoop data stream. See SPARK-1518 + // for details. + private val hadoopFlushMethod = { + val cls = classOf[FSDataOutputStream] + scala.util.Try(cls.getMethod("hflush")).getOrElse(cls.getMethod("sync")) + } + +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 private var writer: Option[PrintWriter] = None // For testing. Keep track of all JSON serialized events that have been logged. @@ -89,7 +100,11 @@ private[spark] class EventLoggingListener( * Creates the log file in the configured log directory. 
*/ def start() { +<<<<<<< HEAD + if (!fileSystem.getFileStatus(new Path(logBaseDir)).isDir) { +======= if (!fileSystem.getFileStatus(new Path(logBaseDir)).isDirectory) { +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 throw new IllegalArgumentException(s"Log directory $logBaseDir does not exist.") } @@ -139,7 +154,11 @@ private[spark] class EventLoggingListener( // scalastyle:on println if (flushLogger) { writer.foreach(_.flush()) +<<<<<<< HEAD + hadoopDataStream.foreach(hadoopFlushMethod.invoke(_)) +======= hadoopDataStream.foreach(_.hflush()) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 } if (testing) { loggedEvents += eventJson diff --git a/core/src/main/scala/org/apache/spark/scheduler/InputFormatInfo.scala b/core/src/main/scala/org/apache/spark/scheduler/InputFormatInfo.scala index 8235b10245376..012b39fe4dd90 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/InputFormatInfo.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/InputFormatInfo.scala @@ -103,7 +103,11 @@ class InputFormatInfo(val configuration: Configuration, val inputFormatClazz: Cl val instance: org.apache.hadoop.mapreduce.InputFormat[_, _] = ReflectionUtils.newInstance(inputFormatClazz.asInstanceOf[Class[_]], conf).asInstanceOf[ org.apache.hadoop.mapreduce.InputFormat[_, _]] +<<<<<<< HEAD + val job = new Job(conf) +======= val job = Job.getInstance(conf) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 val retval = new ArrayBuffer[SplitInfo]() val list = instance.getSplits(job) diff --git a/core/src/main/scala/org/apache/spark/scheduler/JobLogger.scala b/core/src/main/scala/org/apache/spark/scheduler/JobLogger.scala new file mode 100644 index 0000000000000..f96eb8ca0ae00 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/scheduler/JobLogger.scala @@ -0,0 +1,277 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.scheduler + +import java.io.{File, FileNotFoundException, IOException, PrintWriter} +import java.text.SimpleDateFormat +import java.util.{Date, Properties} + +import scala.collection.mutable.HashMap + +import org.apache.spark._ +import org.apache.spark.annotation.DeveloperApi +import org.apache.spark.executor.TaskMetrics + +/** + * :: DeveloperApi :: + * A logger class to record runtime information for jobs in Spark. This class outputs one log file + * for each Spark job, containing tasks start/stop and shuffle information. JobLogger is a subclass + * of SparkListener, use addSparkListener to add JobLogger to a SparkContext after the SparkContext + * is created. Note that each JobLogger only works for one SparkContext + * + * NOTE: The functionality of this class is heavily stripped down to accommodate for a general + * refactor of the SparkListener interface. 
In its place, the EventLoggingListener is introduced + * to log application information as SparkListenerEvents. To enable this functionality, set + * spark.eventLog.enabled to true. + */ +@DeveloperApi +@deprecated("Log application information by setting spark.eventLog.enabled.", "1.0.0") +class JobLogger(val user: String, val logDirName: String) extends SparkListener with Logging { + + def this() = this(System.getProperty("user.name", ""), + String.valueOf(System.currentTimeMillis())) + + private val logDir = + if (System.getenv("SPARK_LOG_DIR") != null) { + System.getenv("SPARK_LOG_DIR") + } else { + "/tmp/spark-%s".format(user) + } + + private val jobIdToPrintWriter = new HashMap[Int, PrintWriter] + private val stageIdToJobId = new HashMap[Int, Int] + private val jobIdToStageIds = new HashMap[Int, Seq[Int]] + private val dateFormat = new ThreadLocal[SimpleDateFormat]() { + override def initialValue(): SimpleDateFormat = new SimpleDateFormat("yyyy/MM/dd HH:mm:ss") + } + + createLogDir() + + /** Create a folder for log files, the folder's name is the creation time of jobLogger */ + protected def createLogDir() { + val dir = new File(logDir + "/" + logDirName + "/") + if (dir.exists()) { + return + } + if (!dir.mkdirs()) { + // JobLogger should throw a exception rather than continue to construct this object. + throw new IOException("create log directory error:" + logDir + "/" + logDirName + "/") + } + } + + /** + * Create a log file for one job + * @param jobId ID of the job + * @throws FileNotFoundException Fail to create log file + */ + protected def createLogWriter(jobId: Int) { + try { + val fileWriter = new PrintWriter(logDir + "/" + logDirName + "/" + jobId) + jobIdToPrintWriter += (jobId -> fileWriter) + } catch { + case e: FileNotFoundException => e.printStackTrace() + } + } + + /** + * Close log file, and clean the stage relationship in stageIdToJobId + * @param jobId ID of the job + */ + protected def closeLogWriter(jobId: Int) { + jobIdToPrintWriter.get(jobId).foreach { fileWriter => + fileWriter.close() + jobIdToStageIds.get(jobId).foreach(_.foreach { stageId => + stageIdToJobId -= stageId + }) + jobIdToPrintWriter -= jobId + jobIdToStageIds -= jobId + } + } + + /** + * Build up the maps that represent stage-job relationships + * @param jobId ID of the job + * @param stageIds IDs of the associated stages + */ + protected def buildJobStageDependencies(jobId: Int, stageIds: Seq[Int]) = { + jobIdToStageIds(jobId) = stageIds + stageIds.foreach { stageId => stageIdToJobId(stageId) = jobId } + } + + /** + * Write info into log file + * @param jobId ID of the job + * @param info Info to be recorded + * @param withTime Controls whether to record time stamp before the info, default is true + */ + protected def jobLogInfo(jobId: Int, info: String, withTime: Boolean = true) { + var writeInfo = info + if (withTime) { + val date = new Date(System.currentTimeMillis()) + writeInfo = dateFormat.get.format(date) + ": " + info + } + // scalastyle:off println + jobIdToPrintWriter.get(jobId).foreach(_.println(writeInfo)) + // scalastyle:on println + } + + /** + * Write info into log file + * @param stageId ID of the stage + * @param info Info to be recorded + * @param withTime Controls whether to record time stamp before the info, default is true + */ + protected def stageLogInfo(stageId: Int, info: String, withTime: Boolean = true) { + stageIdToJobId.get(stageId).foreach(jobId => jobLogInfo(jobId, info, withTime)) + } + + /** + * Record task metrics into job log files, including execution info 
and shuffle metrics + * @param stageId Stage ID of the task + * @param status Status info of the task + * @param taskInfo Task description info + * @param taskMetrics Task running metrics + */ + protected def recordTaskMetrics(stageId: Int, status: String, + taskInfo: TaskInfo, taskMetrics: TaskMetrics) { + val info = " TID=" + taskInfo.taskId + " STAGE_ID=" + stageId + + " START_TIME=" + taskInfo.launchTime + " FINISH_TIME=" + taskInfo.finishTime + + " EXECUTOR_ID=" + taskInfo.executorId + " HOST=" + taskMetrics.hostname + val executorRunTime = " EXECUTOR_RUN_TIME=" + taskMetrics.executorRunTime + val gcTime = " GC_TIME=" + taskMetrics.jvmGCTime + val inputMetrics = taskMetrics.inputMetrics match { + case Some(metrics) => + " READ_METHOD=" + metrics.readMethod.toString + + " INPUT_BYTES=" + metrics.bytesRead + case None => "" + } + val outputMetrics = taskMetrics.outputMetrics match { + case Some(metrics) => + " OUTPUT_BYTES=" + metrics.bytesWritten + case None => "" + } + val shuffleReadMetrics = taskMetrics.shuffleReadMetrics match { + case Some(metrics) => + " BLOCK_FETCHED_TOTAL=" + metrics.totalBlocksFetched + + " BLOCK_FETCHED_LOCAL=" + metrics.localBlocksFetched + + " BLOCK_FETCHED_REMOTE=" + metrics.remoteBlocksFetched + + " REMOTE_FETCH_WAIT_TIME=" + metrics.fetchWaitTime + + " REMOTE_BYTES_READ=" + metrics.remoteBytesRead + + " LOCAL_BYTES_READ=" + metrics.localBytesRead + case None => "" + } + val writeMetrics = taskMetrics.shuffleWriteMetrics match { + case Some(metrics) => + " SHUFFLE_BYTES_WRITTEN=" + metrics.shuffleBytesWritten + + " SHUFFLE_WRITE_TIME=" + metrics.shuffleWriteTime + case None => "" + } + stageLogInfo(stageId, status + info + executorRunTime + gcTime + inputMetrics + outputMetrics + + shuffleReadMetrics + writeMetrics) + } + + /** + * When stage is submitted, record stage submit info + * @param stageSubmitted Stage submitted event + */ + override def onStageSubmitted(stageSubmitted: SparkListenerStageSubmitted) { + val stageInfo = stageSubmitted.stageInfo + stageLogInfo(stageInfo.stageId, "STAGE_ID=%d STATUS=SUBMITTED TASK_SIZE=%d".format( + stageInfo.stageId, stageInfo.numTasks)) + } + + /** + * When stage is completed, record stage completion status + * @param stageCompleted Stage completed event + */ + override def onStageCompleted(stageCompleted: SparkListenerStageCompleted) { + val stageId = stageCompleted.stageInfo.stageId + if (stageCompleted.stageInfo.failureReason.isEmpty) { + stageLogInfo(stageId, s"STAGE_ID=$stageId STATUS=COMPLETED") + } else { + stageLogInfo(stageId, s"STAGE_ID=$stageId STATUS=FAILED") + } + } + + /** + * When task ends, record task completion status and metrics + * @param taskEnd Task end event + */ + override def onTaskEnd(taskEnd: SparkListenerTaskEnd) { + val taskInfo = taskEnd.taskInfo + var taskStatus = "TASK_TYPE=%s".format(taskEnd.taskType) + val taskMetrics = if (taskEnd.taskMetrics != null) taskEnd.taskMetrics else TaskMetrics.empty + taskEnd.reason match { + case Success => taskStatus += " STATUS=SUCCESS" + recordTaskMetrics(taskEnd.stageId, taskStatus, taskInfo, taskMetrics) + case Resubmitted => + taskStatus += " STATUS=RESUBMITTED TID=" + taskInfo.taskId + + " STAGE_ID=" + taskEnd.stageId + stageLogInfo(taskEnd.stageId, taskStatus) + case FetchFailed(bmAddress, shuffleId, mapId, reduceId, message) => + taskStatus += " STATUS=FETCHFAILED TID=" + taskInfo.taskId + " STAGE_ID=" + + taskEnd.stageId + " SHUFFLE_ID=" + shuffleId + " MAP_ID=" + + mapId + " REDUCE_ID=" + reduceId + stageLogInfo(taskEnd.stageId, taskStatus) 
+ case _ => + } + } + + /** + * When job ends, recording job completion status and close log file + * @param jobEnd Job end event + */ + override def onJobEnd(jobEnd: SparkListenerJobEnd) { + val jobId = jobEnd.jobId + var info = "JOB_ID=" + jobId + jobEnd.jobResult match { + case JobSucceeded => info += " STATUS=SUCCESS" + case JobFailed(exception) => + info += " STATUS=FAILED REASON=" + exception.getMessage.split("\\s+").foreach(info += _ + "_") + case _ => + } + jobLogInfo(jobId, info.substring(0, info.length - 1).toUpperCase) + closeLogWriter(jobId) + } + + /** + * Record job properties into job log file + * @param jobId ID of the job + * @param properties Properties of the job + */ + protected def recordJobProperties(jobId: Int, properties: Properties) { + if (properties != null) { + val description = properties.getProperty(SparkContext.SPARK_JOB_DESCRIPTION, "") + jobLogInfo(jobId, description, withTime = false) + } + } + + /** + * When job starts, record job property and stage graph + * @param jobStart Job start event + */ + override def onJobStart(jobStart: SparkListenerJobStart) { + val jobId = jobStart.jobId + val properties = jobStart.properties + createLogWriter(jobId) + recordJobProperties(jobId, properties) + buildJobStageDependencies(jobId, jobStart.stageIds) + jobLogInfo(jobId, "JOB_ID=" + jobId + " STATUS=STARTED") + } +} diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/SimrSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/SimrSchedulerBackend.scala index 781ecfff7e5e7..b9ad4845532f1 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/SimrSchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/SimrSchedulerBackend.scala @@ -19,9 +19,15 @@ package org.apache.spark.scheduler.cluster import org.apache.hadoop.fs.{Path, FileSystem} +<<<<<<< HEAD +import org.apache.spark.rpc.RpcAddress +import org.apache.spark.{Logging, SparkContext, SparkEnv} +import org.apache.spark.deploy.SparkHadoopUtil +======= import org.apache.spark.{Logging, SparkContext} import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.rpc.RpcEndpointAddress +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.scheduler.TaskSchedulerImpl private[spark] class SimrSchedulerBackend( @@ -39,10 +45,16 @@ private[spark] class SimrSchedulerBackend( override def start() { super.start() +<<<<<<< HEAD + val driverUrl = rpcEnv.uriOf(SparkEnv.driverActorSystemName, + RpcAddress(sc.conf.get("spark.driver.host"), sc.conf.get("spark.driver.port").toInt), + CoarseGrainedSchedulerBackend.ENDPOINT_NAME) +======= val driverUrl = RpcEndpointAddress( sc.conf.get("spark.driver.host"), sc.conf.get("spark.driver.port").toInt, CoarseGrainedSchedulerBackend.ENDPOINT_NAME).toString +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 val conf = SparkHadoopUtil.get.newConfiguration(sc.conf) val fs = FileSystem.get(conf) diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala index 1209cce6d1a61..8ce748c8d6df0 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/SparkDeploySchedulerBackend.scala @@ -19,7 +19,11 @@ package org.apache.spark.scheduler.cluster import java.util.concurrent.Semaphore +<<<<<<< HEAD +import org.apache.spark.rpc.RpcAddress +======= import 
org.apache.spark.rpc.{RpcEndpointAddress, RpcAddress} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.{Logging, SparkConf, SparkContext, SparkEnv} import org.apache.spark.deploy.{ApplicationDescription, Command} import org.apache.spark.deploy.client.{AppClient, AppClientListener} @@ -54,10 +58,16 @@ private[spark] class SparkDeploySchedulerBackend( launcherBackend.connect() // The endpoint for executors to talk to us +<<<<<<< HEAD + val driverUrl = rpcEnv.uriOf(SparkEnv.driverActorSystemName, + RpcAddress(sc.conf.get("spark.driver.host"), sc.conf.get("spark.driver.port").toInt), + CoarseGrainedSchedulerBackend.ENDPOINT_NAME) +======= val driverUrl = RpcEndpointAddress( sc.conf.get("spark.driver.host"), sc.conf.get("spark.driver.port").toInt, CoarseGrainedSchedulerBackend.ENDPOINT_NAME).toString +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 val args = Seq( "--driver-url", driverUrl, "--executor-id", "{{EXECUTOR_ID}}", diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/CoarseMesosSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/CoarseMesosSchedulerBackend.scala index a4ed85cd2a4a3..43aa78a709664 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/CoarseMesosSchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/CoarseMesosSchedulerBackend.scala @@ -31,7 +31,11 @@ import org.apache.mesos.{Scheduler => MScheduler, SchedulerDriver} import org.apache.spark.{SecurityManager, SparkContext, SparkEnv, SparkException, TaskState} import org.apache.spark.network.netty.SparkTransportConf import org.apache.spark.network.shuffle.mesos.MesosExternalShuffleClient +<<<<<<< HEAD +import org.apache.spark.rpc.RpcAddress +======= import org.apache.spark.rpc.{RpcEndpointAddress, RpcAddress} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.scheduler.{SlaveLost, TaskSchedulerImpl} import org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend import org.apache.spark.util.Utils @@ -215,10 +219,17 @@ private[spark] class CoarseMesosSchedulerBackend( if (conf.contains("spark.testing")) { "driverURL" } else { +<<<<<<< HEAD + sc.env.rpcEnv.uriOf( + SparkEnv.driverActorSystemName, + RpcAddress(conf.get("spark.driver.host"), conf.get("spark.driver.port").toInt), + CoarseGrainedSchedulerBackend.ENDPOINT_NAME) +======= RpcEndpointAddress( conf.get("spark.driver.host"), conf.get("spark.driver.port").toInt, CoarseGrainedSchedulerBackend.ENDPOINT_NAME).toString +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 } } diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala index 16815d51d4c67..8f200b015cce5 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/mesos/MesosClusterScheduler.scala @@ -18,6 +18,10 @@ package org.apache.spark.scheduler.cluster.mesos import java.io.File +<<<<<<< HEAD +import java.util.concurrent.locks.ReentrantLock +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import java.util.{Collections, Date, List => JList} import scala.collection.JavaConverters._ @@ -125,7 +129,11 @@ private[spark] class MesosClusterScheduler( private val retainedDrivers = conf.getInt("spark.mesos.retainedDrivers", 200) private val maxRetryWaitTime = conf.getInt("spark.mesos.cluster.retry.wait.max", 
60) // 1 minute private val schedulerState = engineFactory.createEngine("scheduler") +<<<<<<< HEAD + private val stateLock = new ReentrantLock() +======= private val stateLock = new Object() +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 private val finishedDrivers = new mutable.ArrayBuffer[MesosClusterSubmissionState](retainedDrivers) private var frameworkId: String = null diff --git a/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala b/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala index 1b4538e6afb85..a8f101ef72b77 100644 --- a/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala +++ b/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala @@ -34,6 +34,10 @@ import org.roaringbitmap.RoaringBitmap import org.apache.spark._ import org.apache.spark.api.python.PythonBroadcast +<<<<<<< HEAD +import org.apache.spark.broadcast.HttpBroadcast +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.network.util.ByteUnit import org.apache.spark.scheduler.{CompressedMapStatus, HighlyCompressedMapStatus} import org.apache.spark.storage._ @@ -106,6 +110,10 @@ class KryoSerializer(conf: SparkConf) kryo.register(classOf[SerializableWritable[_]], new KryoJavaSerializer()) kryo.register(classOf[SerializableConfiguration], new KryoJavaSerializer()) kryo.register(classOf[SerializableJobConf], new KryoJavaSerializer()) +<<<<<<< HEAD + kryo.register(classOf[HttpBroadcast[_]], new KryoJavaSerializer()) +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 kryo.register(classOf[PythonBroadcast], new KryoJavaSerializer()) kryo.register(classOf[GenericRecord], new GenericAvroSerializer(avroSchemas)) @@ -399,7 +407,16 @@ private[serializer] class KryoInputDataInputBridge(input: KryoInput) extends Dat override def readInt(): Int = input.readInt() override def readUnsignedShort(): Int = input.readShortUnsigned() override def skipBytes(n: Int): Int = { +<<<<<<< HEAD + var remaining: Long = n + while (remaining > 0) { + val skip = Math.min(Integer.MAX_VALUE, remaining).asInstanceOf[Int] + input.skip(skip) + remaining -= skip + } +======= input.skip(n) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 n } override def readFully(b: Array[Byte]): Unit = input.read(b) diff --git a/core/src/main/scala/org/apache/spark/ui/PagedTable.scala b/core/src/main/scala/org/apache/spark/ui/PagedTable.scala index 9b6ed8cbbef10..2bec32226ccb7 100644 --- a/core/src/main/scala/org/apache/spark/ui/PagedTable.scala +++ b/core/src/main/scala/org/apache/spark/ui/PagedTable.scala @@ -17,6 +17,10 @@ package org.apache.spark.ui +<<<<<<< HEAD +import scala.xml.{Node, Unparsed} + +======= import java.net.URLDecoder import scala.collection.JavaConverters._ @@ -26,6 +30,7 @@ import com.google.common.base.Splitter import org.apache.spark.util.Utils +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * A data source that provides data for a page. * @@ -78,12 +83,15 @@ private[ui] trait PagedTable[T] { def tableCssClass: String +<<<<<<< HEAD +======= def pageSizeFormField: String def prevPageSizeFormField: String def pageNumberFormField: String +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 def dataSource: PagedDataSource[T] def headers: Seq[Node] @@ -108,12 +116,16 @@ private[ui] trait PagedTable[T] { val PageData(totalPages, _) = _dataSource.pageData(1)
{pageNavigation(1, _dataSource.pageSize, totalPages)} +<<<<<<< HEAD +
{e.getMessage}
+=======

Error while rendering table:

               {Utils.exceptionString(e)}
             
+>>>>>>> 15bd73627e04591fd13667b4838c9098342db965
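The conflicted hunk above covers the two error paths in `PagedTable.table`: the HEAD side renders only `e.getMessage`, while the incoming side prints "Error while rendering table:" followed by the full exception text via Spark's `Utils.exceptionString`. Below is a minimal, self-contained sketch of the incoming behaviour only; the local `exceptionString` helper is a stand-in for the Spark utility and is not part of the diff.

```scala
// Illustrative sketch of the incoming error path; not part of the patch above.
// A plain java.io-based helper stands in for Spark's Utils.exceptionString.
import java.io.{PrintWriter, StringWriter}
import scala.xml.Node

object TableErrorSketch {
  private def exceptionString(e: Throwable): String = {
    val sw = new StringWriter()
    e.printStackTrace(new PrintWriter(sw))
    sw.toString
  }

  // Render the table, or fall back to an error panel showing the full stack trace.
  def renderOrReportError(render: () => Seq[Node]): Seq[Node] = {
    try {
      render()
    } catch {
      case e @ (_: IllegalArgumentException | _: IndexOutOfBoundsException) =>
        <div class="alert alert-error">
          <p>Error while rendering table:</p>
          <pre>{exceptionString(e)}</pre>
        </div>
    }
  }
}
```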
} } @@ -169,6 +181,27 @@ private[ui] trait PagedTable[T] { // The current page should be disabled so that it cannot be clicked.
  • {p}
  • } else { +<<<<<<< HEAD +
  • {p}
  • + } + } + val (goButtonJsFuncName, goButtonJsFunc) = goButtonJavascriptFunction + // When clicking the "Go" button, it will call this javascript method and then call + // "goButtonJsFuncName" + val formJs = + s"""$$(function(){ + | $$( "#form-$tableId-page" ).submit(function(event) { + | var page = $$("#form-$tableId-page-no").val() + | var pageSize = $$("#form-$tableId-page-size").val() + | pageSize = pageSize ? pageSize: 100; + | if (page != "") { + | ${goButtonJsFuncName}(page, pageSize); + | } + | event.preventDefault(); + | }); + |}); + """.stripMargin +=======
  • {p}
  • } } @@ -193,10 +226,20 @@ private[ui] trait PagedTable[T] { Seq.empty } } +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965
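The surrounding hunks replace the HEAD side's JavaScript "Go" handler (`goButtonJavascriptFunction` plus an inline jQuery submit hook) with a plain GET form driven by `goButtonFormPath` and the `pageSizeFormField` / `pageNumberFormField` names. The sketch below shows, in isolation, how a concrete table builds those URLs; the parameter names mirror `TaskPagedTable` further down in this diff, and `basePath`, `sortColumn`, `desc`, and `pageSize` are assumed constructor inputs rather than anything defined here.

```scala
// Illustrative sketch of the form-based paging URLs on the incoming side of this merge.
import java.net.URLEncoder

class TaskPagingUrls(basePath: String, sortColumn: String, desc: Boolean, pageSize: Int) {
  private val encodedSortColumn = URLEncoder.encode(sortColumn, "UTF-8")

  // Link for a specific page number, e.g. the numbered buttons in the navigation bar.
  def pageLink(page: Int): String =
    basePath +
      s"&task.page=$page" +
      s"&task.sort=$encodedSortColumn" +
      s"&task.desc=$desc" +
      s"&task.pageSize=$pageSize"

  // Action of the "Go" form: the browser appends task.page and task.pageSize itself from
  // the form's input fields, so the path only needs to preserve the current sort state.
  def goButtonFormPath: String =
    s"$basePath&task.sort=$encodedSortColumn&task.desc=$desc"
}

// Example (hypothetical base path):
//   new TaskPagingUrls("/stages/stage/?id=1&attempt=0", "Index", desc = false, 100).pageLink(2)
```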
    + + + + + +======= method="get" action={Unparsed(goButtonFormPath)} class="form-inline pull-right" @@ -219,6 +262,7 @@ private[ui] trait PagedTable[T] { class="span1" /> +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965
    @@ -227,7 +271,11 @@ private[ui] trait PagedTable[T] {
    +<<<<<<< HEAD + +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 } } @@ -272,7 +340,16 @@ private[ui] trait PagedTable[T] { def pageLink(page: Int): String /** +<<<<<<< HEAD + * Only the implementation knows how to create the url with a page number and the page size, so we + * leave this one to the implementation. The implementation should create a JavaScript method that + * accepts a page number along with the page size and jumps to the page. The return value is this + * method name and its JavaScript codes. + */ + def goButtonJavascriptFunction: (String, String) +======= * Returns the submission path for the "go to page #" form. */ def goButtonFormPath: String +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 } diff --git a/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala b/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala index 08e7576b0c08e..e320bde1d5442 100644 --- a/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala +++ b/core/src/main/scala/org/apache/spark/ui/jobs/StagePage.scala @@ -97,13 +97,19 @@ private[ui] class StagePage(parent: StagesTab) extends WebUIPage("stage") { val parameterTaskSortColumn = request.getParameter("task.sort") val parameterTaskSortDesc = request.getParameter("task.desc") val parameterTaskPageSize = request.getParameter("task.pageSize") +<<<<<<< HEAD +======= val parameterTaskPrevPageSize = request.getParameter("task.prevPageSize") +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 val taskPage = Option(parameterTaskPage).map(_.toInt).getOrElse(1) val taskSortColumn = Option(parameterTaskSortColumn).getOrElse("Index") val taskSortDesc = Option(parameterTaskSortDesc).map(_.toBoolean).getOrElse(false) val taskPageSize = Option(parameterTaskPageSize).map(_.toInt).getOrElse(100) +<<<<<<< HEAD +======= val taskPrevPageSize = Option(parameterTaskPrevPageSize).map(_.toInt).getOrElse(taskPageSize) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 // If this is set, expand the dag visualization by default val expandDagVizParam = request.getParameter("expandDagViz") @@ -276,6 +282,8 @@ private[ui] class StagePage(parent: StagesTab) extends WebUIPage("stage") { accumulableRow, externalAccumulables.toSeq) +<<<<<<< HEAD +======= val page: Int = { // If the user has changed to a larger page size, then go to page 1 in order to avoid // IndexOutOfBoundsException. @@ -285,6 +293,7 @@ private[ui] class StagePage(parent: StagesTab) extends WebUIPage("stage") { 1 } } +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 val currentTime = System.currentTimeMillis() val (taskTable, taskTableHTML) = try { val _taskTable = new TaskPagedTable( @@ -303,6 +312,12 @@ private[ui] class StagePage(parent: StagesTab) extends WebUIPage("stage") { sortColumn = taskSortColumn, desc = taskSortDesc ) +<<<<<<< HEAD + (_taskTable, _taskTable.table(taskPage)) + } catch { + case e @ (_ : IllegalArgumentException | _ : IndexOutOfBoundsException) => + (null,
    {e.getMessage}
    ) +======= (_taskTable, _taskTable.table(page)) } catch { case e @ (_ : IllegalArgumentException | _ : IndexOutOfBoundsException) => @@ -314,6 +329,7 @@ private[ui] class StagePage(parent: StagesTab) extends WebUIPage("stage") { (null, errorMessage) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 } val jsForScrollingDownToTaskTable = @@ -1233,6 +1249,9 @@ private[ui] class TaskPagedTable( override def tableId: String = "task-table" +<<<<<<< HEAD + override def tableCssClass: String = "table table-bordered table-condensed table-striped" +======= override def tableCssClass: String = "table table-bordered table-condensed table-striped table-head-clickable" @@ -1241,6 +1260,7 @@ private[ui] class TaskPagedTable( override def prevPageSizeFormField: String = "task.prevPageSize" override def pageNumberFormField: String = "task.page" +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 override val dataSource: TaskDataSource = new TaskDataSource( data, @@ -1257,6 +1277,26 @@ private[ui] class TaskPagedTable( override def pageLink(page: Int): String = { val encodedSortColumn = URLEncoder.encode(sortColumn, "UTF-8") +<<<<<<< HEAD + s"${basePath}&task.page=$page&task.sort=${encodedSortColumn}&task.desc=${desc}" + + s"&task.pageSize=${pageSize}" + } + + override def goButtonJavascriptFunction: (String, String) = { + val jsFuncName = "goToTaskPage" + val encodedSortColumn = URLEncoder.encode(sortColumn, "UTF-8") + val jsFunc = s""" + |currentTaskPageSize = ${pageSize} + |function goToTaskPage(page, pageSize) { + | // Set page to 1 if the page size changes + | page = pageSize == currentTaskPageSize ? page : 1; + | var url = "${basePath}&task.sort=${encodedSortColumn}&task.desc=${desc}" + + | "&task.page=" + page + "&task.pageSize=" + pageSize; + | window.location.href = url; + |} + """.stripMargin + (jsFuncName, jsFunc) +======= basePath + s"&$pageNumberFormField=$page" + s"&task.sort=$encodedSortColumn" + @@ -1267,6 +1307,7 @@ private[ui] class TaskPagedTable( override def goButtonFormPath: String = { val encodedSortColumn = URLEncoder.encode(sortColumn, "UTF-8") s"$basePath&task.sort=$encodedSortColumn&task.desc=$desc" +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 } def headers: Seq[Node] = { @@ -1315,6 +1356,23 @@ private[ui] class TaskPagedTable( val headerRow: Seq[Node] = { taskHeadersAndCssClasses.map { case (header, cssClass) => if (header == sortColumn) { +<<<<<<< HEAD + val headerLink = + s"$basePath&task.sort=${URLEncoder.encode(header, "UTF-8")}&task.desc=${!desc}" + + s"&task.pageSize=${pageSize}" + val js = Unparsed(s"window.location.href='${headerLink}'") + val arrow = if (desc) "▾" else "▴" // UP or DOWN + + {header} +  {Unparsed(arrow)} + + } else { + val headerLink = + s"$basePath&task.sort=${URLEncoder.encode(header, "UTF-8")}&task.pageSize=${pageSize}" + val js = Unparsed(s"window.location.href='${headerLink}'") + + {header} +======= val headerLink = Unparsed( basePath + s"&task.sort=${URLEncoder.encode(header, "UTF-8")}" + @@ -1336,6 +1394,7 @@ private[ui] class TaskPagedTable(
    {header} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 } } diff --git a/core/src/main/scala/org/apache/spark/ui/storage/RDDPage.scala b/core/src/main/scala/org/apache/spark/ui/storage/RDDPage.scala index 606d15d599e81..00652c16e4656 100644 --- a/core/src/main/scala/org/apache/spark/ui/storage/RDDPage.scala +++ b/core/src/main/scala/org/apache/spark/ui/storage/RDDPage.scala @@ -38,13 +38,19 @@ private[ui] class RDDPage(parent: StorageTab) extends WebUIPage("rdd") { val parameterBlockSortColumn = request.getParameter("block.sort") val parameterBlockSortDesc = request.getParameter("block.desc") val parameterBlockPageSize = request.getParameter("block.pageSize") +<<<<<<< HEAD +======= val parameterBlockPrevPageSize = request.getParameter("block.prevPageSize") +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 val blockPage = Option(parameterBlockPage).map(_.toInt).getOrElse(1) val blockSortColumn = Option(parameterBlockSortColumn).getOrElse("Block Name") val blockSortDesc = Option(parameterBlockSortDesc).map(_.toBoolean).getOrElse(false) val blockPageSize = Option(parameterBlockPageSize).map(_.toInt).getOrElse(100) +<<<<<<< HEAD +======= val blockPrevPageSize = Option(parameterBlockPrevPageSize).map(_.toInt).getOrElse(blockPageSize) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 val rddId = parameterId.toInt val rddStorageInfo = AllRDDResource.getRDDStorageInfo(rddId, listener, includeDetails = true) @@ -58,6 +64,9 @@ private[ui] class RDDPage(parent: StorageTab) extends WebUIPage("rdd") { rddStorageInfo.dataDistribution.get, id = Some("rdd-storage-by-worker-table")) // Block table +<<<<<<< HEAD + val (blockTable, blockTableHTML) = try { +======= val page: Int = { // If the user has changed to a larger page size, then go to page 1 in order to avoid // IndexOutOfBoundsException. @@ -68,16 +77,24 @@ private[ui] class RDDPage(parent: StorageTab) extends WebUIPage("rdd") { } } val blockTableHTML = try { +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 val _blockTable = new BlockPagedTable( UIUtils.prependBaseUri(parent.basePath) + s"/storage/rdd/?id=${rddId}", rddStorageInfo.partitions.get, blockPageSize, blockSortColumn, blockSortDesc) +<<<<<<< HEAD + (_blockTable, _blockTable.table(blockPage)) + } catch { + case e @ (_ : IllegalArgumentException | _ : IndexOutOfBoundsException) => + (null,
    {e.getMessage}
    ) +======= _blockTable.table(page) } catch { case e @ (_ : IllegalArgumentException | _ : IndexOutOfBoundsException) =>
    {e.getMessage}
    +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 } val jsForScrollingDownToBlockTable = @@ -237,6 +254,9 @@ private[ui] class BlockPagedTable( override def tableId: String = "rdd-storage-by-block-table" +<<<<<<< HEAD + override def tableCssClass: String = "table table-bordered table-condensed table-striped" +======= override def tableCssClass: String = "table table-bordered table-condensed table-striped table-head-clickable" @@ -245,6 +265,7 @@ private[ui] class BlockPagedTable( override def prevPageSizeFormField: String = "block.prevPageSize" override def pageNumberFormField: String = "block.page" +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 override val dataSource: BlockDataSource = new BlockDataSource( rddPartitions, @@ -254,6 +275,26 @@ private[ui] class BlockPagedTable( override def pageLink(page: Int): String = { val encodedSortColumn = URLEncoder.encode(sortColumn, "UTF-8") +<<<<<<< HEAD + s"${basePath}&block.page=$page&block.sort=${encodedSortColumn}&block.desc=${desc}" + + s"&block.pageSize=${pageSize}" + } + + override def goButtonJavascriptFunction: (String, String) = { + val jsFuncName = "goToBlockPage" + val encodedSortColumn = URLEncoder.encode(sortColumn, "UTF-8") + val jsFunc = s""" + |currentBlockPageSize = ${pageSize} + |function goToBlockPage(page, pageSize) { + | // Set page to 1 if the page size changes + | page = pageSize == currentBlockPageSize ? page : 1; + | var url = "${basePath}&block.sort=${encodedSortColumn}&block.desc=${desc}" + + | "&block.page=" + page + "&block.pageSize=" + pageSize; + | window.location.href = url; + |} + """.stripMargin + (jsFuncName, jsFunc) +======= basePath + s"&$pageNumberFormField=$page" + s"&block.sort=$encodedSortColumn" + @@ -264,6 +305,7 @@ private[ui] class BlockPagedTable( override def goButtonFormPath: String = { val encodedSortColumn = URLEncoder.encode(sortColumn, "UTF-8") s"$basePath&block.sort=$encodedSortColumn&block.desc=$desc" +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 } override def headers: Seq[Node] = { @@ -281,6 +323,24 @@ private[ui] class BlockPagedTable( val headerRow: Seq[Node] = { blockHeaders.map { header => if (header == sortColumn) { +<<<<<<< HEAD + val headerLink = + s"$basePath&block.sort=${URLEncoder.encode(header, "UTF-8")}&block.desc=${!desc}" + + s"&block.pageSize=${pageSize}" + val js = Unparsed(s"window.location.href='${headerLink}'") + val arrow = if (desc) "▾" else "▴" // UP or DOWN + + {header} +  {Unparsed(arrow)} + + } else { + val headerLink = + s"$basePath&block.sort=${URLEncoder.encode(header, "UTF-8")}" + + s"&block.pageSize=${pageSize}" + val js = Unparsed(s"window.location.href='${headerLink}'") + + {header} +======= val headerLink = Unparsed( basePath + s"&block.sort=${URLEncoder.encode(header, "UTF-8")}" + @@ -302,6 +362,7 @@ private[ui] class BlockPagedTable( {header} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 } } diff --git a/core/src/main/scala/org/apache/spark/util/ActorLogReceive.scala b/core/src/main/scala/org/apache/spark/util/ActorLogReceive.scala new file mode 100644 index 0000000000000..81a7cbde01ce5 --- /dev/null +++ b/core/src/main/scala/org/apache/spark/util/ActorLogReceive.scala @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.util + +import akka.actor.Actor +import org.slf4j.Logger + +/** + * A trait to enable logging all Akka actor messages. Here's an example of using this: + * + * {{{ + * class BlockManagerMasterActor extends Actor with ActorLogReceive with Logging { + * ... + * override def receiveWithLogging = { + * case GetLocations(blockId) => + * sender ! getLocations(blockId) + * ... + * } + * ... + * } + * }}} + * + */ +private[spark] trait ActorLogReceive { + self: Actor => + + override def receive: Actor.Receive = new Actor.Receive { + + private val _receiveWithLogging = receiveWithLogging + + override def isDefinedAt(o: Any): Boolean = { + val handled = _receiveWithLogging.isDefinedAt(o) + if (!handled) { + log.debug(s"Received unexpected actor system event: $o") + } + handled + } + + override def apply(o: Any): Unit = { + if (log.isDebugEnabled) { + log.debug(s"[actor] received message $o from ${self.sender}") + } + val start = System.nanoTime + _receiveWithLogging.apply(o) + val timeTaken = (System.nanoTime - start).toDouble / 1000000 + if (log.isDebugEnabled) { + log.debug(s"[actor] handled message ($timeTaken ms) $o from ${self.sender}") + } + } + } + + def receiveWithLogging: Actor.Receive + + protected def log: Logger +} diff --git a/core/src/main/scala/org/apache/spark/util/AkkaUtils.scala b/core/src/main/scala/org/apache/spark/util/AkkaUtils.scala index f2d93edd4fd2e..3e2e8901d8fc0 100644 --- a/core/src/main/scala/org/apache/spark/util/AkkaUtils.scala +++ b/core/src/main/scala/org/apache/spark/util/AkkaUtils.scala @@ -19,11 +19,22 @@ package org.apache.spark.util import scala.collection.JavaConverters._ +<<<<<<< HEAD +import akka.actor.{ActorRef, ActorSystem, ExtendedActorSystem} +import akka.pattern.ask + +import com.typesafe.config.ConfigFactory +import org.apache.log4j.{Level, Logger} + +import org.apache.spark.{Logging, SecurityManager, SparkConf, SparkEnv, SparkException} +import org.apache.spark.rpc.RpcTimeout +======= import akka.actor.{ActorSystem, ExtendedActorSystem} import com.typesafe.config.ConfigFactory import org.apache.log4j.{Level, Logger} import org.apache.spark.{Logging, SecurityManager, SparkConf} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * Various utility classes for working with Akka. @@ -136,4 +147,107 @@ private[spark] object AkkaUtils extends Logging { /** Space reserved for extra data in an Akka message besides serialized task or task result. */ val reservedSizeBytes = 200 * 1024 +<<<<<<< HEAD + /** + * Send a message to the given actor and get its result within a default timeout, or + * throw a SparkException if this fails. 
+ */ + def askWithReply[T]( + message: Any, + actor: ActorRef, + timeout: RpcTimeout): T = { + askWithReply[T](message, actor, maxAttempts = 1, retryInterval = Int.MaxValue, timeout) + } + + /** + * Send a message to the given actor and get its result within a default timeout, or + * throw a SparkException if this fails even after the specified number of retries. + */ + def askWithReply[T]( + message: Any, + actor: ActorRef, + maxAttempts: Int, + retryInterval: Long, + timeout: RpcTimeout): T = { + // TODO: Consider removing multiple attempts + if (actor == null) { + throw new SparkException(s"Error sending message [message = $message]" + + " as actor is null ") + } + var attempts = 0 + var lastException: Exception = null + while (attempts < maxAttempts) { + attempts += 1 + try { + val future = actor.ask(message)(timeout.duration) + val result = timeout.awaitResult(future) + if (result == null) { + throw new SparkException("Actor returned null") + } + return result.asInstanceOf[T] + } catch { + case ie: InterruptedException => throw ie + case e: Exception => + lastException = e + logWarning(s"Error sending message [message = $message] in $attempts attempts", e) + } + if (attempts < maxAttempts) { + Thread.sleep(retryInterval) + } + } + + throw new SparkException( + s"Error sending message [message = $message]", lastException) + } + + def makeDriverRef(name: String, conf: SparkConf, actorSystem: ActorSystem): ActorRef = { + val driverActorSystemName = SparkEnv.driverActorSystemName + val driverHost: String = conf.get("spark.driver.host", "localhost") + val driverPort: Int = conf.getInt("spark.driver.port", 7077) + Utils.checkHost(driverHost, "Expected hostname") + val url = address(protocol(actorSystem), driverActorSystemName, driverHost, driverPort, name) + val timeout = RpcUtils.lookupRpcTimeout(conf) + logInfo(s"Connecting to $name: $url") + timeout.awaitResult(actorSystem.actorSelection(url).resolveOne(timeout.duration)) + } + + def makeExecutorRef( + name: String, + conf: SparkConf, + host: String, + port: Int, + actorSystem: ActorSystem): ActorRef = { + val executorActorSystemName = SparkEnv.executorActorSystemName + Utils.checkHost(host, "Expected hostname") + val url = address(protocol(actorSystem), executorActorSystemName, host, port, name) + val timeout = RpcUtils.lookupRpcTimeout(conf) + logInfo(s"Connecting to $name: $url") + timeout.awaitResult(actorSystem.actorSelection(url).resolveOne(timeout.duration)) + } + + def protocol(actorSystem: ActorSystem): String = { + val akkaConf = actorSystem.settings.config + val sslProp = "akka.remote.netty.tcp.enable-ssl" + protocol(akkaConf.hasPath(sslProp) && akkaConf.getBoolean(sslProp)) + } + + def protocol(ssl: Boolean = false): String = { + if (ssl) { + "akka.ssl.tcp" + } else { + "akka.tcp" + } + } + + def address( + protocol: String, + systemName: String, + host: String, + port: Int, + actorName: String): String = { + s"$protocol://$systemName@$host:$port/user/$actorName" + } + +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 } diff --git a/core/src/main/scala/org/apache/spark/util/RpcUtils.scala b/core/src/main/scala/org/apache/spark/util/RpcUtils.scala index a51f30b9c2921..a5fce8610b474 100644 --- a/core/src/main/scala/org/apache/spark/util/RpcUtils.scala +++ b/core/src/main/scala/org/apache/spark/util/RpcUtils.scala @@ -20,19 +20,34 @@ package org.apache.spark.util import scala.concurrent.duration.FiniteDuration import scala.language.postfixOps +<<<<<<< HEAD +import org.apache.spark.{SparkEnv, SparkConf} +import 
org.apache.spark.rpc.{RpcAddress, RpcEndpointRef, RpcEnv, RpcTimeout} + +object RpcUtils { +======= import org.apache.spark.SparkConf import org.apache.spark.rpc.{RpcAddress, RpcEndpointRef, RpcEnv, RpcTimeout} private[spark] object RpcUtils { +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * Retrieve a [[RpcEndpointRef]] which is located in the driver via its name. */ def makeDriverRef(name: String, conf: SparkConf, rpcEnv: RpcEnv): RpcEndpointRef = { +<<<<<<< HEAD + val driverActorSystemName = SparkEnv.driverActorSystemName + val driverHost: String = conf.get("spark.driver.host", "localhost") + val driverPort: Int = conf.getInt("spark.driver.port", 7077) + Utils.checkHost(driverHost, "Expected hostname") + rpcEnv.setupEndpointRef(driverActorSystemName, RpcAddress(driverHost, driverPort), name) +======= val driverHost: String = conf.get("spark.driver.host", "localhost") val driverPort: Int = conf.getInt("spark.driver.port", 7077) Utils.checkHost(driverHost, "Expected hostname") rpcEnv.setupEndpointRef(RpcAddress(driverHost, driverPort), name) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 } /** Returns the configured number of times to retry connecting */ @@ -46,7 +61,11 @@ private[spark] object RpcUtils { } /** Returns the default Spark timeout to use for RPC ask operations. */ +<<<<<<< HEAD + private[spark] def askRpcTimeout(conf: SparkConf): RpcTimeout = { +======= def askRpcTimeout(conf: SparkConf): RpcTimeout = { +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 RpcTimeout(conf, Seq("spark.rpc.askTimeout", "spark.network.timeout"), "120s") } @@ -56,7 +75,11 @@ private[spark] object RpcUtils { } /** Returns the default Spark timeout to use for RPC remote endpoint lookup. */ +<<<<<<< HEAD + private[spark] def lookupRpcTimeout(conf: SparkConf): RpcTimeout = { +======= def lookupRpcTimeout(conf: SparkConf): RpcTimeout = { +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 RpcTimeout(conf, Seq("spark.rpc.lookupTimeout", "spark.network.timeout"), "120s") } diff --git a/core/src/main/scala/org/apache/spark/util/ShutdownHookManager.scala b/core/src/main/scala/org/apache/spark/util/ShutdownHookManager.scala index acc24ca0fb814..44d86f36ac578 100644 --- a/core/src/main/scala/org/apache/spark/util/ShutdownHookManager.scala +++ b/core/src/main/scala/org/apache/spark/util/ShutdownHookManager.scala @@ -20,7 +20,11 @@ package org.apache.spark.util import java.io.File import java.util.PriorityQueue +<<<<<<< HEAD +import scala.util.{Failure, Success, Try} +======= import scala.util.Try +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.hadoop.fs.FileSystem import org.apache.spark.Logging @@ -177,8 +181,26 @@ private [util] class SparkShutdownHookManager { val hookTask = new Runnable() { override def run(): Unit = runAll() } +<<<<<<< HEAD + Try(Utils.classForName("org.apache.hadoop.util.ShutdownHookManager")) match { + case Success(shmClass) => + val fsPriority = classOf[FileSystem] + .getField("SHUTDOWN_HOOK_PRIORITY") + .get(null) // static field, the value is not used + .asInstanceOf[Int] + val shm = shmClass.getMethod("get").invoke(null) + shm.getClass().getMethod("addShutdownHook", classOf[Runnable], classOf[Int]) + .invoke(shm, hookTask, Integer.valueOf(fsPriority + 30)) + + case Failure(_) => + // scalastyle:off runtimeaddshutdownhook + Runtime.getRuntime.addShutdownHook(new Thread(hookTask, "Spark Shutdown Hook")); + // scalastyle:on runtimeaddshutdownhook + } +======= org.apache.hadoop.util.ShutdownHookManager.get().addShutdownHook( hookTask, 
FileSystem.SHUTDOWN_HOOK_PRIORITY + 30) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 } def runAll(): Unit = { diff --git a/core/src/test/java/org/apache/spark/JavaAPISuite.java b/core/src/test/java/org/apache/spark/JavaAPISuite.java index d91948e44694b..8bc556dcc3556 100644 --- a/core/src/test/java/org/apache/spark/JavaAPISuite.java +++ b/core/src/test/java/org/apache/spark/JavaAPISuite.java @@ -1246,7 +1246,11 @@ public Tuple2 call(Tuple2 pair) { JavaPairRDD output = sc.newAPIHadoopFile(outputDir, org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat.class, +<<<<<<< HEAD + IntWritable.class, Text.class, new Job().getConfiguration()); +======= IntWritable.class, Text.class, Job.getInstance().getConfiguration()); +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 Assert.assertEquals(pairs.toString(), output.map(new Function, String>() { @Override public String call(Tuple2 x) { diff --git a/core/src/test/scala/org/apache/spark/FileSuite.scala b/core/src/test/scala/org/apache/spark/FileSuite.scala index 2e47801aafd75..7d477a1eef66c 100644 --- a/core/src/test/scala/org/apache/spark/FileSuite.scala +++ b/core/src/test/scala/org/apache/spark/FileSuite.scala @@ -19,11 +19,20 @@ package org.apache.spark import java.io.{File, FileWriter} +<<<<<<< HEAD +import org.apache.spark.deploy.SparkHadoopUtil +import org.apache.spark.input.PortableDataStream +import org.apache.spark.storage.StorageLevel + +import scala.io.Source + +======= import scala.io.Source import org.apache.spark.input.PortableDataStream import org.apache.spark.storage.StorageLevel +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.hadoop.io._ import org.apache.hadoop.io.compress.DefaultCodec import org.apache.hadoop.mapred.{JobConf, FileAlreadyExistsException, FileSplit, TextInputFormat, TextOutputFormat} @@ -505,11 +514,19 @@ class FileSuite extends SparkFunSuite with LocalSparkContext { sc = new SparkContext("local", "test") val randomRDD = sc.parallelize( Array(("key1", "a"), ("key2", "a"), ("key3", "b"), ("key4", "c")), 1) +<<<<<<< HEAD + val job = new Job(sc.hadoopConfiguration) + job.setOutputKeyClass(classOf[String]) + job.setOutputValueClass(classOf[String]) + job.setOutputFormatClass(classOf[NewTextOutputFormat[String, String]]) + val jobConfig = SparkHadoopUtil.get.getConfigurationFromJobContext(job) +======= val job = Job.getInstance(sc.hadoopConfiguration) job.setOutputKeyClass(classOf[String]) job.setOutputValueClass(classOf[String]) job.setOutputFormatClass(classOf[NewTextOutputFormat[String, String]]) val jobConfig = job.getConfiguration +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 jobConfig.set("mapred.output.dir", tempDir.getPath + "/outputDataset_new") randomRDD.saveAsNewAPIHadoopDataset(jobConfig) assert(new File(tempDir.getPath + "/outputDataset_new/part-r-00000").exists() === true) diff --git a/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala b/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala index 5b29d69cd9428..3a82ede31ee19 100644 --- a/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala +++ b/core/src/test/scala/org/apache/spark/MapOutputTrackerSuite.scala @@ -125,7 +125,11 @@ class MapOutputTrackerSuite extends SparkFunSuite { val slaveRpcEnv = createRpcEnv("spark-slave", hostname, 0, new SecurityManager(conf)) val slaveTracker = new MapOutputTrackerWorker(conf) slaveTracker.trackerEndpoint = +<<<<<<< HEAD + slaveRpcEnv.setupEndpointRef("spark", rpcEnv.address, MapOutputTracker.ENDPOINT_NAME) +======= 
slaveRpcEnv.setupEndpointRef(rpcEnv.address, MapOutputTracker.ENDPOINT_NAME) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 masterTracker.registerShuffle(10, 1) masterTracker.incrementEpoch() diff --git a/core/src/test/scala/org/apache/spark/SSLSampleConfigs.scala b/core/src/test/scala/org/apache/spark/SSLSampleConfigs.scala index 33270bec6247c..ce7ad2d9847c5 100644 --- a/core/src/test/scala/org/apache/spark/SSLSampleConfigs.scala +++ b/core/src/test/scala/org/apache/spark/SSLSampleConfigs.scala @@ -41,6 +41,10 @@ object SSLSampleConfigs { def sparkSSLConfig(): SparkConf = { val conf = new SparkConf(loadDefaults = false) +<<<<<<< HEAD + conf.set("spark.rpc", "akka") +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 conf.set("spark.ssl.enabled", "true") conf.set("spark.ssl.keyStore", keyStorePath) conf.set("spark.ssl.keyStorePassword", "password") @@ -54,6 +58,10 @@ object SSLSampleConfigs { def sparkSSLConfigUntrusted(): SparkConf = { val conf = new SparkConf(loadDefaults = false) +<<<<<<< HEAD + conf.set("spark.rpc", "akka") +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 conf.set("spark.ssl.enabled", "true") conf.set("spark.ssl.keyStore", untrustedKeyStorePath) conf.set("spark.ssl.keyStorePassword", "password") diff --git a/core/src/test/scala/org/apache/spark/SparkContextSuite.scala b/core/src/test/scala/org/apache/spark/SparkContextSuite.scala index 172ef050cc275..7541402e133a0 100644 --- a/core/src/test/scala/org/apache/spark/SparkContextSuite.scala +++ b/core/src/test/scala/org/apache/spark/SparkContextSuite.scala @@ -274,6 +274,8 @@ class SparkContextSuite extends SparkFunSuite with LocalSparkContext { } } +<<<<<<< HEAD +======= test("Default path for file based RDDs is properly set (SPARK-12517)") { sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local")) @@ -299,6 +301,7 @@ class SparkContextSuite extends SparkFunSuite with LocalSparkContext { sc.stop() } +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 test("calling multiple sc.stop() must not throw any exception") { noException should be thrownBy { sc = new SparkContext(new SparkConf().setAppName("test").setMaster("local")) diff --git a/core/src/test/scala/org/apache/spark/broadcast/BroadcastSuite.scala b/core/src/test/scala/org/apache/spark/broadcast/BroadcastSuite.scala index 88fdbbdaec902..dfd21ee9cd037 100644 --- a/core/src/test/scala/org/apache/spark/broadcast/BroadcastSuite.scala +++ b/core/src/test/scala/org/apache/spark/broadcast/BroadcastSuite.scala @@ -45,8 +45,44 @@ class DummyBroadcastClass(rdd: RDD[Int]) extends Serializable { class BroadcastSuite extends SparkFunSuite with LocalSparkContext { +<<<<<<< HEAD + private val httpConf = broadcastConf("HttpBroadcastFactory") + private val torrentConf = broadcastConf("TorrentBroadcastFactory") + + test("Using HttpBroadcast locally") { + sc = new SparkContext("local", "test", httpConf) + val list = List[Int](1, 2, 3, 4) + val broadcast = sc.broadcast(list) + val results = sc.parallelize(1 to 2).map(x => (x, broadcast.value.sum)) + assert(results.collect().toSet === Set((1, 10), (2, 10))) + } + + test("Accessing HttpBroadcast variables from multiple threads") { + sc = new SparkContext("local[10]", "test", httpConf) + val list = List[Int](1, 2, 3, 4) + val broadcast = sc.broadcast(list) + val results = sc.parallelize(1 to 10).map(x => (x, broadcast.value.sum)) + assert(results.collect().toSet === (1 to 10).map(x => (x, 10)).toSet) + } + + test("Accessing HttpBroadcast variables in a local cluster") { + val numSlaves = 4 + val conf = 
httpConf.clone + conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer") + conf.set("spark.broadcast.compress", "true") + sc = new SparkContext("local-cluster[%d, 1, 1024]".format(numSlaves), "test", conf) + val list = List[Int](1, 2, 3, 4) + val broadcast = sc.broadcast(list) + val results = sc.parallelize(1 to numSlaves).map(x => (x, broadcast.value.sum)) + assert(results.collect().toSet === (1 to numSlaves).map(x => (x, 10)).toSet) + } + + test("Using TorrentBroadcast locally") { + sc = new SparkContext("local", "test", torrentConf) +======= test("Using TorrentBroadcast locally") { sc = new SparkContext("local", "test") +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 val list = List[Int](1, 2, 3, 4) val broadcast = sc.broadcast(list) val results = sc.parallelize(1 to 2).map(x => (x, broadcast.value.sum)) @@ -54,7 +90,11 @@ class BroadcastSuite extends SparkFunSuite with LocalSparkContext { } test("Accessing TorrentBroadcast variables from multiple threads") { +<<<<<<< HEAD + sc = new SparkContext("local[10]", "test", torrentConf) +======= sc = new SparkContext("local[10]", "test") +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 val list = List[Int](1, 2, 3, 4) val broadcast = sc.broadcast(list) val results = sc.parallelize(1 to 10).map(x => (x, broadcast.value.sum)) @@ -63,7 +103,11 @@ class BroadcastSuite extends SparkFunSuite with LocalSparkContext { test("Accessing TorrentBroadcast variables in a local cluster") { val numSlaves = 4 +<<<<<<< HEAD + val conf = torrentConf.clone +======= val conf = new SparkConf +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer") conf.set("spark.broadcast.compress", "true") sc = new SparkContext("local-cluster[%d, 1, 1024]".format(numSlaves), "test", conf) @@ -93,13 +137,39 @@ class BroadcastSuite extends SparkFunSuite with LocalSparkContext { test("Test Lazy Broadcast variables with TorrentBroadcast") { val numSlaves = 2 +<<<<<<< HEAD + val conf = torrentConf.clone + sc = new SparkContext("local-cluster[%d, 1, 1024]".format(numSlaves), "test", conf) + val rdd = sc.parallelize(1 to numSlaves) + +======= sc = new SparkContext("local-cluster[%d, 1, 1024]".format(numSlaves), "test") val rdd = sc.parallelize(1 to numSlaves) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 val results = new DummyBroadcastClass(rdd).doSomething() assert(results.toSet === (1 to numSlaves).map(x => (x, false)).toSet) } +<<<<<<< HEAD + test("Unpersisting HttpBroadcast on executors only in local mode") { + testUnpersistHttpBroadcast(distributed = false, removeFromDriver = false) + } + + test("Unpersisting HttpBroadcast on executors and driver in local mode") { + testUnpersistHttpBroadcast(distributed = false, removeFromDriver = true) + } + + test("Unpersisting HttpBroadcast on executors only in distributed mode") { + testUnpersistHttpBroadcast(distributed = true, removeFromDriver = false) + } + + test("Unpersisting HttpBroadcast on executors and driver in distributed mode") { + testUnpersistHttpBroadcast(distributed = true, removeFromDriver = true) + } + +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 test("Unpersisting TorrentBroadcast on executors only in local mode") { testUnpersistTorrentBroadcast(distributed = false, removeFromDriver = false) } @@ -131,6 +201,69 @@ class BroadcastSuite extends SparkFunSuite with LocalSparkContext { } /** +<<<<<<< HEAD + * Verify the persistence of state associated with an HttpBroadcast in either local mode or + * local-cluster mode 
(when distributed = true). + * + * This test creates a broadcast variable, uses it on all executors, and then unpersists it. + * In between each step, this test verifies that the broadcast blocks and the broadcast file + * are present only on the expected nodes. + */ + private def testUnpersistHttpBroadcast(distributed: Boolean, removeFromDriver: Boolean) { + val numSlaves = if (distributed) 2 else 0 + + // Verify that the broadcast file is created, and blocks are persisted only on the driver + def afterCreation(broadcastId: Long, bmm: BlockManagerMaster) { + val blockId = BroadcastBlockId(broadcastId) + val statuses = bmm.getBlockStatus(blockId, askSlaves = true) + assert(statuses.size === 1) + statuses.head match { case (bm, status) => + assert(bm.isDriver, "Block should only be on the driver") + assert(status.storageLevel === StorageLevel.MEMORY_AND_DISK) + assert(status.memSize > 0, "Block should be in memory store on the driver") + assert(status.diskSize === 0, "Block should not be in disk store on the driver") + } + if (distributed) { + // this file is only generated in distributed mode + assert(HttpBroadcast.getFile(blockId.broadcastId).exists, "Broadcast file not found!") + } + } + + // Verify that blocks are persisted in both the executors and the driver + def afterUsingBroadcast(broadcastId: Long, bmm: BlockManagerMaster) { + val blockId = BroadcastBlockId(broadcastId) + val statuses = bmm.getBlockStatus(blockId, askSlaves = true) + assert(statuses.size === numSlaves + 1) + statuses.foreach { case (_, status) => + assert(status.storageLevel === StorageLevel.MEMORY_AND_DISK) + assert(status.memSize > 0, "Block should be in memory store") + assert(status.diskSize === 0, "Block should not be in disk store") + } + } + + // Verify that blocks are unpersisted on all executors, and on all nodes if removeFromDriver + // is true. In the latter case, also verify that the broadcast file is deleted on the driver. + def afterUnpersist(broadcastId: Long, bmm: BlockManagerMaster) { + val blockId = BroadcastBlockId(broadcastId) + val statuses = bmm.getBlockStatus(blockId, askSlaves = true) + val expectedNumBlocks = if (removeFromDriver) 0 else 1 + val possiblyNot = if (removeFromDriver) "" else " not" + assert(statuses.size === expectedNumBlocks, + "Block should%s be unpersisted on the driver".format(possiblyNot)) + if (distributed && removeFromDriver) { + // this file is only generated in distributed mode + assert(!HttpBroadcast.getFile(blockId.broadcastId).exists, + "Broadcast file should%s be deleted".format(possiblyNot)) + } + } + + testUnpersistBroadcast(distributed, numSlaves, httpConf, afterCreation, + afterUsingBroadcast, afterUnpersist, removeFromDriver) + } + + /** +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 * Verify the persistence of state associated with an TorrentBroadcast in a local-cluster. * * This test creates a broadcast variable, uses it on all executors, and then unpersists it. 
@@ -175,7 +308,11 @@ class BroadcastSuite extends SparkFunSuite with LocalSparkContext { assert(statuses.size === expectedNumBlocks) } +<<<<<<< HEAD + testUnpersistBroadcast(distributed, numSlaves, torrentConf, afterCreation, +======= testUnpersistBroadcast(distributed, numSlaves, afterCreation, +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 afterUsingBroadcast, afterUnpersist, removeFromDriver) } @@ -191,6 +328,10 @@ class BroadcastSuite extends SparkFunSuite with LocalSparkContext { private def testUnpersistBroadcast( distributed: Boolean, numSlaves: Int, // used only when distributed = true +<<<<<<< HEAD + broadcastConf: SparkConf, +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 afterCreation: (Long, BlockManagerMaster) => Unit, afterUsingBroadcast: (Long, BlockManagerMaster) => Unit, afterUnpersist: (Long, BlockManagerMaster) => Unit, @@ -198,7 +339,11 @@ class BroadcastSuite extends SparkFunSuite with LocalSparkContext { sc = if (distributed) { val _sc = +<<<<<<< HEAD + new SparkContext("local-cluster[%d, 1, 1024]".format(numSlaves), "test", broadcastConf) +======= new SparkContext("local-cluster[%d, 1, 1024]".format(numSlaves), "test") +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 // Wait until all salves are up try { _sc.jobProgressListener.waitUntilExecutorsUp(numSlaves, 60000) @@ -209,7 +354,11 @@ class BroadcastSuite extends SparkFunSuite with LocalSparkContext { throw e } } else { +<<<<<<< HEAD + new SparkContext("local", "test", broadcastConf) +======= new SparkContext("local", "test") +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 } val blockManagerMaster = sc.env.blockManager.master val list = List[Int](1, 2, 3, 4) @@ -246,6 +395,16 @@ class BroadcastSuite extends SparkFunSuite with LocalSparkContext { assert(results.collect().toSet === (1 to partitions).map(x => (x, list.sum)).toSet) } } +<<<<<<< HEAD + + /** Helper method to create a SparkConf that uses the given broadcast factory. 
*/ + private def broadcastConf(factoryName: String): SparkConf = { + val conf = new SparkConf + conf.set("spark.broadcast.factory", "org.apache.spark.broadcast.%s".format(factoryName)) + conf + } +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 } package object testPackage extends Assertions { diff --git a/core/src/test/scala/org/apache/spark/deploy/StandaloneDynamicAllocationSuite.scala b/core/src/test/scala/org/apache/spark/deploy/StandaloneDynamicAllocationSuite.scala index ab3d4cafebefa..5be75dfa35e74 100644 --- a/core/src/test/scala/org/apache/spark/deploy/StandaloneDynamicAllocationSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/StandaloneDynamicAllocationSuite.scala @@ -474,7 +474,11 @@ class StandaloneDynamicAllocationSuite (0 until numWorkers).map { i => val rpcEnv = workerRpcEnvs(i) val worker = new Worker(rpcEnv, 0, cores, memory, Array(masterRpcEnv.address), +<<<<<<< HEAD + Worker.SYSTEM_NAME + i, Worker.ENDPOINT_NAME, null, conf, securityManager) +======= Worker.ENDPOINT_NAME, null, conf, securityManager) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 rpcEnv.setupEndpoint(Worker.ENDPOINT_NAME, worker) worker } diff --git a/core/src/test/scala/org/apache/spark/deploy/client/AppClientSuite.scala b/core/src/test/scala/org/apache/spark/deploy/client/AppClientSuite.scala index eb794b6739d5e..33806654e3bdc 100644 --- a/core/src/test/scala/org/apache/spark/deploy/client/AppClientSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/client/AppClientSuite.scala @@ -147,7 +147,11 @@ class AppClientSuite extends SparkFunSuite with LocalSparkContext with BeforeAnd (0 until numWorkers).map { i => val rpcEnv = workerRpcEnvs(i) val worker = new Worker(rpcEnv, 0, cores, memory, Array(masterRpcEnv.address), +<<<<<<< HEAD + Worker.SYSTEM_NAME + i, Worker.ENDPOINT_NAME, null, conf, securityManager) +======= Worker.ENDPOINT_NAME, null, conf, securityManager) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 rpcEnv.setupEndpoint(Worker.ENDPOINT_NAME, worker) worker } diff --git a/core/src/test/scala/org/apache/spark/deploy/master/MasterSuite.scala b/core/src/test/scala/org/apache/spark/deploy/master/MasterSuite.scala index 10e33a32ba4c3..0c158a29c4cc2 100644 --- a/core/src/test/scala/org/apache/spark/deploy/master/MasterSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/master/MasterSuite.scala @@ -98,7 +98,11 @@ class MasterSuite extends SparkFunSuite with Matchers with Eventually with Priva Master.startRpcEnvAndEndpoint("127.0.0.1", 0, 0, conf) try { +<<<<<<< HEAD + rpcEnv.setupEndpointRef(Master.SYSTEM_NAME, rpcEnv.address, Master.ENDPOINT_NAME) +======= rpcEnv.setupEndpointRef(rpcEnv.address, Master.ENDPOINT_NAME) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 CustomPersistenceEngine.lastInstance.isDefined shouldBe true val persistenceEngine = CustomPersistenceEngine.lastInstance.get diff --git a/core/src/test/scala/org/apache/spark/deploy/worker/WorkerSuite.scala b/core/src/test/scala/org/apache/spark/deploy/worker/WorkerSuite.scala index 082d5e86eb512..30aaa9a7fb9db 100644 --- a/core/src/test/scala/org/apache/spark/deploy/worker/WorkerSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/worker/WorkerSuite.scala @@ -67,7 +67,11 @@ class WorkerSuite extends SparkFunSuite with Matchers { conf.set("spark.worker.ui.retainedExecutors", 2.toString) val rpcEnv = RpcEnv.create("test", "localhost", 12345, conf, new SecurityManager(conf)) val worker = new Worker(rpcEnv, 50000, 20, 1234 * 5, Array.fill(1)(RpcAddress("1.2.3.4", 1234)), +<<<<<<< 
HEAD + "sparkWorker1", "Worker", "/tmp", conf, new SecurityManager(conf)) +======= "Worker", "/tmp", conf, new SecurityManager(conf)) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 // initialize workers for (i <- 0 until 5) { worker.executors += s"app1/$i" -> createExecutorRunner(i) @@ -93,7 +97,11 @@ class WorkerSuite extends SparkFunSuite with Matchers { conf.set("spark.worker.ui.retainedExecutors", 30.toString) val rpcEnv = RpcEnv.create("test", "localhost", 12345, conf, new SecurityManager(conf)) val worker = new Worker(rpcEnv, 50000, 20, 1234 * 5, Array.fill(1)(RpcAddress("1.2.3.4", 1234)), +<<<<<<< HEAD + "sparkWorker1", "Worker", "/tmp", conf, new SecurityManager(conf)) +======= "Worker", "/tmp", conf, new SecurityManager(conf)) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 // initialize workers for (i <- 0 until 50) { worker.executors += s"app1/$i" -> createExecutorRunner(i) @@ -128,7 +136,11 @@ class WorkerSuite extends SparkFunSuite with Matchers { conf.set("spark.worker.ui.retainedDrivers", 2.toString) val rpcEnv = RpcEnv.create("test", "localhost", 12345, conf, new SecurityManager(conf)) val worker = new Worker(rpcEnv, 50000, 20, 1234 * 5, Array.fill(1)(RpcAddress("1.2.3.4", 1234)), +<<<<<<< HEAD + "sparkWorker1", "Worker", "/tmp", conf, new SecurityManager(conf)) +======= "Worker", "/tmp", conf, new SecurityManager(conf)) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 // initialize workers for (i <- 0 until 5) { val driverId = s"driverId-$i" @@ -154,7 +166,11 @@ class WorkerSuite extends SparkFunSuite with Matchers { conf.set("spark.worker.ui.retainedDrivers", 30.toString) val rpcEnv = RpcEnv.create("test", "localhost", 12345, conf, new SecurityManager(conf)) val worker = new Worker(rpcEnv, 50000, 20, 1234 * 5, Array.fill(1)(RpcAddress("1.2.3.4", 1234)), +<<<<<<< HEAD + "sparkWorker1", "Worker", "/tmp", conf, new SecurityManager(conf)) +======= "Worker", "/tmp", conf, new SecurityManager(conf)) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 // initialize workers for (i <- 0 until 50) { val driverId = s"driverId-$i" diff --git a/core/src/test/scala/org/apache/spark/deploy/worker/WorkerWatcherSuite.scala b/core/src/test/scala/org/apache/spark/deploy/worker/WorkerWatcherSuite.scala index 0ffd91d8ffc06..ffbad3507184d 100644 --- a/core/src/test/scala/org/apache/spark/deploy/worker/WorkerWatcherSuite.scala +++ b/core/src/test/scala/org/apache/spark/deploy/worker/WorkerWatcherSuite.scala @@ -19,13 +19,21 @@ package org.apache.spark.deploy.worker import org.apache.spark.{SparkConf, SparkFunSuite} import org.apache.spark.SecurityManager +<<<<<<< HEAD +import org.apache.spark.rpc.{RpcAddress, RpcEnv} +======= import org.apache.spark.rpc.{RpcEndpointAddress, RpcAddress, RpcEnv} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 class WorkerWatcherSuite extends SparkFunSuite { test("WorkerWatcher shuts down on valid disassociation") { val conf = new SparkConf() val rpcEnv = RpcEnv.create("test", "localhost", 12345, conf, new SecurityManager(conf)) +<<<<<<< HEAD + val targetWorkerUrl = rpcEnv.uriOf("test", RpcAddress("1.2.3.4", 1234), "Worker") +======= val targetWorkerUrl = RpcEndpointAddress(RpcAddress("1.2.3.4", 1234), "Worker").toString +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 val workerWatcher = new WorkerWatcher(rpcEnv, targetWorkerUrl, isTesting = true) rpcEnv.setupEndpoint("worker-watcher", workerWatcher) workerWatcher.onDisconnected(RpcAddress("1.2.3.4", 1234)) @@ -36,7 +44,11 @@ class WorkerWatcherSuite extends SparkFunSuite { test("WorkerWatcher stays alive 
on invalid disassociation") { val conf = new SparkConf() val rpcEnv = RpcEnv.create("test", "localhost", 12345, conf, new SecurityManager(conf)) +<<<<<<< HEAD + val targetWorkerUrl = rpcEnv.uriOf("test", RpcAddress("1.2.3.4", 1234), "Worker") +======= val targetWorkerUrl = RpcEndpointAddress(RpcAddress("1.2.3.4", 1234), "Worker").toString +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 val otherRpcAddress = RpcAddress("4.3.2.1", 1234) val workerWatcher = new WorkerWatcher(rpcEnv, targetWorkerUrl, isTesting = true) rpcEnv.setupEndpoint("worker-watcher", workerWatcher) diff --git a/core/src/test/scala/org/apache/spark/rpc/RpcEnvSuite.scala b/core/src/test/scala/org/apache/spark/rpc/RpcEnvSuite.scala index 924fce7f61c26..79ed67ddc646e 100644 --- a/core/src/test/scala/org/apache/spark/rpc/RpcEnvSuite.scala +++ b/core/src/test/scala/org/apache/spark/rpc/RpcEnvSuite.scala @@ -94,7 +94,11 @@ abstract class RpcEnvSuite extends SparkFunSuite with BeforeAndAfterAll { val anotherEnv = createRpcEnv(new SparkConf(), "remote", 0, clientMode = true) // Use anotherEnv to find out the RpcEndpointRef +<<<<<<< HEAD + val rpcEndpointRef = anotherEnv.setupEndpointRef("local", env.address, "send-remotely") +======= val rpcEndpointRef = anotherEnv.setupEndpointRef(env.address, "send-remotely") +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 try { rpcEndpointRef.send("hello") eventually(timeout(5 seconds), interval(10 millis)) { @@ -148,7 +152,11 @@ abstract class RpcEnvSuite extends SparkFunSuite with BeforeAndAfterAll { val anotherEnv = createRpcEnv(new SparkConf(), "remote", 0, clientMode = true) // Use anotherEnv to find out the RpcEndpointRef +<<<<<<< HEAD + val rpcEndpointRef = anotherEnv.setupEndpointRef("local", env.address, "ask-remotely") +======= val rpcEndpointRef = anotherEnv.setupEndpointRef(env.address, "ask-remotely") +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 try { val reply = rpcEndpointRef.askWithRetry[String]("hello") assert("hello" === reply) @@ -176,7 +184,11 @@ abstract class RpcEnvSuite extends SparkFunSuite with BeforeAndAfterAll { conf.set("spark.rpc.numRetries", "1") val anotherEnv = createRpcEnv(conf, "remote", 0, clientMode = true) // Use anotherEnv to find out the RpcEndpointRef +<<<<<<< HEAD + val rpcEndpointRef = anotherEnv.setupEndpointRef("local", env.address, "ask-timeout") +======= val rpcEndpointRef = anotherEnv.setupEndpointRef(env.address, "ask-timeout") +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 try { // Any exception thrown in askWithRetry is wrapped with a SparkException and set as the cause val e = intercept[SparkException] { @@ -435,7 +447,11 @@ abstract class RpcEnvSuite extends SparkFunSuite with BeforeAndAfterAll { val anotherEnv = createRpcEnv(new SparkConf(), "remote", 0, clientMode = true) // Use anotherEnv to find out the RpcEndpointRef +<<<<<<< HEAD + val rpcEndpointRef = anotherEnv.setupEndpointRef("local", env.address, "sendWithReply-remotely") +======= val rpcEndpointRef = anotherEnv.setupEndpointRef(env.address, "sendWithReply-remotely") +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 try { val f = rpcEndpointRef.ask[String]("hello") val ack = Await.result(f, 5 seconds) @@ -475,7 +491,12 @@ abstract class RpcEnvSuite extends SparkFunSuite with BeforeAndAfterAll { val anotherEnv = createRpcEnv(new SparkConf(), "remote", 0, clientMode = true) // Use anotherEnv to find out the RpcEndpointRef +<<<<<<< HEAD + val rpcEndpointRef = anotherEnv.setupEndpointRef( + "local", env.address, "sendWithReply-remotely-error") +======= val rpcEndpointRef = 
anotherEnv.setupEndpointRef(env.address, "sendWithReply-remotely-error") +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 try { val f = rpcEndpointRef.ask[String]("hello") val e = intercept[SparkException] { @@ -526,7 +547,12 @@ abstract class RpcEnvSuite extends SparkFunSuite with BeforeAndAfterAll { val (_, events) = setupNetworkEndpoint(serverEnv1, "network-events") val (serverRef2, _) = setupNetworkEndpoint(serverEnv2, "network-events") try { +<<<<<<< HEAD + val serverRefInServer2 = + serverEnv1.setupEndpointRef("server2", serverRef2.address, serverRef2.name) +======= val serverRefInServer2 = serverEnv1.setupEndpointRef(serverRef2.address, serverRef2.name) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 // Send a message to set up the connection serverRefInServer2.send("hello") @@ -554,7 +580,12 @@ abstract class RpcEnvSuite extends SparkFunSuite with BeforeAndAfterAll { val (serverRef, events) = setupNetworkEndpoint(serverEnv, "network-events") val clientEnv = createRpcEnv(new SparkConf(), "client", 0, clientMode = true) try { +<<<<<<< HEAD + val serverRefInClient = + clientEnv.setupEndpointRef("server", serverRef.address, serverRef.name) +======= val serverRefInClient = clientEnv.setupEndpointRef(serverRef.address, serverRef.name) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 // Send a message to set up the connection serverRefInClient.send("hello") @@ -585,7 +616,12 @@ abstract class RpcEnvSuite extends SparkFunSuite with BeforeAndAfterAll { val (_, events) = setupNetworkEndpoint(clientEnv, "network-events") val (serverRef, _) = setupNetworkEndpoint(serverEnv, "network-events") try { +<<<<<<< HEAD + val serverRefInClient = + clientEnv.setupEndpointRef("server", serverRef.address, serverRef.name) +======= val serverRefInClient = clientEnv.setupEndpointRef(serverRef.address, serverRef.name) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 // Send a message to set up the connection serverRefInClient.send("hello") @@ -619,8 +655,13 @@ abstract class RpcEnvSuite extends SparkFunSuite with BeforeAndAfterAll { val anotherEnv = createRpcEnv(new SparkConf(), "remote", 0, clientMode = true) // Use anotherEnv to find out the RpcEndpointRef +<<<<<<< HEAD + val rpcEndpointRef = anotherEnv.setupEndpointRef( + "local", env.address, "sendWithReply-unserializable-error") +======= val rpcEndpointRef = anotherEnv.setupEndpointRef(env.address, "sendWithReply-unserializable-error") +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 try { val f = rpcEndpointRef.ask[String]("hello") val e = intercept[Exception] { @@ -657,7 +698,12 @@ abstract class RpcEnvSuite extends SparkFunSuite with BeforeAndAfterAll { case msg: String => message = msg } }) +<<<<<<< HEAD + val rpcEndpointRef = + remoteEnv.setupEndpointRef("authentication-local", localEnv.address, "send-authentication") +======= val rpcEndpointRef = remoteEnv.setupEndpointRef(localEnv.address, "send-authentication") +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 rpcEndpointRef.send("hello") eventually(timeout(5 seconds), interval(10 millis)) { assert("hello" === message) @@ -688,7 +734,12 @@ abstract class RpcEnvSuite extends SparkFunSuite with BeforeAndAfterAll { } } }) +<<<<<<< HEAD + val rpcEndpointRef = + remoteEnv.setupEndpointRef("authentication-local", localEnv.address, "ask-authentication") +======= val rpcEndpointRef = remoteEnv.setupEndpointRef(localEnv.address, "ask-authentication") +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 val reply = rpcEndpointRef.askWithRetry[String]("hello") assert("hello" === reply) } finally { diff 
--git a/core/src/test/scala/org/apache/spark/rpc/akka/AkkaRpcEnvSuite.scala b/core/src/test/scala/org/apache/spark/rpc/akka/AkkaRpcEnvSuite.scala new file mode 100644 index 0000000000000..7aac02775e1bf --- /dev/null +++ b/core/src/test/scala/org/apache/spark/rpc/akka/AkkaRpcEnvSuite.scala @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.rpc.akka + +import org.apache.spark.rpc._ +import org.apache.spark.{SSLSampleConfigs, SecurityManager, SparkConf} + +class AkkaRpcEnvSuite extends RpcEnvSuite { + + override def createRpcEnv(conf: SparkConf, + name: String, + port: Int, + clientMode: Boolean = false): RpcEnv = { + new AkkaRpcEnvFactory().create( + RpcEnvConfig(conf, name, "localhost", port, new SecurityManager(conf), clientMode)) + } + + test("setupEndpointRef: systemName, address, endpointName") { + val ref = env.setupEndpoint("test_endpoint", new RpcEndpoint { + override val rpcEnv = env + + override def receive = { + case _ => + } + }) + val conf = new SparkConf() + val newRpcEnv = new AkkaRpcEnvFactory().create( + RpcEnvConfig(conf, "test", "localhost", 0, new SecurityManager(conf), false)) + try { + val newRef = newRpcEnv.setupEndpointRef("local", ref.address, "test_endpoint") + assert(s"akka.tcp://local@${env.address}/user/test_endpoint" === + newRef.asInstanceOf[AkkaRpcEndpointRef].actorRef.path.toString) + } finally { + newRpcEnv.shutdown() + } + } + + test("uriOf") { + val uri = env.uriOf("local", RpcAddress("1.2.3.4", 12345), "test_endpoint") + assert("akka.tcp://local@1.2.3.4:12345/user/test_endpoint" === uri) + } + + test("uriOf: ssl") { + val conf = SSLSampleConfigs.sparkSSLConfig() + val securityManager = new SecurityManager(conf) + val rpcEnv = new AkkaRpcEnvFactory().create( + RpcEnvConfig(conf, "test", "localhost", 0, securityManager, false)) + try { + val uri = rpcEnv.uriOf("local", RpcAddress("1.2.3.4", 12345), "test_endpoint") + assert("akka.ssl.tcp://local@1.2.3.4:12345/user/test_endpoint" === uri) + } finally { + rpcEnv.shutdown() + } + } + +} diff --git a/core/src/test/scala/org/apache/spark/rpc/netty/NettyRpcAddressSuite.scala b/core/src/test/scala/org/apache/spark/rpc/netty/NettyRpcAddressSuite.scala index 4fcdb619f9300..446d5193d0a19 100644 --- a/core/src/test/scala/org/apache/spark/rpc/netty/NettyRpcAddressSuite.scala +++ b/core/src/test/scala/org/apache/spark/rpc/netty/NettyRpcAddressSuite.scala @@ -18,7 +18,10 @@ package org.apache.spark.rpc.netty import org.apache.spark.SparkFunSuite +<<<<<<< HEAD +======= import org.apache.spark.rpc.RpcEndpointAddress +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 class NettyRpcAddressSuite extends SparkFunSuite { diff --git a/core/src/test/scala/org/apache/spark/rpc/netty/NettyRpcEnvSuite.scala 
b/core/src/test/scala/org/apache/spark/rpc/netty/NettyRpcEnvSuite.scala index 994a58836bd0d..abc89aa377c49 100644 --- a/core/src/test/scala/org/apache/spark/rpc/netty/NettyRpcEnvSuite.scala +++ b/core/src/test/scala/org/apache/spark/rpc/netty/NettyRpcEnvSuite.scala @@ -33,9 +33,15 @@ class NettyRpcEnvSuite extends RpcEnvSuite { } test("non-existent endpoint") { +<<<<<<< HEAD + val uri = env.uriOf("test", env.address, "nonexist-endpoint") + val e = intercept[RpcEndpointNotFoundException] { + env.setupEndpointRef("test", env.address, "nonexist-endpoint") +======= val uri = RpcEndpointAddress(env.address, "nonexist-endpoint").toString val e = intercept[RpcEndpointNotFoundException] { env.setupEndpointRef(env.address, "nonexist-endpoint") +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 } assert(e.getMessage.contains(uri)) } diff --git a/core/src/test/scala/org/apache/spark/scheduler/EventLoggingListenerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/EventLoggingListenerSuite.scala index 43da6fc5b5474..a85101d032d66 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/EventLoggingListenerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/EventLoggingListenerSuite.scala @@ -67,11 +67,19 @@ class EventLoggingListenerSuite extends SparkFunSuite with LocalSparkContext wit val logPath = new Path(eventLogger.logPath + EventLoggingListener.IN_PROGRESS) assert(fileSystem.exists(logPath)) val logStatus = fileSystem.getFileStatus(logPath) +<<<<<<< HEAD + assert(!logStatus.isDir) + + // Verify log is renamed after stop() + eventLogger.stop() + assert(!fileSystem.getFileStatus(new Path(eventLogger.logPath)).isDir) +======= assert(!logStatus.isDirectory) // Verify log is renamed after stop() eventLogger.stop() assert(!fileSystem.getFileStatus(new Path(eventLogger.logPath)).isDirectory) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 } test("Basic event logging") { diff --git a/core/src/test/scala/org/apache/spark/scheduler/ReplayListenerSuite.scala b/core/src/test/scala/org/apache/spark/scheduler/ReplayListenerSuite.scala index 761e82e6cf1ce..3648642dd606d 100644 --- a/core/src/test/scala/org/apache/spark/scheduler/ReplayListenerSuite.scala +++ b/core/src/test/scala/org/apache/spark/scheduler/ReplayListenerSuite.scala @@ -23,6 +23,10 @@ import java.net.URI import org.json4s.jackson.JsonMethods._ import org.scalatest.BeforeAndAfter +<<<<<<< HEAD +import org.apache.spark.{SparkConf, SparkContext, SPARK_VERSION} +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.{SparkConf, SparkContext, SparkFunSuite} import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.io.CompressionCodec @@ -114,7 +118,11 @@ class ReplayListenerSuite extends SparkFunSuite with BeforeAndAfter { val applications = fileSystem.listStatus(logDirPath) assert(applications != null && applications.size > 0) val eventLog = applications.sortBy(_.getModificationTime).last +<<<<<<< HEAD + assert(!eventLog.isDir) +======= assert(!eventLog.isDirectory) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 // Replay events val logData = EventLoggingListener.openEventLog(eventLog.getPath(), fileSystem) diff --git a/core/src/test/scala/org/apache/spark/ui/PagedTableSuite.scala b/core/src/test/scala/org/apache/spark/ui/PagedTableSuite.scala index 74eeca282882a..001ff202afcea 100644 --- a/core/src/test/scala/org/apache/spark/ui/PagedTableSuite.scala +++ b/core/src/test/scala/org/apache/spark/ui/PagedTableSuite.scala @@ -64,6 +64,9 @@ class PagedTableSuite extends SparkFunSuite 
{ override def row(t: Int): Seq[Node] = Nil +<<<<<<< HEAD + override def goButtonJavascriptFunction: (String, String) = ("", "") +======= override def pageSizeFormField: String = "pageSize" override def prevPageSizeFormField: String = "prevPageSize" @@ -71,6 +74,7 @@ class PagedTableSuite extends SparkFunSuite { override def pageNumberFormField: String = "page" override def goButtonFormPath: String = "" +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 } assert(pagedTable.pageNavigation(1, 10, 1) === Nil) diff --git a/core/src/test/scala/org/apache/spark/util/AkkaUtilsSuite.scala b/core/src/test/scala/org/apache/spark/util/AkkaUtilsSuite.scala new file mode 100644 index 0000000000000..0af4b6098bb0a --- /dev/null +++ b/core/src/test/scala/org/apache/spark/util/AkkaUtilsSuite.scala @@ -0,0 +1,360 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.util + +import scala.collection.mutable.ArrayBuffer + +import java.util.concurrent.TimeoutException + +import akka.actor.ActorNotFound + +import org.apache.spark._ +import org.apache.spark.rpc.RpcEnv +import org.apache.spark.scheduler.MapStatus +import org.apache.spark.storage.{BlockManagerId, ShuffleBlockId} +import org.apache.spark.SSLSampleConfigs._ + + +/** + * Test the AkkaUtils with various security settings. 
+ */ +class AkkaUtilsSuite extends SparkFunSuite with LocalSparkContext with ResetSystemProperties { + + test("remote fetch security bad password") { + val conf = new SparkConf + conf.set("spark.rpc", "akka") + conf.set("spark.authenticate", "true") + conf.set("spark.authenticate.secret", "good") + + val securityManager = new SecurityManager(conf) + val hostname = "localhost" + val rpcEnv = RpcEnv.create("spark", hostname, 0, conf, securityManager) + System.setProperty("spark.hostPort", rpcEnv.address.hostPort) + assert(securityManager.isAuthenticationEnabled() === true) + + val masterTracker = new MapOutputTrackerMaster(conf) + masterTracker.trackerEndpoint = rpcEnv.setupEndpoint(MapOutputTracker.ENDPOINT_NAME, + new MapOutputTrackerMasterEndpoint(rpcEnv, masterTracker, conf)) + + val badconf = new SparkConf + badconf.set("spark.rpc", "akka") + badconf.set("spark.authenticate", "true") + badconf.set("spark.authenticate.secret", "bad") + val securityManagerBad = new SecurityManager(badconf) + + assert(securityManagerBad.isAuthenticationEnabled() === true) + + val slaveRpcEnv = RpcEnv.create("spark-slave", hostname, 0, conf, securityManagerBad) + val slaveTracker = new MapOutputTrackerWorker(conf) + intercept[akka.actor.ActorNotFound] { + slaveTracker.trackerEndpoint = + slaveRpcEnv.setupEndpointRef("spark", rpcEnv.address, MapOutputTracker.ENDPOINT_NAME) + } + + rpcEnv.shutdown() + slaveRpcEnv.shutdown() + } + + test("remote fetch security off") { + val conf = new SparkConf + conf.set("spark.authenticate", "false") + conf.set("spark.authenticate.secret", "bad") + val securityManager = new SecurityManager(conf) + + val hostname = "localhost" + val rpcEnv = RpcEnv.create("spark", hostname, 0, conf, securityManager) + System.setProperty("spark.hostPort", rpcEnv.address.hostPort) + + assert(securityManager.isAuthenticationEnabled() === false) + + val masterTracker = new MapOutputTrackerMaster(conf) + masterTracker.trackerEndpoint = rpcEnv.setupEndpoint(MapOutputTracker.ENDPOINT_NAME, + new MapOutputTrackerMasterEndpoint(rpcEnv, masterTracker, conf)) + + val badconf = new SparkConf + badconf.set("spark.authenticate", "false") + badconf.set("spark.authenticate.secret", "good") + val securityManagerBad = new SecurityManager(badconf) + + val slaveRpcEnv = RpcEnv.create("spark-slave", hostname, 0, badconf, securityManagerBad) + val slaveTracker = new MapOutputTrackerWorker(conf) + slaveTracker.trackerEndpoint = + slaveRpcEnv.setupEndpointRef("spark", rpcEnv.address, MapOutputTracker.ENDPOINT_NAME) + + assert(securityManagerBad.isAuthenticationEnabled() === false) + + masterTracker.registerShuffle(10, 1) + masterTracker.incrementEpoch() + slaveTracker.updateEpoch(masterTracker.getEpoch) + + val size1000 = MapStatus.decompressSize(MapStatus.compressSize(1000L)) + masterTracker.registerMapOutput(10, 0, + MapStatus(BlockManagerId("a", "hostA", 1000), Array(1000L))) + masterTracker.incrementEpoch() + slaveTracker.updateEpoch(masterTracker.getEpoch) + + // this should succeed since security off + assert(slaveTracker.getMapSizesByExecutorId(10, 0).toSeq === + Seq((BlockManagerId("a", "hostA", 1000), + ArrayBuffer((ShuffleBlockId(10, 0, 0), size1000))))) + + rpcEnv.shutdown() + slaveRpcEnv.shutdown() + } + + test("remote fetch security pass") { + val conf = new SparkConf + conf.set("spark.authenticate", "true") + conf.set("spark.authenticate.secret", "good") + val securityManager = new SecurityManager(conf) + + val hostname = "localhost" + val rpcEnv = RpcEnv.create("spark", hostname, 0, conf, 
securityManager) + System.setProperty("spark.hostPort", rpcEnv.address.hostPort) + + assert(securityManager.isAuthenticationEnabled() === true) + + val masterTracker = new MapOutputTrackerMaster(conf) + masterTracker.trackerEndpoint = rpcEnv.setupEndpoint(MapOutputTracker.ENDPOINT_NAME, + new MapOutputTrackerMasterEndpoint(rpcEnv, masterTracker, conf)) + + val goodconf = new SparkConf + goodconf.set("spark.authenticate", "true") + goodconf.set("spark.authenticate.secret", "good") + val securityManagerGood = new SecurityManager(goodconf) + + assert(securityManagerGood.isAuthenticationEnabled() === true) + + val slaveRpcEnv = RpcEnv.create("spark-slave", hostname, 0, goodconf, securityManagerGood) + val slaveTracker = new MapOutputTrackerWorker(conf) + slaveTracker.trackerEndpoint = + slaveRpcEnv.setupEndpointRef("spark", rpcEnv.address, MapOutputTracker.ENDPOINT_NAME) + + masterTracker.registerShuffle(10, 1) + masterTracker.incrementEpoch() + slaveTracker.updateEpoch(masterTracker.getEpoch) + + val size1000 = MapStatus.decompressSize(MapStatus.compressSize(1000L)) + masterTracker.registerMapOutput(10, 0, MapStatus( + BlockManagerId("a", "hostA", 1000), Array(1000L))) + masterTracker.incrementEpoch() + slaveTracker.updateEpoch(masterTracker.getEpoch) + + // this should succeed since security on and passwords match + assert(slaveTracker.getMapSizesByExecutorId(10, 0) === + Seq((BlockManagerId("a", "hostA", 1000), + ArrayBuffer((ShuffleBlockId(10, 0, 0), size1000))))) + + rpcEnv.shutdown() + slaveRpcEnv.shutdown() + } + + test("remote fetch security off client") { + val conf = new SparkConf + conf.set("spark.rpc", "akka") + conf.set("spark.authenticate", "true") + conf.set("spark.authenticate.secret", "good") + + val securityManager = new SecurityManager(conf) + + val hostname = "localhost" + val rpcEnv = RpcEnv.create("spark", hostname, 0, conf, securityManager) + System.setProperty("spark.hostPort", rpcEnv.address.hostPort) + + assert(securityManager.isAuthenticationEnabled() === true) + + val masterTracker = new MapOutputTrackerMaster(conf) + masterTracker.trackerEndpoint = rpcEnv.setupEndpoint(MapOutputTracker.ENDPOINT_NAME, + new MapOutputTrackerMasterEndpoint(rpcEnv, masterTracker, conf)) + + val badconf = new SparkConf + badconf.set("spark.rpc", "akka") + badconf.set("spark.authenticate", "false") + badconf.set("spark.authenticate.secret", "bad") + val securityManagerBad = new SecurityManager(badconf) + + assert(securityManagerBad.isAuthenticationEnabled() === false) + + val slaveRpcEnv = RpcEnv.create("spark-slave", hostname, 0, badconf, securityManagerBad) + val slaveTracker = new MapOutputTrackerWorker(conf) + intercept[akka.actor.ActorNotFound] { + slaveTracker.trackerEndpoint = + slaveRpcEnv.setupEndpointRef("spark", rpcEnv.address, MapOutputTracker.ENDPOINT_NAME) + } + + rpcEnv.shutdown() + slaveRpcEnv.shutdown() + } + + test("remote fetch ssl on") { + val conf = sparkSSLConfig() + val securityManager = new SecurityManager(conf) + + val hostname = "localhost" + val rpcEnv = RpcEnv.create("spark", hostname, 0, conf, securityManager) + System.setProperty("spark.hostPort", rpcEnv.address.hostPort) + + assert(securityManager.isAuthenticationEnabled() === false) + + val masterTracker = new MapOutputTrackerMaster(conf) + masterTracker.trackerEndpoint = rpcEnv.setupEndpoint(MapOutputTracker.ENDPOINT_NAME, + new MapOutputTrackerMasterEndpoint(rpcEnv, masterTracker, conf)) + + val slaveConf = sparkSSLConfig() + val securityManagerBad = new SecurityManager(slaveConf) + + val slaveRpcEnv = 
RpcEnv.create("spark-slaves", hostname, 0, slaveConf, securityManagerBad) + val slaveTracker = new MapOutputTrackerWorker(conf) + slaveTracker.trackerEndpoint = + slaveRpcEnv.setupEndpointRef("spark", rpcEnv.address, MapOutputTracker.ENDPOINT_NAME) + + assert(securityManagerBad.isAuthenticationEnabled() === false) + + masterTracker.registerShuffle(10, 1) + masterTracker.incrementEpoch() + slaveTracker.updateEpoch(masterTracker.getEpoch) + + val size1000 = MapStatus.decompressSize(MapStatus.compressSize(1000L)) + masterTracker.registerMapOutput(10, 0, + MapStatus(BlockManagerId("a", "hostA", 1000), Array(1000L))) + masterTracker.incrementEpoch() + slaveTracker.updateEpoch(masterTracker.getEpoch) + + // this should succeed since security off + assert(slaveTracker.getMapSizesByExecutorId(10, 0) === + Seq((BlockManagerId("a", "hostA", 1000), ArrayBuffer((ShuffleBlockId(10, 0, 0), size1000))))) + + rpcEnv.shutdown() + slaveRpcEnv.shutdown() + } + + + test("remote fetch ssl on and security enabled") { + val conf = sparkSSLConfig() + conf.set("spark.authenticate", "true") + conf.set("spark.authenticate.secret", "good") + val securityManager = new SecurityManager(conf) + + val hostname = "localhost" + val rpcEnv = RpcEnv.create("spark", hostname, 0, conf, securityManager) + System.setProperty("spark.hostPort", rpcEnv.address.hostPort) + + assert(securityManager.isAuthenticationEnabled() === true) + + val masterTracker = new MapOutputTrackerMaster(conf) + masterTracker.trackerEndpoint = rpcEnv.setupEndpoint(MapOutputTracker.ENDPOINT_NAME, + new MapOutputTrackerMasterEndpoint(rpcEnv, masterTracker, conf)) + + val slaveConf = sparkSSLConfig() + slaveConf.set("spark.authenticate", "true") + slaveConf.set("spark.authenticate.secret", "good") + val securityManagerBad = new SecurityManager(slaveConf) + + val slaveRpcEnv = RpcEnv.create("spark-slave", hostname, 0, slaveConf, securityManagerBad) + val slaveTracker = new MapOutputTrackerWorker(conf) + slaveTracker.trackerEndpoint = + slaveRpcEnv.setupEndpointRef("spark", rpcEnv.address, MapOutputTracker.ENDPOINT_NAME) + + assert(securityManagerBad.isAuthenticationEnabled() === true) + + masterTracker.registerShuffle(10, 1) + masterTracker.incrementEpoch() + slaveTracker.updateEpoch(masterTracker.getEpoch) + + val size1000 = MapStatus.decompressSize(MapStatus.compressSize(1000L)) + masterTracker.registerMapOutput(10, 0, + MapStatus(BlockManagerId("a", "hostA", 1000), Array(1000L))) + masterTracker.incrementEpoch() + slaveTracker.updateEpoch(masterTracker.getEpoch) + + assert(slaveTracker.getMapSizesByExecutorId(10, 0) === + Seq((BlockManagerId("a", "hostA", 1000), ArrayBuffer((ShuffleBlockId(10, 0, 0), size1000))))) + + rpcEnv.shutdown() + slaveRpcEnv.shutdown() + } + + + test("remote fetch ssl on and security enabled - bad credentials") { + val conf = sparkSSLConfig() + conf.set("spark.rpc", "akka") + conf.set("spark.authenticate", "true") + conf.set("spark.authenticate.secret", "good") + val securityManager = new SecurityManager(conf) + + val hostname = "localhost" + val rpcEnv = RpcEnv.create("spark", hostname, 0, conf, securityManager) + System.setProperty("spark.hostPort", rpcEnv.address.hostPort) + + assert(securityManager.isAuthenticationEnabled() === true) + + val masterTracker = new MapOutputTrackerMaster(conf) + masterTracker.trackerEndpoint = rpcEnv.setupEndpoint(MapOutputTracker.ENDPOINT_NAME, + new MapOutputTrackerMasterEndpoint(rpcEnv, masterTracker, conf)) + + val slaveConf = sparkSSLConfig() + slaveConf.set("spark.rpc", "akka") + 
slaveConf.set("spark.authenticate", "true") + slaveConf.set("spark.authenticate.secret", "bad") + val securityManagerBad = new SecurityManager(slaveConf) + + val slaveRpcEnv = RpcEnv.create("spark-slave", hostname, 0, slaveConf, securityManagerBad) + val slaveTracker = new MapOutputTrackerWorker(conf) + intercept[akka.actor.ActorNotFound] { + slaveTracker.trackerEndpoint = + slaveRpcEnv.setupEndpointRef("spark", rpcEnv.address, MapOutputTracker.ENDPOINT_NAME) + } + + rpcEnv.shutdown() + slaveRpcEnv.shutdown() + } + + + test("remote fetch ssl on - untrusted server") { + val conf = sparkSSLConfigUntrusted() + val securityManager = new SecurityManager(conf) + + val hostname = "localhost" + val rpcEnv = RpcEnv.create("spark", hostname, 0, conf, securityManager) + System.setProperty("spark.hostPort", rpcEnv.address.hostPort) + + assert(securityManager.isAuthenticationEnabled() === false) + + val masterTracker = new MapOutputTrackerMaster(conf) + masterTracker.trackerEndpoint = rpcEnv.setupEndpoint(MapOutputTracker.ENDPOINT_NAME, + new MapOutputTrackerMasterEndpoint(rpcEnv, masterTracker, conf)) + + val slaveConf = sparkSSLConfig() + .set("spark.rpc.askTimeout", "5s") + .set("spark.rpc.lookupTimeout", "5s") + val securityManagerBad = new SecurityManager(slaveConf) + + val slaveRpcEnv = RpcEnv.create("spark-slave", hostname, 0, slaveConf, securityManagerBad) + try { + slaveRpcEnv.setupEndpointRef("spark", rpcEnv.address, MapOutputTracker.ENDPOINT_NAME) + fail("should receive either ActorNotFound or TimeoutException") + } catch { + case e: ActorNotFound => + case e: TimeoutException => + } + + rpcEnv.shutdown() + slaveRpcEnv.shutdown() + } + +} diff --git a/dev/run-tests-jenkins.py b/dev/run-tests-jenkins.py index 6501721572f98..d4263c093a631 100755 --- a/dev/run-tests-jenkins.py +++ b/dev/run-tests-jenkins.py @@ -124,7 +124,10 @@ def run_tests(tests_timeout): ERROR_CODES["BLOCK_R_STYLE"]: 'R style tests', ERROR_CODES["BLOCK_DOCUMENTATION"]: 'to generate documentation', ERROR_CODES["BLOCK_BUILD"]: 'to build', +<<<<<<< HEAD +======= ERROR_CODES["BLOCK_BUILD_TESTS"]: 'build dependency tests', +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 ERROR_CODES["BLOCK_MIMA"]: 'MiMa tests', ERROR_CODES["BLOCK_SPARK_UNIT_TESTS"]: 'Spark unit tests', ERROR_CODES["BLOCK_PYSPARK_UNIT_TESTS"]: 'PySpark unit tests', @@ -194,6 +197,10 @@ def main(): pr_tests = [ "pr_merge_ability", "pr_public_classes" +<<<<<<< HEAD + # DISABLED (pwendell) "pr_new_dependencies" +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 ] # `bind_message_base` returns a function to generate messages for Github posting diff --git a/dev/run-tests.py b/dev/run-tests.py index 23278d298c22d..bbd6e5c5bd88a 100755 --- a/dev/run-tests.py +++ b/dev/run-tests.py @@ -417,12 +417,15 @@ def run_python_tests(test_modules, parallelism): run_cmd(command) +<<<<<<< HEAD +======= def run_build_tests(): # set_title_and_block("Running build tests", "BLOCK_BUILD_TESTS") # run_cmd([os.path.join(SPARK_HOME, "dev", "test-dependencies.sh")]) pass +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 def run_sparkr_tests(): set_title_and_block("Running SparkR tests", "BLOCK_SPARKR_UNIT_TESTS") @@ -543,9 +546,12 @@ def main(): # if "DOCS" in changed_modules and test_env == "amplab_jenkins": # build_spark_documentation() +<<<<<<< HEAD +======= if any(m.should_run_build_tests for m in test_modules): run_build_tests() +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 # spark build build_apache_spark(build_tool, hadoop_version) diff --git 
a/dev/sparktestsupport/__init__.py b/dev/sparktestsupport/__init__.py index 89015f8c4fb9c..8146134f66004 100644 --- a/dev/sparktestsupport/__init__.py +++ b/dev/sparktestsupport/__init__.py @@ -32,6 +32,9 @@ "BLOCK_PYSPARK_UNIT_TESTS": 19, "BLOCK_SPARKR_UNIT_TESTS": 20, "BLOCK_JAVA_STYLE": 21, +<<<<<<< HEAD +======= "BLOCK_BUILD_TESTS": 22, +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 "BLOCK_TIMEOUT": 124 } diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py index 47cd600bd18a4..92a616b9929fe 100644 --- a/dev/sparktestsupport/modules.py +++ b/dev/sparktestsupport/modules.py @@ -31,7 +31,11 @@ class Module(object): def __init__(self, name, dependencies, source_file_regexes, build_profile_flags=(), environ={}, sbt_test_goals=(), python_test_goals=(), blacklisted_python_implementations=(), +<<<<<<< HEAD + test_tags=(), should_run_r_tests=False): +======= test_tags=(), should_run_r_tests=False, should_run_build_tests=False): +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 """ Define a new module. @@ -53,7 +57,10 @@ def __init__(self, name, dependencies, source_file_regexes, build_profile_flags= :param test_tags A set of tags that will be excluded when running unit tests if the module is not explicitly changed. :param should_run_r_tests: If true, changes in this module will trigger all R tests. +<<<<<<< HEAD +======= :param should_run_build_tests: If true, changes in this module will trigger build tests. +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 """ self.name = name self.dependencies = dependencies @@ -65,7 +72,10 @@ def __init__(self, name, dependencies, source_file_regexes, build_profile_flags= self.blacklisted_python_implementations = blacklisted_python_implementations self.test_tags = test_tags self.should_run_r_tests = should_run_r_tests +<<<<<<< HEAD +======= self.should_run_build_tests = should_run_build_tests +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 self.dependent_modules = set() for dep in dependencies: @@ -396,6 +406,8 @@ def contains_file(self, filename): ] ) +<<<<<<< HEAD +======= build = Module( name="build", dependencies=[], @@ -405,6 +417,7 @@ def contains_file(self, filename): ], should_run_build_tests=True ) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 ec2 = Module( name="ec2", @@ -444,6 +457,10 @@ def contains_file(self, filename): "test", ], python_test_goals=list(itertools.chain.from_iterable(m.python_test_goals for m in all_modules)), +<<<<<<< HEAD + should_run_r_tests=True +======= should_run_r_tests=True, should_run_build_tests=True +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 ) diff --git a/dev/tests/pr_new_dependencies.sh b/dev/tests/pr_new_dependencies.sh new file mode 100755 index 0000000000000..fdfb3c62aff58 --- /dev/null +++ b/dev/tests/pr_new_dependencies.sh @@ -0,0 +1,117 @@ +#!/usr/bin/env bash + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +# +# This script follows the base format for testing pull requests against +# another branch and returning results to be published. More details can be +# found at dev/run-tests-jenkins. +# +# Arg1: The Github Pull Request Actual Commit +#+ known as `ghprbActualCommit` in `run-tests-jenkins` +# Arg2: The SHA1 hash +#+ known as `sha1` in `run-tests-jenkins` +# Arg3: Current PR Commit Hash +#+ the PR hash for the current commit +# + +ghprbActualCommit="$1" +sha1="$2" +current_pr_head="$3" + +MVN_BIN="build/mvn" +CURR_CP_FILE="my-classpath.txt" +MASTER_CP_FILE="master-classpath.txt" + +# First switch over to the master branch +git checkout -f master +# Find and copy all pom.xml files into a *.gate file that we can check +# against through various `git` changes +find -name "pom.xml" -exec cp {} {}.gate \; +# Switch back to the current PR +git checkout -f "${current_pr_head}" + +# Check if any *.pom files from the current branch are different from the master +difference_q="" +for p in $(find -name "pom.xml"); do + [[ -f "${p}" && -f "${p}.gate" ]] && \ + difference_q="${difference_q}$(diff $p.gate $p)" +done + +# If no pom files were changed we can easily say no new dependencies were added +if [ -z "${difference_q}" ]; then + echo " * This patch does not change any dependencies." +else + # Else we need to manually build spark to determine what, if any, dependencies + # were added into the Spark assembly jar + ${MVN_BIN} clean package dependency:build-classpath -DskipTests 2>/dev/null | \ + sed -n -e '/Building Spark Project Assembly/,$p' | \ + grep --context=1 -m 2 "Dependencies classpath:" | \ + head -n 3 | \ + tail -n 1 | \ + tr ":" "\n" | \ + rev | \ + cut -d "/" -f 1 | \ + rev | \ + sort > ${CURR_CP_FILE} + + # Checkout the master branch to compare against + git checkout -f master + + ${MVN_BIN} clean package dependency:build-classpath -DskipTests 2>/dev/null | \ + sed -n -e '/Building Spark Project Assembly/,$p' | \ + grep --context=1 -m 2 "Dependencies classpath:" | \ + head -n 3 | \ + tail -n 1 | \ + tr ":" "\n" | \ + rev | \ + cut -d "/" -f 1 | \ + rev | \ + sort > ${MASTER_CP_FILE} + + DIFF_RESULTS="`diff ${CURR_CP_FILE} ${MASTER_CP_FILE}`" + + if [ -z "${DIFF_RESULTS}" ]; then + echo " * This patch does not change any dependencies." 
+ else + # Pretty print the new dependencies + added_deps=$(echo "${DIFF_RESULTS}" | grep "<" | cut -d' ' -f2 | awk '{printf " * \`"$1"\`\\n"}') + removed_deps=$(echo "${DIFF_RESULTS}" | grep ">" | cut -d' ' -f2 | awk '{printf " * \`"$1"\`\\n"}') + added_deps_text=" * This patch **adds the following new dependencies:**\n${added_deps}" + removed_deps_text=" * This patch **removes the following dependencies:**\n${removed_deps}" + + # Construct the final returned message with proper + return_mssg="" + [ -n "${added_deps}" ] && return_mssg="${added_deps_text}" + if [ -n "${removed_deps}" ]; then + if [ -n "${return_mssg}" ]; then + return_mssg="${return_mssg}\n${removed_deps_text}" + else + return_mssg="${removed_deps_text}" + fi + fi + echo "${return_mssg}" + fi + + # Remove the files we've left over + [ -f "${CURR_CP_FILE}" ] && rm -f "${CURR_CP_FILE}" + [ -f "${MASTER_CP_FILE}" ] && rm -f "${MASTER_CP_FILE}" + + # Clean up our mess from the Maven builds just in case + ${MVN_BIN} clean &>/dev/null +fi diff --git a/dev/tests/pr_public_classes.sh b/dev/tests/pr_public_classes.sh index 41c5d3ee8cb3c..92dc2d1771270 100755 --- a/dev/tests/pr_public_classes.sh +++ b/dev/tests/pr_public_classes.sh @@ -24,6 +24,20 @@ # # Arg1: The Github Pull Request Actual Commit #+ known as `ghprbActualCommit` in `run-tests-jenkins` +<<<<<<< HEAD +# Arg2: The SHA1 hash +#+ known as `sha1` in `run-tests-jenkins` +# + +# We diff master...$ghprbActualCommit because that gets us changes introduced in the PR +#+ and not anything else added to master since the PR was branched. + +ghprbActualCommit="$1" +sha1="$2" + +source_files=$( + git diff master...$ghprbActualCommit --name-only `# diff patch against master from branch point` \ +======= ghprbActualCommit="$1" @@ -43,25 +57,42 @@ fi source_files=$( git diff $ghprbActualCommit^...$ghprbActualCommit --name-only `# diff patch against master from branch point` \ +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 | grep -v -e "\/test" `# ignore files in test directories` \ | grep -e "\.py$" -e "\.java$" -e "\.scala$" `# include only code files` \ | tr "\n" " " ) +<<<<<<< HEAD +new_public_classes=$( + git diff master...$ghprbActualCommit ${source_files} `# diff patch against master from branch point` \ + | grep "^\+" `# filter in only added lines` \ + | sed -r -e "s/^\+//g" `# remove the leading +` \ +======= new_public_classes=$( git diff $ghprbActualCommit^...$ghprbActualCommit ${source_files} `# diff patch against master from branch point` \ | grep "^\+" `# filter in only added lines` \ | $SED -e "s/^\+//g" `# remove the leading +` \ +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 | grep -e "trait " -e "class " `# filter in lines with these key words` \ | grep -e "{" -e "(" `# filter in lines with these key words, too` \ | grep -v -e "\@\@" -e "private" `# exclude lines with these words` \ | grep -v -e "^// " -e "^/\*" -e "^ \* " `# exclude comment lines` \ +<<<<<<< HEAD + | sed -r -e "s/\{.*//g" `# remove from the { onwards` \ + | sed -r -e "s/\}//g" `# just in case, remove }; they mess the JSON` \ + | sed -r -e "s/\"/\\\\\"/g" `# escape double quotes; they mess the JSON` \ + | sed -r -e "s/^(.*)$/\`\1\`/g" `# surround with backticks for style` \ + | sed -r -e "s/^/ \* /g" `# prepend ' *' to start of line` \ + | sed -r -e "s/$/\\\n/g" `# append newline to end of line` \ +======= | $SED -e "s/\{.*//g" `# remove from the { onwards` \ | $SED -e "s/\}//g" `# just in case, remove }; they mess the JSON` \ | $SED -e "s/\"/\\\\\"/g" `# escape double quotes; they mess the JSON` 
\ | $SED -e "s/^(.*)$/\`\1\`/g" `# surround with backticks for style` \ | $SED -e "s/^/ \* /g" `# prepend ' *' to start of line` \ | $SED -e "s/$/\\\n/g" `# append newline to end of line` \ +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 | tr -d "\n" `# remove actual LF characters` ) @@ -69,5 +100,9 @@ if [ -z "$new_public_classes" ]; then echo " * This patch adds no public classes." else public_classes_note=" * This patch adds the following public classes _(experimental)_:" +<<<<<<< HEAD + echo "${public_classes_note}\n${new_public_classes}" +======= echo -e "${public_classes_note}\n${new_public_classes}" +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 fi diff --git a/docs/configuration.md b/docs/configuration.md index 7d743d572b582..83d9ad2913e27 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -824,6 +824,16 @@ Apart from these, the following properties are also available, and may be useful +<<<<<<< HEAD + spark.broadcast.factory + org.apache.spark.broadcast.
    TorrentBroadcastFactory + + Which broadcast implementation to use. + + + +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 spark.cleaner.ttl (infinite) @@ -1011,6 +1021,17 @@ Apart from these, the following properties are also available, and may be useful +<<<<<<< HEAD + spark.broadcast.port + (random) + + Port for the driver's HTTP broadcast server to listen on. + This is not relevant for torrent broadcast. + + + +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 spark.driver.host (local hostname) @@ -1429,8 +1450,13 @@ Apart from these, the following properties are also available, and may be useful

    Use spark.ssl.YYY.XXX settings to overwrite the global configuration for particular protocol denoted by YYY. Currently YYY can be +<<<<<<< HEAD + either akka for Akka based connections or fs for broadcast and + file server.

    +======= either akka for Akka based connections or fs for file server.

    +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 diff --git a/docs/security.md b/docs/security.md index 1b7741d4dd93c..a4a0df5f72849 100644 --- a/docs/security.md +++ b/docs/security.md @@ -23,7 +23,11 @@ If your applications are using event logging, the directory where the event logs ## Encryption +<<<<<<< HEAD +Spark supports SSL for Akka and HTTP (for broadcast and file server) protocols. SASL encryption is +======= Spark supports SSL for Akka and HTTP (for file server) protocols. SASL encryption is +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 supported for the block transfer service. Encryption is not yet supported for the WebUI. Encryption is not yet supported for data stored by Spark in temporary local storage, such as shuffle @@ -32,7 +36,11 @@ to configure your cluster manager to store application data on encrypted disks. ### SSL Configuration +<<<<<<< HEAD +Configuration for SSL is organized hierarchically. The user can configure the default SSL settings which will be used for all the supported communication protocols unless they are overwritten by protocol-specific settings. This way the user can easily provide the common settings for all the protocols without disabling the ability to configure each one individually. The common SSL settings are at `spark.ssl` namespace in Spark configuration, while Akka SSL configuration is at `spark.ssl.akka` and HTTP for broadcast and file server SSL configuration is at `spark.ssl.fs`. The full breakdown can be found on the [configuration page](configuration.html). +======= Configuration for SSL is organized hierarchically. The user can configure the default SSL settings which will be used for all the supported communication protocols unless they are overwritten by protocol-specific settings. This way the user can easily provide the common settings for all the protocols without disabling the ability to configure each one individually. The common SSL settings are at `spark.ssl` namespace in Spark configuration, while Akka SSL configuration is at `spark.ssl.akka` and HTTP for file server SSL configuration is at `spark.ssl.fs`. The full breakdown can be found on the [configuration page](configuration.html). +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 SSL must be configured on each node and configured for each component involved in communication using the particular protocol. @@ -161,6 +169,18 @@ configure those ports. Jetty-based. Only used if Akka RPC backend is configured. +<<<<<<< HEAD + Executor + Driver + (random) + HTTP Broadcast + spark.broadcast.port + Jetty-based. Not used by TorrentBroadcast, which sends data through the block manager + instead. 
+ + +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 Executor / Driver Executor / Driver (random) diff --git a/examples/src/main/scala/org/apache/spark/examples/BroadcastTest.scala b/examples/src/main/scala/org/apache/spark/examples/BroadcastTest.scala index 3da5236745b51..47e5172ea9f46 100644 --- a/examples/src/main/scala/org/apache/spark/examples/BroadcastTest.scala +++ b/examples/src/main/scala/org/apache/spark/examples/BroadcastTest.scala @@ -21,14 +21,26 @@ package org.apache.spark.examples import org.apache.spark.{SparkConf, SparkContext} /** +<<<<<<< HEAD + * Usage: BroadcastTest [slices] [numElem] [broadcastAlgo] [blockSize] +======= * Usage: BroadcastTest [slices] [numElem] [blockSize] +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 */ object BroadcastTest { def main(args: Array[String]) { +<<<<<<< HEAD + val bcName = if (args.length > 2) args(2) else "Http" + val blockSize = if (args.length > 3) args(3) else "4096" + + val sparkConf = new SparkConf().setAppName("Broadcast Test") + .set("spark.broadcast.factory", s"org.apache.spark.broadcast.${bcName}BroadcastFactory") +======= val blockSize = if (args.length > 2) args(2) else "4096" val sparkConf = new SparkConf().setAppName("Broadcast Test") +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 .set("spark.broadcast.blockSize", blockSize) val sc = new SparkContext(sparkConf) @@ -42,7 +54,11 @@ object BroadcastTest { println("===========") val startTime = System.nanoTime val barr1 = sc.broadcast(arr1) +<<<<<<< HEAD + val observedSizes = sc.parallelize(1 to 10, slices).map(_ => barr1.value.size) +======= val observedSizes = sc.parallelize(1 to 10, slices).map(_ => barr1.value.length) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 // Collect the small RDD so we can print the observed sizes locally. 
observedSizes.collect().foreach(i => println(i)) println("Iteration %d took %.0f milliseconds".format(i, (System.nanoTime - startTime) / 1E6)) diff --git a/examples/src/main/scala/org/apache/spark/examples/CassandraCQLTest.scala b/examples/src/main/scala/org/apache/spark/examples/CassandraCQLTest.scala index 5a80985a49458..da09a6a10ded7 100644 --- a/examples/src/main/scala/org/apache/spark/examples/CassandraCQLTest.scala +++ b/examples/src/main/scala/org/apache/spark/examples/CassandraCQLTest.scala @@ -16,6 +16,10 @@ */ // scalastyle:off println +<<<<<<< HEAD + // scalastyle:off jobcontext +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 package org.apache.spark.examples import java.nio.ByteBuffer @@ -79,7 +83,11 @@ object CassandraCQLTest { val InputColumnFamily = "ordercf" val OutputColumnFamily = "salecount" +<<<<<<< HEAD + val job = new Job() +======= val job = Job.getInstance() +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 job.setInputFormatClass(classOf[CqlPagingInputFormat]) val configuration = job.getConfiguration ConfigHelper.setInputInitialAddress(job.getConfiguration(), cHost) @@ -136,3 +144,7 @@ object CassandraCQLTest { } } // scalastyle:on println +<<<<<<< HEAD +// scalastyle:on jobcontext +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 diff --git a/examples/src/main/scala/org/apache/spark/examples/CassandraTest.scala b/examples/src/main/scala/org/apache/spark/examples/CassandraTest.scala index ad39a012b4ae6..19d3cc1e7ae7d 100644 --- a/examples/src/main/scala/org/apache/spark/examples/CassandraTest.scala +++ b/examples/src/main/scala/org/apache/spark/examples/CassandraTest.scala @@ -16,6 +16,10 @@ */ // scalastyle:off println +<<<<<<< HEAD +// scalastyle:off jobcontext +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 package org.apache.spark.examples import java.nio.ByteBuffer @@ -58,7 +62,11 @@ object CassandraTest { val sc = new SparkContext(sparkConf) // Build the job configuration with ConfigHelper provided by Cassandra +<<<<<<< HEAD + val job = new Job() +======= val job = Job.getInstance() +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 job.setInputFormatClass(classOf[ColumnFamilyInputFormat]) val host: String = args(1) @@ -130,6 +138,10 @@ object CassandraTest { } } // scalastyle:on println +<<<<<<< HEAD +// scalastyle:on jobcontext +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /* create keyspace casDemo; diff --git a/external/flume-sink/src/main/scala/org/apache/spark/streaming/flume/sink/TransactionProcessor.scala b/external/flume-sink/src/main/scala/org/apache/spark/streaming/flume/sink/TransactionProcessor.scala index b15c2097e550c..51ff4e1054213 100644 --- a/external/flume-sink/src/main/scala/org/apache/spark/streaming/flume/sink/TransactionProcessor.scala +++ b/external/flume-sink/src/main/scala/org/apache/spark/streaming/flume/sink/TransactionProcessor.scala @@ -22,7 +22,11 @@ import java.util.concurrent.{Callable, CountDownLatch, TimeUnit} import scala.util.control.Breaks +<<<<<<< HEAD +import org.apache.flume.{Transaction, Channel} +======= import org.apache.flume.{Channel, Transaction} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 // Flume forces transactions to be thread-local (horrible, I know!) // So the sink basically spawns a new thread to pull the events out within a transaction. 
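Editor's note: most of the remaining hunks in this merge, starting with the Flume TransactionProcessor import just above and continuing through the Kafka, Twitter, ZeroMQ, Kinesis, and GraphX files below, conflict only on import order. The right-hand side of each conflict converges on the grouping the Scala style checker expects: java/scala imports first, then third-party libraries, then org.apache.spark, with members inside braces alphabetized. An illustrative-only sketch of that convention follows, reusing names that already appear in the Flume hunks above (it introduces nothing new and compiles only with the Flume and Spark 1.x artifacts on the classpath):

// Illustrative layout only -- the ordering the second side of these import conflicts follows:
// java/scala first, then third-party, then org.apache.spark, each group alphabetized.
import java.io.{ObjectInput, ObjectOutput}

import scala.collection.JavaConverters._

import org.apache.flume.{Channel, Transaction}

import org.apache.spark.Logging
import org.apache.spark.util.Utils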
diff --git a/external/flume/src/main/scala/org/apache/spark/streaming/flume/EventTransformer.scala b/external/flume/src/main/scala/org/apache/spark/streaming/flume/EventTransformer.scala index 5c773d4b07cf6..81985f7a17c97 100644 --- a/external/flume/src/main/scala/org/apache/spark/streaming/flume/EventTransformer.scala +++ b/external/flume/src/main/scala/org/apache/spark/streaming/flume/EventTransformer.scala @@ -17,12 +17,21 @@ package org.apache.spark.streaming.flume +<<<<<<< HEAD +import java.io.{ObjectOutput, ObjectInput} + +import scala.collection.JavaConverters._ + +import org.apache.spark.util.Utils +import org.apache.spark.Logging +======= import java.io.{ObjectInput, ObjectOutput} import scala.collection.JavaConverters._ import org.apache.spark.Logging import org.apache.spark.util.Utils +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * A simple object that provides the implementation of readExternal and writeExternal for both diff --git a/external/flume/src/main/scala/org/apache/spark/streaming/flume/FlumeInputDStream.scala b/external/flume/src/main/scala/org/apache/spark/streaming/flume/FlumeInputDStream.scala index 1bfa35a8b3d1d..47dfb04e05e69 100644 --- a/external/flume/src/main/scala/org/apache/spark/streaming/flume/FlumeInputDStream.scala +++ b/external/flume/src/main/scala/org/apache/spark/streaming/flume/FlumeInputDStream.scala @@ -17,14 +17,36 @@ package org.apache.spark.streaming.flume +<<<<<<< HEAD +import java.net.InetSocketAddress +import java.io.{ObjectInput, ObjectOutput, Externalizable} +======= import java.io.{Externalizable, ObjectInput, ObjectOutput} import java.net.InetSocketAddress +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import java.nio.ByteBuffer import java.util.concurrent.Executors import scala.collection.JavaConverters._ import scala.reflect.ClassTag +<<<<<<< HEAD +import org.apache.flume.source.avro.AvroSourceProtocol +import org.apache.flume.source.avro.AvroFlumeEvent +import org.apache.flume.source.avro.Status +import org.apache.avro.ipc.specific.SpecificResponder +import org.apache.avro.ipc.NettyServer +import org.apache.spark.Logging +import org.apache.spark.util.Utils +import org.apache.spark.storage.StorageLevel +import org.apache.spark.streaming.dstream._ +import org.apache.spark.streaming.StreamingContext +import org.apache.spark.streaming.receiver.Receiver + +import org.jboss.netty.channel.{ChannelPipeline, ChannelPipelineFactory, Channels} +import org.jboss.netty.channel.socket.nio.NioServerSocketChannelFactory +import org.jboss.netty.handler.codec.compression._ +======= import org.apache.avro.ipc.NettyServer import org.apache.avro.ipc.specific.SpecificResponder import org.apache.flume.source.avro.{AvroFlumeEvent, AvroSourceProtocol, Status} @@ -38,6 +60,7 @@ import org.apache.spark.streaming.StreamingContext import org.apache.spark.streaming.dstream._ import org.apache.spark.streaming.receiver.Receiver import org.apache.spark.util.Utils +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 private[streaming] class FlumeInputDStream[T: ClassTag]( diff --git a/external/flume/src/main/scala/org/apache/spark/streaming/flume/FlumePollingInputDStream.scala b/external/flume/src/main/scala/org/apache/spark/streaming/flume/FlumePollingInputDStream.scala index d9c25e86540db..641b6da89ec35 100644 --- a/external/flume/src/main/scala/org/apache/spark/streaming/flume/FlumePollingInputDStream.scala +++ b/external/flume/src/main/scala/org/apache/spark/streaming/flume/FlumePollingInputDStream.scala @@ -32,8 +32,13 @@ import org.apache.spark.Logging 
import org.apache.spark.storage.StorageLevel import org.apache.spark.streaming.StreamingContext import org.apache.spark.streaming.dstream.ReceiverInputDStream +<<<<<<< HEAD +import org.apache.spark.streaming.receiver.Receiver +import org.apache.spark.streaming.flume.sink._ +======= import org.apache.spark.streaming.flume.sink._ import org.apache.spark.streaming.receiver.Receiver +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * A [[ReceiverInputDStream]] that can be used to read data from several Flume agents running diff --git a/external/flume/src/main/scala/org/apache/spark/streaming/flume/FlumeTestUtils.scala b/external/flume/src/main/scala/org/apache/spark/streaming/flume/FlumeTestUtils.scala index 3f87ce46e5952..2170baefcc360 100644 --- a/external/flume/src/main/scala/org/apache/spark/streaming/flume/FlumeTestUtils.scala +++ b/external/flume/src/main/scala/org/apache/spark/streaming/flume/FlumeTestUtils.scala @@ -29,7 +29,11 @@ import org.apache.avro.ipc.NettyTransceiver import org.apache.avro.ipc.specific.SpecificRequestor import org.apache.commons.lang3.RandomUtils import org.apache.flume.source.avro +<<<<<<< HEAD +import org.apache.flume.source.avro.{AvroSourceProtocol, AvroFlumeEvent} +======= import org.apache.flume.source.avro.{AvroFlumeEvent, AvroSourceProtocol} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.jboss.netty.channel.ChannelPipeline import org.jboss.netty.channel.socket.SocketChannel import org.jboss.netty.channel.socket.nio.NioClientSocketChannelFactory diff --git a/external/flume/src/main/scala/org/apache/spark/streaming/flume/FlumeUtils.scala b/external/flume/src/main/scala/org/apache/spark/streaming/flume/FlumeUtils.scala index 3e3ed712f0dbf..7422e161ee01e 100644 --- a/external/flume/src/main/scala/org/apache/spark/streaming/flume/FlumeUtils.scala +++ b/external/flume/src/main/scala/org/apache/spark/streaming/flume/FlumeUtils.scala @@ -17,8 +17,13 @@ package org.apache.spark.streaming.flume +<<<<<<< HEAD +import java.net.InetSocketAddress +import java.io.{DataOutputStream, ByteArrayOutputStream} +======= import java.io.{ByteArrayOutputStream, DataOutputStream} import java.net.InetSocketAddress +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import java.util.{List => JList, Map => JMap} import scala.collection.JavaConverters._ @@ -30,6 +35,10 @@ import org.apache.spark.streaming.StreamingContext import org.apache.spark.streaming.api.java.{JavaPairDStream, JavaReceiverInputDStream, JavaStreamingContext} import org.apache.spark.streaming.dstream.ReceiverInputDStream +<<<<<<< HEAD + +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 object FlumeUtils { private val DEFAULT_POLLING_PARALLELISM = 5 private val DEFAULT_POLLING_BATCH_SIZE = 1000 diff --git a/external/flume/src/main/scala/org/apache/spark/streaming/flume/PollingFlumeTestUtils.scala b/external/flume/src/main/scala/org/apache/spark/streaming/flume/PollingFlumeTestUtils.scala index 9515d07c5ee5b..34f610f9a0c43 100644 --- a/external/flume/src/main/scala/org/apache/spark/streaming/flume/PollingFlumeTestUtils.scala +++ b/external/flume/src/main/scala/org/apache/spark/streaming/flume/PollingFlumeTestUtils.scala @@ -17,8 +17,13 @@ package org.apache.spark.streaming.flume +<<<<<<< HEAD +import java.util.concurrent._ +import java.util.{Collections, List => JList, Map => JMap} +======= import java.util.{Collections, List => JList, Map => JMap} import java.util.concurrent._ +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import scala.collection.mutable.ArrayBuffer @@ -28,7 +33,11 @@ 
import org.apache.flume.Context import org.apache.flume.channel.MemoryChannel import org.apache.flume.conf.Configurables +<<<<<<< HEAD +import org.apache.spark.streaming.flume.sink.{SparkSinkConfig, SparkSink} +======= import org.apache.spark.streaming.flume.sink.{SparkSink, SparkSinkConfig} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * Share codes for Scala and Python unit tests diff --git a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaCluster.scala b/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaCluster.scala index c4e18d92eefa9..9879b74fe3ddb 100644 --- a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaCluster.scala +++ b/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaCluster.scala @@ -17,6 +17,16 @@ package org.apache.spark.streaming.kafka +<<<<<<< HEAD +import scala.util.control.NonFatal +import scala.util.Random +import scala.collection.mutable.ArrayBuffer +import scala.collection.JavaConverters._ +import java.util.Properties +import kafka.api._ +import kafka.common.{ErrorMapping, OffsetAndMetadata, OffsetMetadataAndError, TopicAndPartition} +import kafka.consumer.{ConsumerConfig, SimpleConsumer} +======= import java.util.Properties import scala.collection.JavaConverters._ @@ -28,6 +38,7 @@ import kafka.api._ import kafka.common.{ErrorMapping, OffsetAndMetadata, OffsetMetadataAndError, TopicAndPartition} import kafka.consumer.{ConsumerConfig, SimpleConsumer} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.SparkException /** diff --git a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaInputDStream.scala b/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaInputDStream.scala index 67f2360896b16..1ab6eb74c3c29 100644 --- a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaInputDStream.scala +++ b/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaInputDStream.scala @@ -22,7 +22,11 @@ import java.util.Properties import scala.collection.Map import scala.reflect.{classTag, ClassTag} +<<<<<<< HEAD +import kafka.consumer.{KafkaStream, Consumer, ConsumerConfig, ConsumerConnector} +======= import kafka.consumer.{Consumer, ConsumerConfig, ConsumerConnector, KafkaStream} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import kafka.serializer.Decoder import kafka.utils.VerifiableProperties diff --git a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaRDD.scala b/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaRDD.scala index 603be22818206..1ad659c4ca67a 100644 --- a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaRDD.scala +++ b/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaRDD.scala @@ -20,6 +20,14 @@ package org.apache.spark.streaming.kafka import scala.collection.mutable.ArrayBuffer import scala.reflect.{classTag, ClassTag} +<<<<<<< HEAD +import org.apache.spark.{Logging, Partition, SparkContext, SparkException, TaskContext} +import org.apache.spark.partial.{PartialResult, BoundedDouble} +import org.apache.spark.rdd.RDD +import org.apache.spark.util.NextIterator + +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import kafka.api.{FetchRequestBuilder, FetchResponse} import kafka.common.{ErrorMapping, TopicAndPartition} import kafka.consumer.SimpleConsumer @@ -27,11 +35,14 @@ import kafka.message.{MessageAndMetadata, MessageAndOffset} import kafka.serializer.Decoder import kafka.utils.VerifiableProperties +<<<<<<< 
HEAD +======= import org.apache.spark.{Logging, Partition, SparkContext, SparkException, TaskContext} import org.apache.spark.partial.{BoundedDouble, PartialResult} import org.apache.spark.rdd.RDD import org.apache.spark.util.NextIterator +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * A batch-oriented interface for consuming from Kafka. * Starting and ending offsets are specified in advance, diff --git a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaTestUtils.scala b/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaTestUtils.scala index a76fa6671a4b0..e5ab6e453c994 100644 --- a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaTestUtils.scala +++ b/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaTestUtils.scala @@ -20,8 +20,13 @@ package org.apache.spark.streaming.kafka import java.io.File import java.lang.{Integer => JInt} import java.net.InetSocketAddress +<<<<<<< HEAD +import java.util.concurrent.TimeoutException +import java.util.{Map => JMap, Properties} +======= import java.util.{Map => JMap, Properties} import java.util.concurrent.TimeoutException +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import scala.annotation.tailrec import scala.collection.JavaConverters._ @@ -37,9 +42,15 @@ import kafka.utils.{ZKStringSerializer, ZkUtils} import org.I0Itec.zkclient.ZkClient import org.apache.zookeeper.server.{NIOServerCnxnFactory, ZooKeeperServer} +<<<<<<< HEAD +import org.apache.spark.streaming.Time +import org.apache.spark.util.Utils +import org.apache.spark.{Logging, SparkConf} +======= import org.apache.spark.{Logging, SparkConf} import org.apache.spark.streaming.Time import org.apache.spark.util.Utils +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * This is a helper class for Kafka test suites. 
This has the functionality to set up diff --git a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaUtils.scala b/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaUtils.scala index 0cb875c9758f9..de6673431d9c3 100644 --- a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaUtils.scala +++ b/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/KafkaUtils.scala @@ -27,19 +27,32 @@ import scala.reflect.ClassTag import com.google.common.base.Charsets.UTF_8 import kafka.common.TopicAndPartition import kafka.message.MessageAndMetadata +<<<<<<< HEAD +import kafka.serializer.{DefaultDecoder, Decoder, StringDecoder} +import net.razorvine.pickle.{Opcodes, Pickler, IObjectPickler} + +import org.apache.spark.api.java.function.{Function => JFunction} +import org.apache.spark.streaming.util.WriteAheadLogUtils +import org.apache.spark.{SparkContext, SparkException} +import org.apache.spark.api.java.{JavaSparkContext, JavaPairRDD, JavaRDD} +======= import kafka.serializer.{Decoder, DefaultDecoder, StringDecoder} import net.razorvine.pickle.{IObjectPickler, Opcodes, Pickler} import org.apache.spark.{SparkContext, SparkException} import org.apache.spark.api.java.{JavaPairRDD, JavaRDD, JavaSparkContext} import org.apache.spark.api.java.function.{Function => JFunction} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.api.python.SerDeUtil import org.apache.spark.rdd.RDD import org.apache.spark.storage.StorageLevel import org.apache.spark.streaming.StreamingContext import org.apache.spark.streaming.api.java._ import org.apache.spark.streaming.dstream.{DStream, InputDStream, ReceiverInputDStream} +<<<<<<< HEAD +======= import org.apache.spark.streaming.util.WriteAheadLogUtils +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 object KafkaUtils { /** diff --git a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/ReliableKafkaReceiver.scala b/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/ReliableKafkaReceiver.scala index a872781b78eeb..7d36fb65c4458 100644 --- a/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/ReliableKafkaReceiver.scala +++ b/external/kafka/src/main/scala/org/apache/spark/streaming/kafka/ReliableKafkaReceiver.scala @@ -18,10 +18,17 @@ package org.apache.spark.streaming.kafka import java.util.Properties +<<<<<<< HEAD +import java.util.concurrent.{ThreadPoolExecutor, ConcurrentHashMap} + +import scala.collection.{Map, mutable} +import scala.reflect.{ClassTag, classTag} +======= import java.util.concurrent.{ConcurrentHashMap, ThreadPoolExecutor} import scala.collection.{mutable, Map} import scala.reflect.{classTag, ClassTag} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import kafka.common.TopicAndPartition import kafka.consumer.{Consumer, ConsumerConfig, ConsumerConnector, KafkaStream} diff --git a/external/twitter/src/main/scala/org/apache/spark/streaming/twitter/TwitterInputDStream.scala b/external/twitter/src/main/scala/org/apache/spark/streaming/twitter/TwitterInputDStream.scala index a48eec70b9f78..b399aa4e04a85 100644 --- a/external/twitter/src/main/scala/org/apache/spark/streaming/twitter/TwitterInputDStream.scala +++ b/external/twitter/src/main/scala/org/apache/spark/streaming/twitter/TwitterInputDStream.scala @@ -19,6 +19,15 @@ package org.apache.spark.streaming.twitter import twitter4j._ import twitter4j.auth.Authorization +<<<<<<< HEAD +import twitter4j.conf.ConfigurationBuilder +import twitter4j.auth.OAuthAuthorization + +import 
org.apache.spark.streaming._ +import org.apache.spark.streaming.dstream._ +import org.apache.spark.storage.StorageLevel +import org.apache.spark.Logging +======= import twitter4j.auth.OAuthAuthorization import twitter4j.conf.ConfigurationBuilder @@ -26,6 +35,7 @@ import org.apache.spark.Logging import org.apache.spark.storage.StorageLevel import org.apache.spark.streaming._ import org.apache.spark.streaming.dstream._ +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.streaming.receiver.Receiver /* A stream of Twitter statuses, potentially filtered by one or more keywords. diff --git a/external/twitter/src/main/scala/org/apache/spark/streaming/twitter/TwitterUtils.scala b/external/twitter/src/main/scala/org/apache/spark/streaming/twitter/TwitterUtils.scala index 3e843e947da61..22c13611dc80e 100644 --- a/external/twitter/src/main/scala/org/apache/spark/streaming/twitter/TwitterUtils.scala +++ b/external/twitter/src/main/scala/org/apache/spark/streaming/twitter/TwitterUtils.scala @@ -19,11 +19,18 @@ package org.apache.spark.streaming.twitter import twitter4j.Status import twitter4j.auth.Authorization +<<<<<<< HEAD +import org.apache.spark.storage.StorageLevel +import org.apache.spark.streaming.StreamingContext +import org.apache.spark.streaming.api.java.{JavaReceiverInputDStream, JavaDStream, JavaStreamingContext} +import org.apache.spark.streaming.dstream.{ReceiverInputDStream, DStream} +======= import org.apache.spark.storage.StorageLevel import org.apache.spark.streaming.StreamingContext import org.apache.spark.streaming.api.java.{JavaDStream, JavaReceiverInputDStream, JavaStreamingContext} import org.apache.spark.streaming.dstream.{DStream, ReceiverInputDStream} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 object TwitterUtils { /** diff --git a/external/zeromq/src/main/scala/org/apache/spark/streaming/zeromq/ZeroMQUtils.scala b/external/zeromq/src/main/scala/org/apache/spark/streaming/zeromq/ZeroMQUtils.scala index 63cd8a2721f0c..66df9cd84cb34 100644 --- a/external/zeromq/src/main/scala/org/apache/spark/streaming/zeromq/ZeroMQUtils.scala +++ b/external/zeromq/src/main/scala/org/apache/spark/streaming/zeromq/ZeroMQUtils.scala @@ -17,8 +17,13 @@ package org.apache.spark.streaming.zeromq +<<<<<<< HEAD +import scala.reflect.ClassTag +import scala.collection.JavaConverters._ +======= import scala.collection.JavaConverters._ import scala.reflect.ClassTag +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import akka.actor.{Props, SupervisorStrategy} import akka.util.ByteString diff --git a/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisBackedBlockRDD.scala b/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisBackedBlockRDD.scala index 3996f168e69ee..52ae4bd3db5d4 100644 --- a/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisBackedBlockRDD.scala +++ b/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisBackedBlockRDD.scala @@ -70,6 +70,20 @@ class KinesisBackedBlockRDDPartition( */ private[kinesis] class KinesisBackedBlockRDD[T: ClassTag]( +<<<<<<< HEAD + @transient sc: SparkContext, + val regionName: String, + val endpointUrl: String, + @transient blockIds: Array[BlockId], + @transient val arrayOfseqNumberRanges: Array[SequenceNumberRanges], + @transient isBlockIdValid: Array[Boolean] = Array.empty, + val retryTimeoutMs: Int = 10000, + val messageHandler: Record => T = KinesisUtils.defaultMessageHandler _, + val awsCredentialsOption: 
Option[SerializableAWSCredentials] = None + ) extends BlockRDD[T](sc, blockIds) { + + require(blockIds.length == arrayOfseqNumberRanges.length, +======= sc: SparkContext, val regionName: String, val endpointUrl: String, @@ -82,14 +96,21 @@ class KinesisBackedBlockRDD[T: ClassTag]( ) extends BlockRDD[T](sc, _blockIds) { require(_blockIds.length == arrayOfseqNumberRanges.length, +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 "Number of blockIds is not equal to the number of sequence number ranges") override def isValid(): Boolean = true override def getPartitions: Array[Partition] = { +<<<<<<< HEAD + Array.tabulate(blockIds.length) { i => + val isValid = if (isBlockIdValid.length == 0) true else isBlockIdValid(i) + new KinesisBackedBlockRDDPartition(i, blockIds(i), isValid, arrayOfseqNumberRanges(i)) +======= Array.tabulate(_blockIds.length) { i => val isValid = if (isBlockIdValid.length == 0) true else isBlockIdValid(i) new KinesisBackedBlockRDDPartition(i, _blockIds(i), isValid, arrayOfseqNumberRanges(i)) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 } } diff --git a/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisInputDStream.scala b/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisInputDStream.scala index 3321c7527edb4..1d12cbb154fa7 100644 --- a/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisInputDStream.scala +++ b/extras/kinesis-asl/src/main/scala/org/apache/spark/streaming/kinesis/KinesisInputDStream.scala @@ -30,7 +30,11 @@ import org.apache.spark.streaming.scheduler.ReceivedBlockInfo import org.apache.spark.streaming.{Duration, StreamingContext, Time} private[kinesis] class KinesisInputDStream[T: ClassTag]( +<<<<<<< HEAD + @transient _ssc: StreamingContext, +======= _ssc: StreamingContext, +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 streamName: String, endpointUrl: String, regionName: String, diff --git a/graphx/src/main/scala/org/apache/spark/graphx/EdgeRDD.scala b/graphx/src/main/scala/org/apache/spark/graphx/EdgeRDD.scala index 45526bf062fab..7aa87b6e1a048 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/EdgeRDD.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/EdgeRDD.scala @@ -24,11 +24,20 @@ import org.apache.spark.Dependency import org.apache.spark.Partition import org.apache.spark.SparkContext import org.apache.spark.TaskContext +<<<<<<< HEAD +import org.apache.spark.rdd.RDD +import org.apache.spark.storage.StorageLevel + +import org.apache.spark.graphx.impl.EdgePartition +import org.apache.spark.graphx.impl.EdgePartitionBuilder +import org.apache.spark.graphx.impl.EdgeRDDImpl +======= import org.apache.spark.graphx.impl.EdgePartition import org.apache.spark.graphx.impl.EdgePartitionBuilder import org.apache.spark.graphx.impl.EdgeRDDImpl import org.apache.spark.rdd.RDD import org.apache.spark.storage.StorageLevel +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * `EdgeRDD[ED, VD]` extends `RDD[Edge[ED]]` by storing the edges in columnar format on each diff --git a/graphx/src/main/scala/org/apache/spark/graphx/GraphKryoRegistrator.scala b/graphx/src/main/scala/org/apache/spark/graphx/GraphKryoRegistrator.scala index eaa71dad17a04..20ef3edac3847 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/GraphKryoRegistrator.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/GraphKryoRegistrator.scala @@ -19,11 +19,20 @@ package org.apache.spark.graphx import com.esotericsoftware.kryo.Kryo +<<<<<<< HEAD +import 
org.apache.spark.serializer.KryoRegistrator +import org.apache.spark.util.BoundedPriorityQueue +import org.apache.spark.util.collection.BitSet + +import org.apache.spark.graphx.impl._ +import org.apache.spark.graphx.util.collection.GraphXPrimitiveKeyOpenHashMap +======= import org.apache.spark.graphx.impl._ import org.apache.spark.graphx.util.collection.GraphXPrimitiveKeyOpenHashMap import org.apache.spark.serializer.KryoRegistrator import org.apache.spark.util.BoundedPriorityQueue import org.apache.spark.util.collection.BitSet +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.util.collection.OpenHashSet /** diff --git a/graphx/src/main/scala/org/apache/spark/graphx/GraphLoader.scala b/graphx/src/main/scala/org/apache/spark/graphx/GraphLoader.scala index 1672f7d27c401..5618ca584bca6 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/GraphLoader.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/GraphLoader.scala @@ -17,9 +17,15 @@ package org.apache.spark.graphx +<<<<<<< HEAD +import org.apache.spark.storage.StorageLevel +import org.apache.spark.{Logging, SparkContext} +import org.apache.spark.graphx.impl.{EdgePartitionBuilder, GraphImpl} +======= import org.apache.spark.{Logging, SparkContext} import org.apache.spark.graphx.impl.{EdgePartitionBuilder, GraphImpl} import org.apache.spark.storage.StorageLevel +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * Provides utilities for loading [[Graph]]s from files. diff --git a/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala b/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala index fc36e12dd2aed..a930e6b49d2f2 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/GraphOps.scala @@ -22,8 +22,14 @@ import scala.util.Random import org.apache.spark.SparkException import org.apache.spark.SparkContext._ +<<<<<<< HEAD +import org.apache.spark.rdd.RDD + +import org.apache.spark.graphx.lib._ +======= import org.apache.spark.graphx.lib._ import org.apache.spark.rdd.RDD +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * Contains additional functionality for [[Graph]]. 
All operations are expressed in terms of the diff --git a/graphx/src/main/scala/org/apache/spark/graphx/GraphXUtils.scala b/graphx/src/main/scala/org/apache/spark/graphx/GraphXUtils.scala index 8ec33e140000e..dc3984f9731f6 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/GraphXUtils.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/GraphXUtils.scala @@ -18,10 +18,19 @@ package org.apache.spark.graphx import org.apache.spark.SparkConf +<<<<<<< HEAD + +import org.apache.spark.graphx.impl._ +import org.apache.spark.graphx.util.collection.GraphXPrimitiveKeyOpenHashMap + +import org.apache.spark.util.collection.{OpenHashSet, BitSet} +import org.apache.spark.util.BoundedPriorityQueue +======= import org.apache.spark.graphx.impl._ import org.apache.spark.graphx.util.collection.GraphXPrimitiveKeyOpenHashMap import org.apache.spark.util.BoundedPriorityQueue import org.apache.spark.util.collection.{BitSet, OpenHashSet} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 object GraphXUtils { /** diff --git a/graphx/src/main/scala/org/apache/spark/graphx/Pregel.scala b/graphx/src/main/scala/org/apache/spark/graphx/Pregel.scala index b908860310093..2219de1f107b6 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/Pregel.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/Pregel.scala @@ -18,8 +18,13 @@ package org.apache.spark.graphx import scala.reflect.ClassTag +<<<<<<< HEAD +import org.apache.spark.Logging + +======= import org.apache.spark.Logging +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * Implements a Pregel-like bulk-synchronous message-passing API. diff --git a/graphx/src/main/scala/org/apache/spark/graphx/VertexRDD.scala b/graphx/src/main/scala/org/apache/spark/graphx/VertexRDD.scala index 53a9f92b82bc5..ce1d0545434c0 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/VertexRDD.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/VertexRDD.scala @@ -21,12 +21,21 @@ import scala.reflect.ClassTag import org.apache.spark._ import org.apache.spark.SparkContext._ +<<<<<<< HEAD +import org.apache.spark.rdd._ +import org.apache.spark.storage.StorageLevel + +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.graphx.impl.RoutingTablePartition import org.apache.spark.graphx.impl.ShippableVertexPartition import org.apache.spark.graphx.impl.VertexAttributeBlock import org.apache.spark.graphx.impl.VertexRDDImpl +<<<<<<< HEAD +======= import org.apache.spark.rdd._ import org.apache.spark.storage.StorageLevel +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * Extends `RDD[(VertexId, VD)]` by ensuring that there is only one entry for each vertex and by diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgePartitionBuilder.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgePartitionBuilder.scala index b122969b817fa..11b4d0547e2ae 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgePartitionBuilder.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgePartitionBuilder.scala @@ -21,7 +21,11 @@ import scala.reflect.ClassTag import org.apache.spark.graphx._ import org.apache.spark.graphx.util.collection.GraphXPrimitiveKeyOpenHashMap +<<<<<<< HEAD +import org.apache.spark.util.collection.{SortDataFormat, Sorter, PrimitiveVector} +======= import org.apache.spark.util.collection.{PrimitiveVector, SortDataFormat, Sorter} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** Constructs an EdgePartition from scratch. 
*/ private[graphx] diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgeRDDImpl.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgeRDDImpl.scala index 6e153b7e803ea..a985b114f53c0 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgeRDDImpl.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/EdgeRDDImpl.scala @@ -19,11 +19,20 @@ package org.apache.spark.graphx.impl import scala.reflect.{classTag, ClassTag} +<<<<<<< HEAD +import org.apache.spark.{OneToOneDependency, HashPartitioner} +import org.apache.spark.rdd.RDD +import org.apache.spark.storage.StorageLevel + +import org.apache.spark.graphx._ + +======= import org.apache.spark.{HashPartitioner, OneToOneDependency} import org.apache.spark.graphx._ import org.apache.spark.rdd.RDD import org.apache.spark.storage.StorageLevel +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 class EdgeRDDImpl[ED: ClassTag, VD: ClassTag] private[graphx] ( @transient override val partitionsRDD: RDD[(PartitionID, EdgePartition[ED, VD])], val targetStorageLevel: StorageLevel = StorageLevel.MEMORY_ONLY) diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala index 81182adbc6389..6be1ed1949a6a 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/GraphImpl.scala @@ -21,11 +21,20 @@ import scala.reflect.{classTag, ClassTag} import org.apache.spark.HashPartitioner import org.apache.spark.SparkContext._ +<<<<<<< HEAD +import org.apache.spark.rdd.{RDD, ShuffledRDD} +import org.apache.spark.storage.StorageLevel +import org.apache.spark.graphx._ +import org.apache.spark.graphx.impl.GraphImpl._ +import org.apache.spark.graphx.util.BytecodeUtils + +======= import org.apache.spark.graphx._ import org.apache.spark.graphx.impl.GraphImpl._ import org.apache.spark.graphx.util.BytecodeUtils import org.apache.spark.rdd.{RDD, ShuffledRDD} import org.apache.spark.storage.StorageLevel +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * An implementation of [[org.apache.spark.graphx.Graph]] to support computation on graphs. 
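The KinesisBackedBlockRDD and KinesisInputDStream hunks a little earlier in this section differ mainly in which constructor arguments carry @transient. The sketch below is a minimal, Spark-free illustration of what that annotation does under plain Java serialization: a non-serializable, driver-side reference can live inside a serializable class as long as its field is transient. Driver and Task are hypothetical names standing in for the SparkContext/StreamingContext handles in those hunks; this is not Spark code.

import java.io.{ByteArrayOutputStream, ObjectOutputStream}

// Hypothetical stand-ins: Driver is not serializable, Task must be.
class Driver { val connection: AnyRef = new AnyRef }

class Task(@transient val driver: Driver, val payload: Array[Int]) extends java.io.Serializable

object TransientSketch {
  def main(args: Array[String]): Unit = {
    val task = new Task(new Driver, Array(1, 2, 3))
    val out = new ObjectOutputStream(new ByteArrayOutputStream())
    // Succeeds only because the driver field is transient and is skipped during serialization;
    // without @transient this would throw NotSerializableException for Driver.
    out.writeObject(task)
    out.close()
    println(s"serialized task with ${task.payload.length} elements")
  }
}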
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/ReplicatedVertexView.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/ReplicatedVertexView.scala index f79f9c7ec448f..bbe8dcdc009c2 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/impl/ReplicatedVertexView.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/ReplicatedVertexView.scala @@ -20,8 +20,14 @@ package org.apache.spark.graphx.impl import scala.reflect.{classTag, ClassTag} import org.apache.spark.SparkContext._ +<<<<<<< HEAD +import org.apache.spark.rdd.RDD + +import org.apache.spark.graphx._ +======= import org.apache.spark.graphx._ import org.apache.spark.rdd.RDD +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * Manages shipping vertex attributes to the edge partitions of an diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/RoutingTablePartition.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/RoutingTablePartition.scala index 3fd76902af646..67ea8a3b33950 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/impl/RoutingTablePartition.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/RoutingTablePartition.scala @@ -20,13 +20,24 @@ package org.apache.spark.graphx.impl import scala.reflect.ClassTag import org.apache.spark.Partitioner +<<<<<<< HEAD +======= import org.apache.spark.graphx._ import org.apache.spark.graphx.impl.RoutingTablePartition.RoutingTableMessage import org.apache.spark.graphx.util.collection.GraphXPrimitiveKeyOpenHashMap +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.rdd.RDD import org.apache.spark.rdd.ShuffledRDD import org.apache.spark.util.collection.{BitSet, PrimitiveVector} +<<<<<<< HEAD +import org.apache.spark.graphx._ +import org.apache.spark.graphx.util.collection.GraphXPrimitiveKeyOpenHashMap + +import org.apache.spark.graphx.impl.RoutingTablePartition.RoutingTableMessage + +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 private[graphx] object RoutingTablePartition { /** diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/ShippableVertexPartition.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/ShippableVertexPartition.scala index 3f203c4eca485..3b9b59a0e0b25 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/impl/ShippableVertexPartition.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/ShippableVertexPartition.scala @@ -19,9 +19,16 @@ package org.apache.spark.graphx.impl import scala.reflect.ClassTag +<<<<<<< HEAD +import org.apache.spark.util.collection.{BitSet, PrimitiveVector} + +import org.apache.spark.graphx._ +import org.apache.spark.graphx.util.collection.GraphXPrimitiveKeyOpenHashMap +======= import org.apache.spark.graphx._ import org.apache.spark.graphx.util.collection.GraphXPrimitiveKeyOpenHashMap import org.apache.spark.util.collection.{BitSet, PrimitiveVector} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** Stores vertex attributes to ship to an edge partition. 
*/ private[graphx] diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/VertexPartition.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/VertexPartition.scala index 4512bc17399a9..2dd8f81d4857f 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/impl/VertexPartition.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/VertexPartition.scala @@ -19,9 +19,16 @@ package org.apache.spark.graphx.impl import scala.reflect.ClassTag +<<<<<<< HEAD +import org.apache.spark.util.collection.BitSet + +import org.apache.spark.graphx._ +import org.apache.spark.graphx.util.collection.GraphXPrimitiveKeyOpenHashMap +======= import org.apache.spark.graphx._ import org.apache.spark.graphx.util.collection.GraphXPrimitiveKeyOpenHashMap import org.apache.spark.util.collection.BitSet +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 private[graphx] object VertexPartition { /** Construct a `VertexPartition` from the given vertices. */ diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/VertexPartitionBase.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/VertexPartitionBase.scala index 8d608c99b1a1d..71be82e9204f7 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/impl/VertexPartitionBase.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/VertexPartitionBase.scala @@ -20,9 +20,16 @@ package org.apache.spark.graphx.impl import scala.language.higherKinds import scala.reflect.ClassTag +<<<<<<< HEAD +import org.apache.spark.util.collection.BitSet + +import org.apache.spark.graphx._ +import org.apache.spark.graphx.util.collection.GraphXPrimitiveKeyOpenHashMap +======= import org.apache.spark.graphx._ import org.apache.spark.graphx.util.collection.GraphXPrimitiveKeyOpenHashMap import org.apache.spark.util.collection.BitSet +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 private[graphx] object VertexPartitionBase { /** diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/VertexPartitionBaseOps.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/VertexPartitionBaseOps.scala index f508b483a2f1b..c4f2596994163 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/impl/VertexPartitionBaseOps.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/VertexPartitionBaseOps.scala @@ -22,9 +22,16 @@ import scala.language.implicitConversions import scala.reflect.ClassTag import org.apache.spark.Logging +<<<<<<< HEAD +import org.apache.spark.util.collection.BitSet + +import org.apache.spark.graphx._ +import org.apache.spark.graphx.util.collection.GraphXPrimitiveKeyOpenHashMap +======= import org.apache.spark.graphx._ import org.apache.spark.graphx.util.collection.GraphXPrimitiveKeyOpenHashMap import org.apache.spark.util.collection.BitSet +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * An class containing additional operations for subclasses of VertexPartitionBase that provide diff --git a/graphx/src/main/scala/org/apache/spark/graphx/impl/VertexRDDImpl.scala b/graphx/src/main/scala/org/apache/spark/graphx/impl/VertexRDDImpl.scala index d5accdfbf7e9c..1c96d8b7f0cc9 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/impl/VertexRDDImpl.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/impl/VertexRDDImpl.scala @@ -21,10 +21,18 @@ import scala.reflect.ClassTag import org.apache.spark._ import org.apache.spark.SparkContext._ +<<<<<<< HEAD +import org.apache.spark.rdd._ +import org.apache.spark.storage.StorageLevel + +import org.apache.spark.graphx._ + +======= import org.apache.spark.graphx._ import 
org.apache.spark.rdd._ import org.apache.spark.storage.StorageLevel +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 class VertexRDDImpl[VD] private[graphx] ( @transient val partitionsRDD: RDD[ShippableVertexPartition[VD]], val targetStorageLevel: StorageLevel = StorageLevel.MEMORY_ONLY) diff --git a/graphx/src/main/scala/org/apache/spark/graphx/lib/LabelPropagation.scala b/graphx/src/main/scala/org/apache/spark/graphx/lib/LabelPropagation.scala index 7a53eca7eac64..a5ca85122ee3a 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/lib/LabelPropagation.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/lib/LabelPropagation.scala @@ -18,7 +18,10 @@ package org.apache.spark.graphx.lib import scala.reflect.ClassTag +<<<<<<< HEAD +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.graphx._ /** Label Propagation algorithm. */ diff --git a/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala b/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala index 35b26c998e1d9..4259dec7129df 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala @@ -17,8 +17,13 @@ package org.apache.spark.graphx.lib +<<<<<<< HEAD +import scala.reflect.ClassTag +import scala.language.postfixOps +======= import scala.language.postfixOps import scala.reflect.ClassTag +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.Logging import org.apache.spark.graphx._ diff --git a/graphx/src/main/scala/org/apache/spark/graphx/lib/SVDPlusPlus.scala b/graphx/src/main/scala/org/apache/spark/graphx/lib/SVDPlusPlus.scala index 16300e0740790..986d449036ab6 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/lib/SVDPlusPlus.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/lib/SVDPlusPlus.scala @@ -21,8 +21,13 @@ import scala.util.Random import com.github.fommil.netlib.BLAS.{getInstance => blas} +<<<<<<< HEAD +import org.apache.spark.rdd._ +import org.apache.spark.graphx._ +======= import org.apache.spark.graphx._ import org.apache.spark.rdd._ +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** Implementation of SVD++ algorithm. */ object SVDPlusPlus { diff --git a/graphx/src/main/scala/org/apache/spark/graphx/lib/ShortestPaths.scala b/graphx/src/main/scala/org/apache/spark/graphx/lib/ShortestPaths.scala index f0c6bcb93445c..589045abc4a42 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/lib/ShortestPaths.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/lib/ShortestPaths.scala @@ -17,10 +17,16 @@ package org.apache.spark.graphx.lib +<<<<<<< HEAD +import org.apache.spark.graphx._ +import scala.reflect.ClassTag + +======= import scala.reflect.ClassTag import org.apache.spark.graphx._ +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * Computes shortest paths to the given set of landmark vertices, returning a graph where each * vertex attribute is a map containing the shortest-path distance to each reachable landmark. 
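Most of the GraphX hunks in this stretch change nothing but import layout: the side below the ======= marker groups imports (java, then scala, then third-party, then org.apache.spark) with a blank line between groups and keeps the selectors inside braces in one consistent order, which appears to match the import-ordering style Spark's style checks enforce. The file below is a hypothetical, dependency-free sketch of that layout; the import lines are copied from the ======= side of the hunks above, and the object exists only so the snippet compiles on its own.

import java.util.concurrent.{ConcurrentHashMap, ThreadPoolExecutor}

import scala.collection.{mutable, Map}
import scala.reflect.{classTag, ClassTag}

object ImportOrderSketch {
  // Touch the imported names so the layout above is exercised by a real file.
  val pools = new ConcurrentHashMap[String, ThreadPoolExecutor]()
  val tags: mutable.Map[String, ClassTag[Int]] = mutable.Map("int" -> classTag[Int])
  val readOnlyView: Map[String, ClassTag[Int]] = tags
}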
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/util/GraphGenerators.scala b/graphx/src/main/scala/org/apache/spark/graphx/util/GraphGenerators.scala index 280b6c5578fe5..a7691f84fb071 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/util/GraphGenerators.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/util/GraphGenerators.scala @@ -23,11 +23,22 @@ import scala.reflect.ClassTag import scala.util._ import org.apache.spark._ +<<<<<<< HEAD +import org.apache.spark.serializer._ +import org.apache.spark.rdd.RDD +import org.apache.spark.SparkContext +import org.apache.spark.SparkContext._ +import org.apache.spark.graphx._ +import org.apache.spark.graphx.Graph +import org.apache.spark.graphx.Edge +import org.apache.spark.graphx.impl.GraphImpl +======= import org.apache.spark.SparkContext._ import org.apache.spark.graphx._ import org.apache.spark.graphx.impl.GraphImpl import org.apache.spark.rdd.RDD import org.apache.spark.serializer._ +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** A collection of graph generating functions. */ object GraphGenerators extends Logging { diff --git a/graphx/src/main/scala/org/apache/spark/graphx/util/collection/GraphXPrimitiveKeyOpenHashMap.scala b/graphx/src/main/scala/org/apache/spark/graphx/util/collection/GraphXPrimitiveKeyOpenHashMap.scala index 972237da1cb28..1024868bc3ea5 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/util/collection/GraphXPrimitiveKeyOpenHashMap.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/util/collection/GraphXPrimitiveKeyOpenHashMap.scala @@ -17,10 +17,17 @@ package org.apache.spark.graphx.util.collection +<<<<<<< HEAD +import org.apache.spark.util.collection.OpenHashSet + +import scala.reflect._ + +======= import scala.reflect._ import org.apache.spark.util.collection.OpenHashSet +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * A fast hash map implementation for primitive, non-null keys. This hash map supports * insertions and updates, but not deletions. 
This map is about an order of magnitude diff --git a/launcher/src/main/java/org/apache/spark/launcher/LauncherServer.java b/launcher/src/main/java/org/apache/spark/launcher/LauncherServer.java index 414ffc2c84e52..0580d35e4b0f5 100644 --- a/launcher/src/main/java/org/apache/spark/launcher/LauncherServer.java +++ b/launcher/src/main/java/org/apache/spark/launcher/LauncherServer.java @@ -293,7 +293,13 @@ private class ServerConnection extends LauncherConnection { protected void handle(Message msg) throws IOException { try { if (msg instanceof Hello) { +<<<<<<< HEAD + synchronized (timeout) { + timeout.cancel(); + } +======= timeout.cancel(); +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 timeout = null; Hello hello = (Hello) msg; ChildProcAppHandle handle = pending.remove(hello.secret); diff --git a/launcher/src/main/java/org/apache/spark/launcher/Main.java b/launcher/src/main/java/org/apache/spark/launcher/Main.java index e751e948e3561..cdcb21b28f655 100644 --- a/launcher/src/main/java/org/apache/spark/launcher/Main.java +++ b/launcher/src/main/java/org/apache/spark/launcher/Main.java @@ -151,7 +151,11 @@ private static class MainClassOptionParser extends SparkSubmitOptionParser { @Override protected boolean handle(String opt, String value) { +<<<<<<< HEAD + if (opt == CLASS) { +======= if (CLASS.equals(opt)) { +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 className = value; } return false; diff --git a/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala b/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala index 3acc60d6c6d65..b043f6119977c 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala @@ -26,9 +26,17 @@ import org.apache.hadoop.fs.Path import org.json4s._ import org.json4s.jackson.JsonMethods._ +<<<<<<< HEAD +import org.apache.spark.{SparkContext, Logging} +import org.apache.spark.annotation.{Since, DeveloperApi, Experimental} +import org.apache.spark.ml.param.{Param, ParamMap, Params} +import org.apache.spark.ml.util.MLReader +import org.apache.spark.ml.util.MLWriter +======= import org.apache.spark.{Logging, SparkContext} import org.apache.spark.annotation.{DeveloperApi, Experimental, Since} import org.apache.spark.ml.param.{Param, ParamMap, Params} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.ml.util._ import org.apache.spark.sql.DataFrame import org.apache.spark.sql.types.StructType diff --git a/mllib/src/main/scala/org/apache/spark/ml/Predictor.scala b/mllib/src/main/scala/org/apache/spark/ml/Predictor.scala index 6aacffd4f236f..639d0a17b1b9c 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/Predictor.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/Predictor.scala @@ -24,9 +24,15 @@ import org.apache.spark.ml.util.SchemaUtils import org.apache.spark.mllib.linalg.{Vector, VectorUDT} import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.rdd.RDD +<<<<<<< HEAD +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.types.{DataType, DoubleType, StructType} +import org.apache.spark.sql.{DataFrame, Row} +======= import org.apache.spark.sql.{DataFrame, Row} import org.apache.spark.sql.functions._ import org.apache.spark.sql.types.{DataType, DoubleType, StructType} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * (private[ml]) Trait for parameters for prediction (regression and classification). 
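The launcher Main.java hunk above swaps a reference comparison on Java strings (opt == CLASS) for a value comparison (CLASS.equals(opt)). The Scala sketch below shows the same distinction with eq versus ==; the option string is hypothetical and is built at runtime so the two values are equal in content but are not the same object.

object EqualitySketch {
  def main(args: Array[String]): Unit = {
    val constant = "--class"
    val parsed = new String("--class") // equal content, distinct object, like a freshly parsed option

    println(parsed == constant) // true: value equality, what CLASS.equals(opt) checks
    println(parsed eq constant) // false: reference equality, what Java's == checks on objects
  }
}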
diff --git a/mllib/src/main/scala/org/apache/spark/ml/ann/Layer.scala b/mllib/src/main/scala/org/apache/spark/ml/ann/Layer.scala index d02806a6ea227..96f5583e83698 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/ann/Layer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/ann/Layer.scala @@ -17,8 +17,13 @@ package org.apache.spark.ml.ann +<<<<<<< HEAD +import breeze.linalg.{*, DenseMatrix => BDM, DenseVector => BDV, Vector => BV, axpy => Baxpy, + sum => Bsum} +======= import breeze.linalg.{*, axpy => Baxpy, sum => Bsum, DenseMatrix => BDM, DenseVector => BDV, Vector => BV} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import breeze.numerics.{log => Blog, sigmoid => Bsigmoid} import org.apache.spark.mllib.linalg.{Vector, Vectors} diff --git a/mllib/src/main/scala/org/apache/spark/ml/attribute/attributes.scala b/mllib/src/main/scala/org/apache/spark/ml/attribute/attributes.scala index 521d209a8f0ed..e7db6fac85a19 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/attribute/attributes.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/attribute/attributes.scala @@ -20,7 +20,11 @@ package org.apache.spark.ml.attribute import scala.annotation.varargs import org.apache.spark.annotation.DeveloperApi +<<<<<<< HEAD +import org.apache.spark.sql.types.{DoubleType, NumericType, Metadata, MetadataBuilder, StructField} +======= import org.apache.spark.sql.types.{DoubleType, Metadata, MetadataBuilder, NumericType, StructField} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * :: DeveloperApi :: diff --git a/mllib/src/main/scala/org/apache/spark/ml/attribute/package.scala b/mllib/src/main/scala/org/apache/spark/ml/attribute/package.scala index f6964054db839..5a6a63ae80ac5 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/attribute/package.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/attribute/package.scala @@ -17,8 +17,13 @@ package org.apache.spark.ml +<<<<<<< HEAD +import org.apache.spark.sql.DataFrame +import org.apache.spark.ml.attribute.{Attribute, AttributeGroup} +======= import org.apache.spark.ml.attribute.{Attribute, AttributeGroup} import org.apache.spark.sql.DataFrame +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * ==ML attributes== diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/Classifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/Classifier.scala index 8186afc17a53e..f20edd856d2ce 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/Classifier.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/Classifier.scala @@ -18,7 +18,11 @@ package org.apache.spark.ml.classification import org.apache.spark.annotation.DeveloperApi +<<<<<<< HEAD +import org.apache.spark.ml.{PredictionModel, PredictorParams, Predictor} +======= import org.apache.spark.ml.{PredictionModel, Predictor, PredictorParams} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.ml.param.shared.HasRawPredictionCol import org.apache.spark.ml.util.SchemaUtils import org.apache.spark.mllib.linalg.{Vector, VectorUDT} @@ -26,6 +30,10 @@ import org.apache.spark.sql.DataFrame import org.apache.spark.sql.functions._ import org.apache.spark.sql.types.{DataType, StructType} +<<<<<<< HEAD + +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * (private[spark]) Params for classification. 
*/ diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala index 74bf07c3f1fff..89cff4274a64f 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala @@ -33,7 +33,11 @@ import org.apache.spark.mllib.tree.configuration.{Algo => OldAlgo} import org.apache.spark.mllib.tree.loss.{LogLoss => OldLogLoss, Loss => OldLoss} import org.apache.spark.mllib.tree.model.{GradientBoostedTreesModel => OldGBTModel} import org.apache.spark.rdd.RDD +<<<<<<< HEAD +import org.apache.spark.sql.{Row, DataFrame} +======= import org.apache.spark.sql.{DataFrame, Row} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.sql.functions._ import org.apache.spark.sql.types.DoubleType diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala index 719d1076fee8e..42932890b68b7 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala @@ -20,12 +20,21 @@ package org.apache.spark.ml.classification import scala.collection.JavaConverters._ import org.apache.spark.annotation.{Experimental, Since} +<<<<<<< HEAD +import org.apache.spark.ml.param.shared.{HasTol, HasMaxIter, HasSeed} +import org.apache.spark.ml.{PredictorParams, PredictionModel, Predictor} +import org.apache.spark.ml.param.{IntParam, ParamValidators, IntArrayParam, ParamMap} +import org.apache.spark.ml.util.Identifiable +import org.apache.spark.ml.ann.{FeedForwardTrainer, FeedForwardTopology} +import org.apache.spark.mllib.linalg.{Vectors, Vector} +======= import org.apache.spark.ml.{PredictionModel, Predictor, PredictorParams} import org.apache.spark.ml.ann.{FeedForwardTopology, FeedForwardTrainer} import org.apache.spark.ml.param.{IntArrayParam, IntParam, ParamMap, ParamValidators} import org.apache.spark.ml.param.shared.{HasMaxIter, HasSeed, HasTol} import org.apache.spark.ml.util.Identifiable import org.apache.spark.mllib.linalg.{Vector, Vectors} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.sql.DataFrame diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/ProbabilisticClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/ProbabilisticClassifier.scala index 865614aa5c8a7..5846ad300f862 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/ProbabilisticClassifier.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/ProbabilisticClassifier.scala @@ -20,7 +20,11 @@ package org.apache.spark.ml.classification import org.apache.spark.annotation.DeveloperApi import org.apache.spark.ml.param.shared._ import org.apache.spark.ml.util.SchemaUtils +<<<<<<< HEAD +import org.apache.spark.mllib.linalg.{DenseVector, Vector, VectorUDT, Vectors} +======= import org.apache.spark.mllib.linalg.{DenseVector, Vector, Vectors, VectorUDT} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.sql.DataFrame import org.apache.spark.sql.functions._ import org.apache.spark.sql.types.{DataType, StructType} diff --git 
a/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala index f7d662df2fe5a..639c0ea582fca 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala @@ -18,11 +18,19 @@ package org.apache.spark.ml.classification import org.apache.spark.annotation.{Experimental, Since} +<<<<<<< HEAD +import org.apache.spark.ml.tree.impl.RandomForest +import org.apache.spark.ml.param.ParamMap +import org.apache.spark.ml.tree.{DecisionTreeModel, RandomForestParams, TreeClassifierParams, TreeEnsembleModel} +import org.apache.spark.ml.util.{Identifiable, MetadataUtils} +import org.apache.spark.mllib.linalg.{SparseVector, DenseVector, Vector, Vectors} +======= import org.apache.spark.ml.param.ParamMap import org.apache.spark.ml.tree.{DecisionTreeModel, RandomForestParams, TreeClassifierParams, TreeEnsembleModel} import org.apache.spark.ml.tree.impl.RandomForest import org.apache.spark.ml.util.{Identifiable, MetadataUtils} import org.apache.spark.mllib.linalg.{DenseVector, SparseVector, Vector, Vectors} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.mllib.tree.configuration.{Algo => OldAlgo} import org.apache.spark.mllib.tree.model.{RandomForestModel => OldRandomForestModel} diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala index 6e5abb29ff0a3..5d53428348e72 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala @@ -20,6 +20,17 @@ package org.apache.spark.ml.clustering import org.apache.hadoop.fs.Path import org.apache.spark.annotation.{Experimental, Since} +<<<<<<< HEAD +import org.apache.spark.ml.param.shared._ +import org.apache.spark.ml.param.{IntParam, Param, ParamMap, Params} +import org.apache.spark.ml.util._ +import org.apache.spark.ml.{Estimator, Model} +import org.apache.spark.mllib.clustering.{KMeans => MLlibKMeans, KMeansModel => MLlibKMeansModel} +import org.apache.spark.mllib.linalg.{Vector, VectorUDT} +import org.apache.spark.sql.functions.{col, udf} +import org.apache.spark.sql.types.{IntegerType, StructType} +import org.apache.spark.sql.{DataFrame, Row} +======= import org.apache.spark.ml.{Estimator, Model} import org.apache.spark.ml.param.{IntParam, Param, ParamMap, Params} import org.apache.spark.ml.param.shared._ @@ -29,6 +40,7 @@ import org.apache.spark.mllib.linalg.{Vector, VectorUDT} import org.apache.spark.sql.{DataFrame, Row} import org.apache.spark.sql.functions.{col, udf} import org.apache.spark.sql.types.{IntegerType, StructType} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * Common params for KMeans and KMeansModel diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala index af0b3e1835003..c346338bcc7b1 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala @@ -18,20 +18,34 @@ package org.apache.spark.ml.clustering import org.apache.hadoop.fs.Path +<<<<<<< HEAD +import org.apache.spark.Logging +import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.ml.{Estimator, Model} 
+import org.apache.spark.ml.param.shared.{HasCheckpointInterval, HasFeaturesCol, HasSeed, HasMaxIter} +import org.apache.spark.ml.param._ +======= import org.apache.spark.Logging import org.apache.spark.annotation.{Experimental, Since} import org.apache.spark.ml.{Estimator, Model} import org.apache.spark.ml.param._ import org.apache.spark.ml.param.shared.{HasCheckpointInterval, HasFeaturesCol, HasMaxIter, HasSeed} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.ml.util._ import org.apache.spark.mllib.clustering.{DistributedLDAModel => OldDistributedLDAModel, EMLDAOptimizer => OldEMLDAOptimizer, LDA => OldLDA, LDAModel => OldLDAModel, LDAOptimizer => OldLDAOptimizer, LocalLDAModel => OldLocalLDAModel, OnlineLDAOptimizer => OldOnlineLDAOptimizer} +<<<<<<< HEAD +import org.apache.spark.mllib.linalg.{VectorUDT, Vectors, Matrix, Vector} +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.{SQLContext, DataFrame, Row} +======= import org.apache.spark.mllib.linalg.{Matrix, Vector, Vectors, VectorUDT} import org.apache.spark.rdd.RDD import org.apache.spark.sql.{DataFrame, Row, SQLContext} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.sql.functions.{col, monotonicallyIncreasingId, udf} import org.apache.spark.sql.types.StructType diff --git a/mllib/src/main/scala/org/apache/spark/ml/evaluation/MulticlassClassificationEvaluator.scala b/mllib/src/main/scala/org/apache/spark/ml/evaluation/MulticlassClassificationEvaluator.scala index a921153b9474f..59ab16838b48c 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/evaluation/MulticlassClassificationEvaluator.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/evaluation/MulticlassClassificationEvaluator.scala @@ -18,11 +18,19 @@ package org.apache.spark.ml.evaluation import org.apache.spark.annotation.{Experimental, Since} +<<<<<<< HEAD +import org.apache.spark.ml.param.{ParamMap, ParamValidators, Param} +import org.apache.spark.ml.param.shared.{HasLabelCol, HasPredictionCol} +import org.apache.spark.ml.util.{DefaultParamsReadable, DefaultParamsWritable, SchemaUtils, Identifiable} +import org.apache.spark.mllib.evaluation.MulticlassMetrics +import org.apache.spark.sql.{Row, DataFrame} +======= import org.apache.spark.ml.param.{Param, ParamMap, ParamValidators} import org.apache.spark.ml.param.shared.{HasLabelCol, HasPredictionCol} import org.apache.spark.ml.util.{DefaultParamsReadable, DefaultParamsWritable, Identifiable, SchemaUtils} import org.apache.spark.mllib.evaluation.MulticlassMetrics import org.apache.spark.sql.{DataFrame, Row} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.sql.types.DoubleType /** diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala index 5b17d3483b895..dec445eb5deee 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Binarizer.scala @@ -17,7 +17,11 @@ package org.apache.spark.ml.feature +<<<<<<< HEAD +import org.apache.spark.annotation.{Since, Experimental} +======= import org.apache.spark.annotation.{Experimental, Since} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.ml.Transformer import org.apache.spark.ml.attribute.BinaryAttribute import org.apache.spark.ml.param._ diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala index 
33abc7c99d4b0..4f2e38a6b5f62 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Bucketizer.scala @@ -20,7 +20,11 @@ package org.apache.spark.ml.feature import java.{util => ju} import org.apache.spark.SparkException +<<<<<<< HEAD +import org.apache.spark.annotation.{Since, Experimental} +======= import org.apache.spark.annotation.{Experimental, Since} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.ml.Model import org.apache.spark.ml.attribute.NominalAttribute import org.apache.spark.ml.param._ diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/CountVectorizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/CountVectorizer.scala index 1268c87908c62..ebcebdd611781 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/CountVectorizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/CountVectorizer.scala @@ -24,7 +24,11 @@ import org.apache.spark.ml.{Estimator, Model} import org.apache.spark.ml.param._ import org.apache.spark.ml.param.shared.{HasInputCol, HasOutputCol} import org.apache.spark.ml.util._ +<<<<<<< HEAD +import org.apache.spark.mllib.linalg.{VectorUDT, Vectors} +======= import org.apache.spark.mllib.linalg.{Vectors, VectorUDT} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.rdd.RDD import org.apache.spark.sql.DataFrame import org.apache.spark.sql.functions._ diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/DCT.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/DCT.scala index a6f878151de73..14c5740506e04 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/DCT.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/DCT.scala @@ -19,11 +19,19 @@ package org.apache.spark.ml.feature import edu.emory.mathcs.jtransforms.dct._ +<<<<<<< HEAD +import org.apache.spark.annotation.{Since, Experimental} +import org.apache.spark.ml.UnaryTransformer +import org.apache.spark.ml.param.BooleanParam +import org.apache.spark.ml.util._ +import org.apache.spark.mllib.linalg.{Vector, VectorUDT, Vectors} +======= import org.apache.spark.annotation.{Experimental, Since} import org.apache.spark.ml.UnaryTransformer import org.apache.spark.ml.param.BooleanParam import org.apache.spark.ml.util._ import org.apache.spark.mllib.linalg.{Vector, Vectors, VectorUDT} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.sql.types.DataType /** diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala index 07a12df320357..21098006c6948 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/ElementwiseProduct.scala @@ -19,7 +19,11 @@ package org.apache.spark.ml.feature import org.apache.spark.annotation.Experimental import org.apache.spark.ml.UnaryTransformer +<<<<<<< HEAD +import org.apache.spark.ml.param.{ParamMap, Param} +======= import org.apache.spark.ml.param.{Param, ParamMap} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.ml.util.Identifiable import org.apache.spark.mllib.feature import org.apache.spark.mllib.linalg.{Vector, VectorUDT} diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala index 61a78d73c4347..8ed7ccebc20bd 100644 --- 
a/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/HashingTF.scala @@ -17,7 +17,11 @@ package org.apache.spark.ml.feature +<<<<<<< HEAD +import org.apache.spark.annotation.{Since, Experimental} +======= import org.apache.spark.annotation.{Experimental, Since} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.ml.Transformer import org.apache.spark.ml.attribute.AttributeGroup import org.apache.spark.ml.param.{IntParam, ParamMap, ParamValidators} diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Interaction.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Interaction.scala index 7d2a1da990fce..744888472d42d 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Interaction.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Interaction.scala @@ -20,13 +20,21 @@ package org.apache.spark.ml.feature import scala.collection.mutable.ArrayBuilder import org.apache.spark.SparkException +<<<<<<< HEAD +import org.apache.spark.annotation.{Since, Experimental} +======= import org.apache.spark.annotation.{Experimental, Since} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.ml.attribute._ import org.apache.spark.ml.param._ import org.apache.spark.ml.param.shared._ import org.apache.spark.ml.util._ import org.apache.spark.ml.Transformer +<<<<<<< HEAD +import org.apache.spark.mllib.linalg.{Vector, VectorUDT, Vectors} +======= import org.apache.spark.mllib.linalg.{Vector, Vectors, VectorUDT} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.sql.{DataFrame, Row} import org.apache.spark.sql.functions._ import org.apache.spark.sql.types._ diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala index 559a025265916..982f06a5b0f21 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala @@ -17,6 +17,10 @@ package org.apache.spark.ml.feature +<<<<<<< HEAD + +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.hadoop.fs.Path import org.apache.spark.annotation.{Experimental, Since} @@ -24,7 +28,11 @@ import org.apache.spark.ml.{Estimator, Model} import org.apache.spark.ml.param.{DoubleParam, ParamMap, Params} import org.apache.spark.ml.param.shared.{HasInputCol, HasOutputCol} import org.apache.spark.ml.util._ +<<<<<<< HEAD +import org.apache.spark.mllib.linalg.{Vector, VectorUDT, Vectors} +======= import org.apache.spark.mllib.linalg.{Vector, Vectors, VectorUDT} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.mllib.stat.Statistics import org.apache.spark.sql._ import org.apache.spark.sql.functions._ diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/NGram.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/NGram.scala index f8bc7e3f0c031..5d230577a7e47 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/NGram.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/NGram.scala @@ -17,7 +17,11 @@ package org.apache.spark.ml.feature +<<<<<<< HEAD +import org.apache.spark.annotation.{Since, Experimental} +======= import org.apache.spark.annotation.{Experimental, Since} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.ml.UnaryTransformer import org.apache.spark.ml.param._ import org.apache.spark.ml.util._ diff --git 
a/mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala index a603b3f833202..47d27ce3a75f2 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Normalizer.scala @@ -17,7 +17,11 @@ package org.apache.spark.ml.feature +<<<<<<< HEAD +import org.apache.spark.annotation.{Since, Experimental} +======= import org.apache.spark.annotation.{Experimental, Since} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.ml.UnaryTransformer import org.apache.spark.ml.param.{DoubleParam, ParamValidators} import org.apache.spark.ml.util._ diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala index c01e29af478c0..b5ed0359cc6da 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala @@ -17,7 +17,11 @@ package org.apache.spark.ml.feature +<<<<<<< HEAD +import org.apache.spark.annotation.{Since, Experimental} +======= import org.apache.spark.annotation.{Experimental, Since} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.ml.Transformer import org.apache.spark.ml.attribute._ import org.apache.spark.ml.param._ diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala index f653798b46043..ee6a3ba04e4ad 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala @@ -180,7 +180,11 @@ object PCAModel extends MLReadable[PCAModel] { val metadata = DefaultParamsReader.loadMetadata(path, sc, className) // explainedVariance field is not present in Spark <= 1.6 +<<<<<<< HEAD + val versionRegex = "([0-9]+)\\.([0-9])+.*".r +======= val versionRegex = "([0-9]+)\\.([0-9]+).*".r +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 val hasExplainedVariance = metadata.sparkVersion match { case versionRegex(major, minor) => (major.toInt >= 2 || (major.toInt == 1 && minor.toInt > 6)) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala index 42b26c8ee836c..bb149e55e4033 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala @@ -19,9 +19,15 @@ package org.apache.spark.ml.feature import scala.collection.mutable +<<<<<<< HEAD +import org.apache.spark.annotation.{Since, Experimental} +import org.apache.spark.ml.UnaryTransformer +import org.apache.spark.ml.param.{ParamMap, IntParam, ParamValidators} +======= import org.apache.spark.annotation.{Experimental, Since} import org.apache.spark.ml.UnaryTransformer import org.apache.spark.ml.param.{IntParam, ParamMap, ParamValidators} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.ml.util._ import org.apache.spark.mllib.linalg._ import org.apache.spark.sql.types.DataType diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/QuantileDiscretizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/QuantileDiscretizer.scala index 39de8461dc9c7..ca29abb986b70 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/QuantileDiscretizer.scala +++ 
b/mllib/src/main/scala/org/apache/spark/ml/feature/QuantileDiscretizer.scala @@ -20,6 +20,16 @@ package org.apache.spark.ml.feature import scala.collection.mutable import org.apache.spark.Logging +<<<<<<< HEAD +import org.apache.spark.annotation.{Since, Experimental} +import org.apache.spark.ml._ +import org.apache.spark.ml.attribute.NominalAttribute +import org.apache.spark.ml.param.shared.{HasInputCol, HasOutputCol} +import org.apache.spark.ml.param.{IntParam, _} +import org.apache.spark.ml.util._ +import org.apache.spark.sql.types.{DoubleType, StructType} +import org.apache.spark.sql.{DataFrame, Row} +======= import org.apache.spark.annotation.{Experimental, Since} import org.apache.spark.ml._ import org.apache.spark.ml.attribute.NominalAttribute @@ -28,6 +38,7 @@ import org.apache.spark.ml.param.shared.{HasInputCol, HasOutputCol} import org.apache.spark.ml.util._ import org.apache.spark.sql.{DataFrame, Row} import org.apache.spark.sql.types.{DoubleType, StructType} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.util.random.XORShiftRandom /** diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala index 2b578c2a95e16..b85162ba707aa 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala @@ -21,8 +21,13 @@ import scala.collection.mutable import scala.collection.mutable.ArrayBuffer import org.apache.spark.annotation.Experimental +<<<<<<< HEAD +import org.apache.spark.ml.attribute.AttributeGroup +import org.apache.spark.ml.{Estimator, Model, Pipeline, PipelineModel, PipelineStage, Transformer} +======= import org.apache.spark.ml.{Estimator, Model, Pipeline, PipelineModel, PipelineStage, Transformer} import org.apache.spark.ml.attribute.AttributeGroup +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.ml.param.{Param, ParamMap} import org.apache.spark.ml.param.shared.{HasFeaturesCol, HasLabelCol} import org.apache.spark.ml.util.Identifiable diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/SQLTransformer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/SQLTransformer.scala index e0ca45b9a6190..5e49d909fdc3a 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/SQLTransformer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/SQLTransformer.scala @@ -18,11 +18,19 @@ package org.apache.spark.ml.feature import org.apache.spark.SparkContext +<<<<<<< HEAD +import org.apache.spark.annotation.{Since, Experimental} +import org.apache.spark.ml.param.{ParamMap, Param} +import org.apache.spark.ml.Transformer +import org.apache.spark.ml.util._ +import org.apache.spark.sql.{SQLContext, DataFrame, Row} +======= import org.apache.spark.annotation.{Experimental, Since} import org.apache.spark.ml.param.{Param, ParamMap} import org.apache.spark.ml.Transformer import org.apache.spark.ml.util._ import org.apache.spark.sql.{DataFrame, Row, SQLContext} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.sql.types.StructType /** diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala index 5d6936dce2c74..02c5419a9baca 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StopWordsRemover.scala @@ -17,7 +17,11 @@ package 
org.apache.spark.ml.feature +<<<<<<< HEAD +import org.apache.spark.annotation.{Since, Experimental} +======= import org.apache.spark.annotation.{Experimental, Since} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.ml.Transformer import org.apache.spark.ml.param.{BooleanParam, ParamMap, StringArrayParam} import org.apache.spark.ml.param.shared.{HasInputCol, HasOutputCol} diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala index 8456a0e915804..2357713dfa4c9 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Tokenizer.scala @@ -17,7 +17,11 @@ package org.apache.spark.ml.feature +<<<<<<< HEAD +import org.apache.spark.annotation.{Since, Experimental} +======= import org.apache.spark.annotation.{Experimental, Since} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.ml.UnaryTransformer import org.apache.spark.ml.param._ import org.apache.spark.ml.util._ diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala index e9d1b57b91d07..b6f01e28f94bf 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorAssembler.scala @@ -20,13 +20,21 @@ package org.apache.spark.ml.feature import scala.collection.mutable.ArrayBuilder import org.apache.spark.SparkException +<<<<<<< HEAD +import org.apache.spark.annotation.{Since, Experimental} +======= import org.apache.spark.annotation.{Experimental, Since} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.ml.Transformer import org.apache.spark.ml.attribute.{Attribute, AttributeGroup, NumericAttribute, UnresolvedAttribute} import org.apache.spark.ml.param.ParamMap import org.apache.spark.ml.param.shared._ import org.apache.spark.ml.util._ +<<<<<<< HEAD +import org.apache.spark.mllib.linalg.{Vector, VectorUDT, Vectors} +======= import org.apache.spark.mllib.linalg.{Vector, Vectors, VectorUDT} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.sql.{DataFrame, Row} import org.apache.spark.sql.functions._ import org.apache.spark.sql.types._ diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorSlicer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorSlicer.scala index 4813d8a5b5dc0..1356c08f19d80 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorSlicer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorSlicer.scala @@ -17,11 +17,19 @@ package org.apache.spark.ml.feature +<<<<<<< HEAD +import org.apache.spark.annotation.{Since, Experimental} +import org.apache.spark.ml.Transformer +import org.apache.spark.ml.attribute.{Attribute, AttributeGroup} +import org.apache.spark.ml.param.shared.{HasInputCol, HasOutputCol} +import org.apache.spark.ml.param.{IntArrayParam, ParamMap, StringArrayParam} +======= import org.apache.spark.annotation.{Experimental, Since} import org.apache.spark.ml.Transformer import org.apache.spark.ml.attribute.{Attribute, AttributeGroup} import org.apache.spark.ml.param.{IntArrayParam, ParamMap, StringArrayParam} import org.apache.spark.ml.param.shared.{HasInputCol, HasOutputCol} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.ml.util._ import org.apache.spark.mllib.linalg._ import org.apache.spark.sql.DataFrame 
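One hunk a few files back is more than import shuffling: the PCAModel loader in PCA.scala changes the version regex from "([0-9]+)\\.([0-9])+.*" to "([0-9]+)\\.([0-9]+).*". With the + outside the group, the second capture keeps only the last repeated digit, so a two-digit minor version would be read wrong. A minimal, self-contained sketch of the difference (the version string here is made up):

object VersionRegexSketch {
  def main(args: Array[String]): Unit = {
    val plusOutsideGroup = "([0-9]+)\\.([0-9])+.*".r
    val plusInsideGroup = "([0-9]+)\\.([0-9]+).*".r

    "1.10.2" match {
      case plusOutsideGroup(major, minor) => println(s"outside: major=$major minor=$minor") // minor=0
    }
    "1.10.2" match {
      case plusInsideGroup(major, minor) => println(s"inside: major=$major minor=$minor") // minor=10
    }
  }
}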
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala index 59c34cd1703aa..2614c61c1186e 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala @@ -26,7 +26,11 @@ import org.apache.spark.ml.param._ import org.apache.spark.ml.param.shared._ import org.apache.spark.ml.util._ import org.apache.spark.mllib.feature +<<<<<<< HEAD +import org.apache.spark.mllib.linalg.{BLAS, Vector, VectorUDT, Vectors} +======= import org.apache.spark.mllib.linalg.{BLAS, Vector, Vectors, VectorUDT} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.sql.{DataFrame, Row, SQLContext} import org.apache.spark.sql.functions._ import org.apache.spark.sql.types._ diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/params.scala b/mllib/src/main/scala/org/apache/spark/ml/param/params.scala index c0546695e487b..a0fe3ea4b1b35 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/param/params.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/param/params.scala @@ -859,12 +859,17 @@ final class ParamMap private[ml] (private val map: mutable.Map[Param[Any], Any]) * Filters this param map for the given parent. */ def filter(parent: Params): ParamMap = { +<<<<<<< HEAD + val filtered = map.filterKeys(_.parent == parent) + new ParamMap(filtered.asInstanceOf[mutable.Map[Param[Any], Any]]) +======= // Don't use filterKeys because mutable.Map#filterKeys // returns the instance of collections.Map, not mutable.Map. // Otherwise, we get ClassCastException. // Not using filterKeys also avoid SI-6654 val filtered = map.filter { case (k, _) => k.parent == parent.uid } new ParamMap(filtered) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 } /** diff --git a/mllib/src/main/scala/org/apache/spark/ml/r/SparkRWrappers.scala b/mllib/src/main/scala/org/apache/spark/ml/r/SparkRWrappers.scala index 551e75dc0a02d..1fb3b7f34f139 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/r/SparkRWrappers.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/r/SparkRWrappers.scala @@ -17,11 +17,19 @@ package org.apache.spark.ml.api.r +<<<<<<< HEAD +import org.apache.spark.ml.attribute._ +import org.apache.spark.ml.feature.RFormula +import org.apache.spark.ml.classification.{LogisticRegression, LogisticRegressionModel} +import org.apache.spark.ml.regression.{LinearRegression, LinearRegressionModel} +import org.apache.spark.ml.{Pipeline, PipelineModel} +======= import org.apache.spark.ml.{Pipeline, PipelineModel} import org.apache.spark.ml.attribute._ import org.apache.spark.ml.classification.{LogisticRegression, LogisticRegressionModel} import org.apache.spark.ml.feature.RFormula import org.apache.spark.ml.regression.{LinearRegression, LinearRegressionModel} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.sql.DataFrame private[r] object SparkRWrappers { diff --git a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala index 14a28b8d5b51f..5fe734eaf947a 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala @@ -31,7 +31,11 @@ import org.json4s.DefaultFormats import org.json4s.JsonDSL._ import org.apache.spark.{Logging, Partitioner} +<<<<<<< HEAD +import org.apache.spark.annotation.{Since, DeveloperApi, Experimental} +======= import 
org.apache.spark.annotation.{DeveloperApi, Experimental, Since} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.ml.{Estimator, Model} import org.apache.spark.ml.param._ import org.apache.spark.ml.param.shared._ diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala index 3787ca45d5172..78a74de84371e 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala @@ -23,6 +23,20 @@ import breeze.linalg.{DenseVector => BDV} import breeze.optimize.{CachedDiffFunction, DiffFunction, LBFGS => BreezeLBFGS} import org.apache.hadoop.fs.Path +<<<<<<< HEAD +import org.apache.spark.annotation.{Experimental, Since} +import org.apache.spark.ml.param._ +import org.apache.spark.ml.param.shared._ +import org.apache.spark.ml.util._ +import org.apache.spark.ml.{Estimator, Model} +import org.apache.spark.mllib.linalg.{BLAS, Vector, VectorUDT, Vectors} +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.types.{DoubleType, StructType} +import org.apache.spark.sql.{DataFrame, Row} +import org.apache.spark.storage.StorageLevel +import org.apache.spark.{Logging, SparkException} +======= import org.apache.spark.{Logging, SparkException} import org.apache.spark.annotation.{Experimental, Since} import org.apache.spark.ml.{Estimator, Model} @@ -35,6 +49,7 @@ import org.apache.spark.sql.{DataFrame, Row} import org.apache.spark.sql.functions._ import org.apache.spark.sql.types.{DoubleType, StructType} import org.apache.spark.storage.StorageLevel +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * Params for accelerated failure time (AFT) regression. 
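Reviewer note: the `ParamMap.filter` hunk in params.scala above is one of the few conflicts here that is not just import ordering. The HEAD side still builds the result with `filterKeys`, while the incoming side replaces it with `filter` and explains why: `mutable.Map#filterKeys` returns a `collection.Map`, so the cast back to `mutable.Map` fails, and the returned view is not serializable (SI-6654). A minimal standalone sketch of that behaviour, assuming the Scala 2.10/2.11 collections Spark used at the time — illustrative code only, not part of the patch:

```scala
import scala.collection.mutable

val m = mutable.Map("a" -> 1, "b" -> 2)

// filterKeys comes from collection.MapLike and returns a lazy view typed as
// collection.Map, not mutable.Map, so the cast the HEAD side relies on blows up:
val view: scala.collection.Map[String, Int] = m.filterKeys(_ == "a")
// view.asInstanceOf[mutable.Map[String, Int]]   // ClassCastException at runtime

// The view is also not serializable (SI-6654) — relevant because the test-suite
// change near the end of this diff serializes a filtered ParamMap.
// Filtering over entries instead builds a real mutable.Map copy:
val copied: mutable.Map[String, Int] = m.filter { case (k, _) => k == "a" }
```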
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala index e8d361b1a2a8a..e82e5de66b538 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala @@ -21,11 +21,24 @@ import org.apache.hadoop.fs.Path import org.apache.spark.Logging import org.apache.spark.annotation.{Experimental, Since} +<<<<<<< HEAD +======= import org.apache.spark.ml.{Estimator, Model} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.ml.param._ import org.apache.spark.ml.param.shared._ import org.apache.spark.ml.regression.IsotonicRegressionModel.IsotonicRegressionModelWriter import org.apache.spark.ml.util._ +<<<<<<< HEAD +import org.apache.spark.ml.{Estimator, Model} +import org.apache.spark.mllib.linalg.{Vector, VectorUDT, Vectors} +import org.apache.spark.mllib.regression.{IsotonicRegression => MLlibIsotonicRegression} +import org.apache.spark.mllib.regression.{IsotonicRegressionModel => MLlibIsotonicRegressionModel} +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.functions.{col, lit, udf} +import org.apache.spark.sql.types.{DoubleType, StructType} +import org.apache.spark.sql.{DataFrame, Row} +======= import org.apache.spark.mllib.linalg.{Vector, Vectors, VectorUDT} import org.apache.spark.mllib.regression.{IsotonicRegression => MLlibIsotonicRegression} import org.apache.spark.mllib.regression.{IsotonicRegressionModel => MLlibIsotonicRegressionModel} @@ -33,6 +46,7 @@ import org.apache.spark.rdd.RDD import org.apache.spark.sql.{DataFrame, Row} import org.apache.spark.sql.functions.{col, lit, udf} import org.apache.spark.sql.types.{DoubleType, StructType} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.storage.StorageLevel /** diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala index dee26337dcdf6..c8ef7f7cc223b 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala @@ -25,9 +25,15 @@ import breeze.stats.distributions.StudentsT import org.apache.hadoop.fs.Path import org.apache.spark.{Logging, SparkException} +<<<<<<< HEAD +import org.apache.spark.ml.feature.Instance +import org.apache.spark.ml.optim.WeightedLeastSquares +import org.apache.spark.annotation.{Experimental, Since} +======= import org.apache.spark.annotation.{Experimental, Since} import org.apache.spark.ml.feature.Instance import org.apache.spark.ml.optim.WeightedLeastSquares +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.ml.PredictorParams import org.apache.spark.ml.param.ParamMap import org.apache.spark.ml.param.shared._ diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/Regressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/Regressor.scala index cf189e8e96f95..970c068c01345 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/Regressor.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/Regressor.scala @@ -18,7 +18,11 @@ package org.apache.spark.ml.regression import org.apache.spark.annotation.DeveloperApi +<<<<<<< HEAD +import org.apache.spark.ml.{PredictionModel, PredictorParams, Predictor} +======= import org.apache.spark.ml.{PredictionModel, Predictor, 
PredictorParams} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** diff --git a/mllib/src/main/scala/org/apache/spark/ml/source/libsvm/LibSVMRelation.scala b/mllib/src/main/scala/org/apache/spark/ml/source/libsvm/LibSVMRelation.scala index 1bed542c40316..bc55f22087c14 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/source/libsvm/LibSVMRelation.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/source/libsvm/LibSVMRelation.scala @@ -24,7 +24,11 @@ import org.apache.spark.annotation.Since import org.apache.spark.mllib.linalg.{Vector, VectorUDT} import org.apache.spark.mllib.util.MLUtils import org.apache.spark.rdd.RDD +<<<<<<< HEAD +import org.apache.spark.sql.{DataFrameReader, DataFrame, Row, SQLContext} +======= import org.apache.spark.sql.{DataFrame, DataFrameReader, Row, SQLContext} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.sql.sources._ import org.apache.spark.sql.types.{DoubleType, StructField, StructType} diff --git a/mllib/src/main/scala/org/apache/spark/ml/tree/Node.scala b/mllib/src/main/scala/org/apache/spark/ml/tree/Node.scala index 6507a8ad7cf3f..4eeab0555b896 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/tree/Node.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/tree/Node.scala @@ -20,8 +20,13 @@ package org.apache.spark.ml.tree import org.apache.spark.annotation.DeveloperApi import org.apache.spark.mllib.linalg.Vector import org.apache.spark.mllib.tree.impurity.ImpurityCalculator +<<<<<<< HEAD +import org.apache.spark.mllib.tree.model.{InformationGainStats => OldInformationGainStats, + Node => OldNode, Predict => OldPredict, ImpurityStats} +======= import org.apache.spark.mllib.tree.model.{ImpurityStats, InformationGainStats => OldInformationGainStats, Node => OldNode, Predict => OldPredict} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * :: DeveloperApi :: @@ -386,9 +391,15 @@ private[tree] object LearningNode { var levelsToGo = indexToLevel(nodeIndex) while (levelsToGo > 0) { if ((nodeIndex & (1 << levelsToGo - 1)) == 0) { +<<<<<<< HEAD + tmpNode = tmpNode.leftChild.asInstanceOf[LearningNode] + } else { + tmpNode = tmpNode.rightChild.asInstanceOf[LearningNode] +======= tmpNode = tmpNode.leftChild.get } else { tmpNode = tmpNode.rightChild.get +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 } levelsToGo -= 1 } diff --git a/mllib/src/main/scala/org/apache/spark/ml/tree/impl/NodeIdCache.scala b/mllib/src/main/scala/org/apache/spark/ml/tree/impl/NodeIdCache.scala index 172ea52820564..5e4ce71401ae0 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/tree/impl/NodeIdCache.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/tree/impl/NodeIdCache.scala @@ -21,7 +21,11 @@ import java.io.IOException import scala.collection.mutable +<<<<<<< HEAD +import org.apache.hadoop.fs.{Path, FileSystem} +======= import org.apache.hadoop.fs.{FileSystem, Path} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.Logging import org.apache.spark.annotation.DeveloperApi diff --git a/mllib/src/main/scala/org/apache/spark/ml/tree/impl/RandomForest.scala b/mllib/src/main/scala/org/apache/spark/ml/tree/impl/RandomForest.scala index 6e87302c7779b..08236c9ae751b 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/tree/impl/RandomForest.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/tree/impl/RandomForest.scala @@ -26,10 +26,17 @@ import org.apache.spark.Logging import org.apache.spark.ml.classification.DecisionTreeClassificationModel import org.apache.spark.ml.regression.DecisionTreeRegressionModel import 
org.apache.spark.ml.tree._ +<<<<<<< HEAD +import org.apache.spark.mllib.linalg.{Vectors, Vector} +import org.apache.spark.mllib.regression.LabeledPoint +import org.apache.spark.mllib.tree.configuration.{Algo => OldAlgo, Strategy => OldStrategy} +import org.apache.spark.mllib.tree.impl.{BaggedPoint, DTStatsAggregator, DecisionTreeMetadata, +======= import org.apache.spark.mllib.linalg.{Vector, Vectors} import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.mllib.tree.configuration.{Algo => OldAlgo, Strategy => OldStrategy} import org.apache.spark.mllib.tree.impl.{BaggedPoint, DecisionTreeMetadata, DTStatsAggregator, +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 TimeTracker} import org.apache.spark.mllib.tree.impurity.ImpurityCalculator import org.apache.spark.mllib.tree.model.ImpurityStats diff --git a/mllib/src/main/scala/org/apache/spark/ml/tree/treeModels.scala b/mllib/src/main/scala/org/apache/spark/ml/tree/treeModels.scala index 40ed95773e149..ccbbb4ff814ec 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/tree/treeModels.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/tree/treeModels.scala @@ -17,7 +17,11 @@ package org.apache.spark.ml.tree +<<<<<<< HEAD +import org.apache.spark.mllib.linalg.{Vectors, Vector} +======= import org.apache.spark.mllib.linalg.{Vector, Vectors} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * Abstraction for Decision Tree models. diff --git a/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala b/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala index 477675cad1a90..29ec2a0d1dbfa 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/tuning/CrossValidator.scala @@ -19,8 +19,13 @@ package org.apache.spark.ml.tuning import com.github.fommil.netlib.F2jBLAS import org.apache.hadoop.fs.Path +<<<<<<< HEAD +import org.json4s.jackson.JsonMethods._ +import org.json4s.{DefaultFormats, JObject} +======= import org.json4s.{DefaultFormats, JObject} import org.json4s.jackson.JsonMethods._ +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.{Logging, SparkContext} import org.apache.spark.annotation.{Experimental, Since} diff --git a/mllib/src/main/scala/org/apache/spark/ml/tuning/TrainValidationSplit.scala b/mllib/src/main/scala/org/apache/spark/ml/tuning/TrainValidationSplit.scala index f346ea655ae5a..45691c4093e87 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/tuning/TrainValidationSplit.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/tuning/TrainValidationSplit.scala @@ -19,8 +19,13 @@ package org.apache.spark.ml.tuning import org.apache.spark.Logging import org.apache.spark.annotation.{Experimental, Since} +<<<<<<< HEAD +import org.apache.spark.ml.evaluation.Evaluator +import org.apache.spark.ml.{Estimator, Model} +======= import org.apache.spark.ml.{Estimator, Model} import org.apache.spark.ml.evaluation.Evaluator +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.ml.param.{DoubleParam, ParamMap, ParamValidators} import org.apache.spark.ml.util.Identifiable import org.apache.spark.sql.DataFrame diff --git a/mllib/src/main/scala/org/apache/spark/ml/tuning/ValidatorParams.scala b/mllib/src/main/scala/org/apache/spark/ml/tuning/ValidatorParams.scala index 553f254172410..b897bfc018b28 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/tuning/ValidatorParams.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/tuning/ValidatorParams.scala @@ -20,7 +20,11 @@ 
package org.apache.spark.ml.tuning import org.apache.spark.annotation.DeveloperApi import org.apache.spark.ml.Estimator import org.apache.spark.ml.evaluation.Evaluator +<<<<<<< HEAD +import org.apache.spark.ml.param.{ParamMap, Param, Params} +======= import org.apache.spark.ml.param.{Param, ParamMap, Params} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * :: DeveloperApi :: diff --git a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PowerIterationClusteringModelWrapper.scala b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PowerIterationClusteringModelWrapper.scala index 6530870b83a11..9bd9703dd3626 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PowerIterationClusteringModelWrapper.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PowerIterationClusteringModelWrapper.scala @@ -17,8 +17,13 @@ package org.apache.spark.mllib.api.python +<<<<<<< HEAD +import org.apache.spark.rdd.RDD +import org.apache.spark.mllib.clustering.PowerIterationClusteringModel +======= import org.apache.spark.mllib.clustering.PowerIterationClusteringModel import org.apache.spark.rdd.RDD +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * A Wrapper of PowerIterationClusteringModel to provide helper method for Python diff --git a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala index 061db56c74938..f6a3a87313600 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/api/python/PythonMLLibAPI.scala @@ -42,17 +42,31 @@ import org.apache.spark.mllib.optimization._ import org.apache.spark.mllib.random.{RandomRDDs => RG} import org.apache.spark.mllib.recommendation._ import org.apache.spark.mllib.regression._ +<<<<<<< HEAD +import org.apache.spark.mllib.stat.correlation.CorrelationNames +import org.apache.spark.mllib.stat.distribution.MultivariateGaussian +import org.apache.spark.mllib.stat.test.{ChiSqTestResult, KolmogorovSmirnovTestResult} +import org.apache.spark.mllib.stat.{ + KernelDensity, MultivariateStatisticalSummary, Statistics} +======= import org.apache.spark.mllib.stat.{ KernelDensity, MultivariateStatisticalSummary, Statistics} import org.apache.spark.mllib.stat.correlation.CorrelationNames import org.apache.spark.mllib.stat.distribution.MultivariateGaussian import org.apache.spark.mllib.stat.test.{ChiSqTestResult, KolmogorovSmirnovTestResult} import org.apache.spark.mllib.tree.{DecisionTree, GradientBoostedTrees, RandomForest} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.mllib.tree.configuration.{Algo, BoostingStrategy, Strategy} import org.apache.spark.mllib.tree.impurity._ import org.apache.spark.mllib.tree.loss.Losses import org.apache.spark.mllib.tree.model.{DecisionTreeModel, GradientBoostedTreesModel, RandomForestModel} +<<<<<<< HEAD +import org.apache.spark.mllib.tree.{DecisionTree, GradientBoostedTrees, RandomForest} +import org.apache.spark.mllib.util.MLUtils +import org.apache.spark.mllib.util.LinearDataGenerator +======= import org.apache.spark.mllib.util.{LinearDataGenerator, MLUtils} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.rdd.RDD import org.apache.spark.sql.{DataFrame, Row, SQLContext} import org.apache.spark.storage.StorageLevel diff --git a/mllib/src/main/scala/org/apache/spark/mllib/api/python/Word2VecModelWrapper.scala 
b/mllib/src/main/scala/org/apache/spark/mllib/api/python/Word2VecModelWrapper.scala index 55dfd973eb25f..a34b60901ba62 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/api/python/Word2VecModelWrapper.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/api/python/Word2VecModelWrapper.scala @@ -18,7 +18,10 @@ package org.apache.spark.mllib.api.python import java.util.{ArrayList => JArrayList, List => JList, Map => JMap} +<<<<<<< HEAD +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import scala.collection.JavaConverters._ import org.apache.spark.SparkContext diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala index 2a7697b5a79cc..4578c891c92e5 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala @@ -20,12 +20,21 @@ package org.apache.spark.mllib.classification import org.apache.spark.SparkContext import org.apache.spark.annotation.Since import org.apache.spark.mllib.classification.impl.GLMClassificationModel +<<<<<<< HEAD +import org.apache.spark.mllib.linalg.BLAS.dot +import org.apache.spark.mllib.linalg.{DenseVector, Vector} +import org.apache.spark.mllib.optimization._ +import org.apache.spark.mllib.pmml.PMMLExportable +import org.apache.spark.mllib.regression._ +import org.apache.spark.mllib.util.{DataValidators, Saveable, Loader} +======= import org.apache.spark.mllib.linalg.{DenseVector, Vector} import org.apache.spark.mllib.linalg.BLAS.dot import org.apache.spark.mllib.optimization._ import org.apache.spark.mllib.pmml.PMMLExportable import org.apache.spark.mllib.regression._ import org.apache.spark.mllib.util.{DataValidators, Loader, Saveable} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.rdd.RDD diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala index 5c9bc62cb09bb..b505e4412dfee 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala @@ -18,6 +18,10 @@ package org.apache.spark.mllib.clustering import breeze.linalg.{DenseVector => BreezeVector} +<<<<<<< HEAD + +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.json4s.DefaultFormats import org.json4s.JsonDSL._ import org.json4s.jackson.JsonMethods._ @@ -25,11 +29,19 @@ import org.json4s.jackson.JsonMethods._ import org.apache.spark.SparkContext import org.apache.spark.annotation.Since import org.apache.spark.api.java.JavaRDD +<<<<<<< HEAD +import org.apache.spark.mllib.linalg.{Vector, Matrices, Matrix} +import org.apache.spark.mllib.stat.distribution.MultivariateGaussian +import org.apache.spark.mllib.util.{MLUtils, Loader, Saveable} +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.{SQLContext, Row} +======= import org.apache.spark.mllib.linalg.{Matrices, Matrix, Vector} import org.apache.spark.mllib.stat.distribution.MultivariateGaussian import org.apache.spark.mllib.util.{Loader, MLUtils, Saveable} import org.apache.spark.rdd.RDD import org.apache.spark.sql.{Row, SQLContext} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * Multivariate Gaussian Mixture Model (GMM) consisting of k Gaussians, where points diff --git 
a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala index 26c6235fe5907..685e66b598498 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala @@ -23,14 +23,23 @@ import org.json4s._ import org.json4s.JsonDSL._ import org.json4s.jackson.JsonMethods._ +<<<<<<< HEAD +======= import org.apache.spark.SparkContext +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.annotation.Since import org.apache.spark.api.java.JavaRDD import org.apache.spark.mllib.linalg.Vector import org.apache.spark.mllib.pmml.PMMLExportable import org.apache.spark.mllib.util.{Loader, Saveable} import org.apache.spark.rdd.RDD +<<<<<<< HEAD +import org.apache.spark.SparkContext +import org.apache.spark.sql.SQLContext +import org.apache.spark.sql.Row +======= import org.apache.spark.sql.{Row, SQLContext} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * A clustering model for K-means. Each point belongs to the cluster with the closest center. diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala index 2fce3ff641101..8b6d0eaa5377e 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala @@ -17,7 +17,11 @@ package org.apache.spark.mllib.clustering +<<<<<<< HEAD +import breeze.linalg.{DenseMatrix => BDM, DenseVector => BDV, argmax, argtopk, normalize, sum} +======= import breeze.linalg.{argmax, argtopk, normalize, sum, DenseMatrix => BDM, DenseVector => BDV} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import breeze.numerics.{exp, lgamma} import org.apache.hadoop.fs.Path import org.json4s.DefaultFormats diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala index c19595e6cd21b..87f18e18f4351 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAOptimizer.scala @@ -19,8 +19,13 @@ package org.apache.spark.mllib.clustering import java.util.Random +<<<<<<< HEAD +import breeze.linalg.{DenseMatrix => BDM, DenseVector => BDV, all, normalize, sum} +import breeze.numerics.{trigamma, abs, exp} +======= import breeze.linalg.{all, normalize, sum, DenseMatrix => BDM, DenseVector => BDV} import breeze.numerics.{abs, exp, trigamma} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import breeze.stats.distributions.{Gamma, RandBasis} import org.apache.spark.annotation.{DeveloperApi, Since} diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAUtils.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAUtils.scala index 647d37bd822c1..7550af9f720b9 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAUtils.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAUtils.scala @@ -16,7 +16,11 @@ */ package org.apache.spark.mllib.clustering +<<<<<<< HEAD +import breeze.linalg.{DenseMatrix => BDM, DenseVector => BDV, max, sum} +======= import breeze.linalg.{max, sum, DenseMatrix => BDM, DenseVector => BDV} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import breeze.numerics._ /** diff --git 
a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LocalKMeans.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LocalKMeans.scala index c9a96c68667af..d50176aafdaba 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LocalKMeans.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LocalKMeans.scala @@ -20,8 +20,13 @@ package org.apache.spark.mllib.clustering import scala.util.Random import org.apache.spark.Logging +<<<<<<< HEAD +import org.apache.spark.mllib.linalg.Vectors +import org.apache.spark.mllib.linalg.BLAS.{axpy, scal} +======= import org.apache.spark.mllib.linalg.BLAS.{axpy, scal} import org.apache.spark.mllib.linalg.Vectors +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * An utility object to run K-means locally. This is private to the ML package because it's used diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/PowerIterationClustering.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/PowerIterationClustering.scala index 2ab0920b06363..463553dc57a3b 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/PowerIterationClustering.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/PowerIterationClustering.scala @@ -17,11 +17,18 @@ package org.apache.spark.mllib.clustering +<<<<<<< HEAD +import org.json4s.JsonDSL._ +import org.json4s._ +import org.json4s.jackson.JsonMethods._ + +======= import org.json4s._ import org.json4s.JsonDSL._ import org.json4s.jackson.JsonMethods._ import org.apache.spark.{Logging, SparkContext, SparkException} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.annotation.Since import org.apache.spark.api.java.JavaRDD import org.apache.spark.graphx._ @@ -31,6 +38,10 @@ import org.apache.spark.mllib.util.{Loader, MLUtils, Saveable} import org.apache.spark.rdd.RDD import org.apache.spark.sql.{Row, SQLContext} import org.apache.spark.util.random.XORShiftRandom +<<<<<<< HEAD +import org.apache.spark.{Logging, SparkContext, SparkException} +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * Model produced by [[PowerIterationClustering]]. 
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/StreamingKMeans.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/StreamingKMeans.scala index 79d217e183c62..2350e92068c5c 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/StreamingKMeans.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/StreamingKMeans.scala @@ -24,7 +24,11 @@ import org.apache.spark.annotation.Since import org.apache.spark.api.java.JavaSparkContext._ import org.apache.spark.mllib.linalg.{BLAS, Vector, Vectors} import org.apache.spark.rdd.RDD +<<<<<<< HEAD +import org.apache.spark.streaming.api.java.{JavaPairDStream, JavaDStream} +======= import org.apache.spark.streaming.api.java.{JavaDStream, JavaPairDStream} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.streaming.dstream.DStream import org.apache.spark.util.Utils import org.apache.spark.util.random.XORShiftRandom diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/AreaUnderCurve.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/AreaUnderCurve.scala index f0779491e6374..385af476f48f9 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/AreaUnderCurve.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/AreaUnderCurve.scala @@ -17,8 +17,13 @@ package org.apache.spark.mllib.evaluation +<<<<<<< HEAD +import org.apache.spark.rdd.RDD +import org.apache.spark.mllib.rdd.RDDFunctions._ +======= import org.apache.spark.mllib.rdd.RDDFunctions._ import org.apache.spark.rdd.RDD +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * Computes the area under the curve (AUC) using the trapezoidal rule. diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala index f8de4e2220c4d..325b796889a48 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RankingMetrics.scala @@ -24,7 +24,11 @@ import scala.reflect.ClassTag import org.apache.spark.Logging import org.apache.spark.annotation.Since +<<<<<<< HEAD +import org.apache.spark.api.java.{JavaSparkContext, JavaRDD} +======= import org.apache.spark.api.java.{JavaRDD, JavaSparkContext} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.rdd.RDD /** diff --git a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RegressionMetrics.scala b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RegressionMetrics.scala index 34883f2f390d1..f29ce8e6fa178 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RegressionMetrics.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/evaluation/RegressionMetrics.scala @@ -21,7 +21,11 @@ import org.apache.spark.annotation.Since import org.apache.spark.rdd.RDD import org.apache.spark.Logging import org.apache.spark.mllib.linalg.Vectors +<<<<<<< HEAD +import org.apache.spark.mllib.stat.{MultivariateStatisticalSummary, MultivariateOnlineSummarizer} +======= import org.apache.spark.mllib.stat.{MultivariateOnlineSummarizer, MultivariateStatisticalSummary} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.sql.DataFrame /** diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/ChiSqSelector.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/ChiSqSelector.scala index 33728bf5d77e5..e04c128600f11 100644 --- 
a/mllib/src/main/scala/org/apache/spark/mllib/feature/ChiSqSelector.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/ChiSqSelector.scala @@ -30,7 +30,11 @@ import org.apache.spark.mllib.stat.Statistics import org.apache.spark.mllib.util.{Loader, Saveable} import org.apache.spark.rdd.RDD import org.apache.spark.SparkContext +<<<<<<< HEAD +import org.apache.spark.sql.{SQLContext, Row} +======= import org.apache.spark.sql.{Row, SQLContext} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * Chi Squared selector model. diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala index a7e1b76df6a7d..d28c029ea0200 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala @@ -24,6 +24,10 @@ import scala.collection.mutable import scala.collection.mutable.ArrayBuilder import com.github.fommil.netlib.BLAS.{getInstance => blas} +<<<<<<< HEAD + +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.json4s.DefaultFormats import org.json4s.JsonDSL._ import org.json4s.jackson.JsonMethods._ @@ -35,9 +39,15 @@ import org.apache.spark.api.java.JavaRDD import org.apache.spark.mllib.linalg.{Vector, Vectors} import org.apache.spark.mllib.util.{Loader, Saveable} import org.apache.spark.rdd._ +<<<<<<< HEAD +import org.apache.spark.util.Utils +import org.apache.spark.util.random.XORShiftRandom +import org.apache.spark.sql.SQLContext +======= import org.apache.spark.sql.SQLContext import org.apache.spark.util.Utils import org.apache.spark.util.random.XORShiftRandom +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * Entry in vocabulary diff --git a/mllib/src/main/scala/org/apache/spark/mllib/fpm/PrefixSpan.scala b/mllib/src/main/scala/org/apache/spark/mllib/fpm/PrefixSpan.scala index ed49c9492fdcd..dc8953986b1c6 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/fpm/PrefixSpan.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/fpm/PrefixSpan.scala @@ -20,8 +20,13 @@ package org.apache.spark.mllib.fpm import java.{lang => jl, util => ju} import java.util.concurrent.atomic.AtomicInteger +<<<<<<< HEAD +import scala.collection.mutable +import scala.collection.JavaConverters._ +======= import scala.collection.JavaConverters._ import scala.collection.mutable +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import scala.reflect.ClassTag import org.apache.spark.Logging diff --git a/mllib/src/main/scala/org/apache/spark/mllib/impl/PeriodicCheckpointer.scala b/mllib/src/main/scala/org/apache/spark/mllib/impl/PeriodicCheckpointer.scala index 57ca4d3464f15..5a701a078b33a 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/impl/PeriodicCheckpointer.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/impl/PeriodicCheckpointer.scala @@ -19,9 +19,15 @@ package org.apache.spark.mllib.impl import scala.collection.mutable +<<<<<<< HEAD +import org.apache.hadoop.fs.{Path, FileSystem} + +import org.apache.spark.{SparkContext, Logging} +======= import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.spark.{Logging, SparkContext} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.storage.StorageLevel diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/EigenValueDecomposition.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/EigenValueDecomposition.scala index bb94745f078e8..b41cacca71ffd 100644 --- 
a/mllib/src/main/scala/org/apache/spark/mllib/linalg/EigenValueDecomposition.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/EigenValueDecomposition.scala @@ -19,7 +19,11 @@ package org.apache.spark.mllib.linalg import breeze.linalg.{DenseMatrix => BDM, DenseVector => BDV} import com.github.fommil.netlib.ARPACK +<<<<<<< HEAD +import org.netlib.util.{intW, doubleW} +======= import org.netlib.util.{doubleW, intW} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * Compute eigen-decomposition. diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala index d7a74db0b1fd8..6f77133f862a3 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala @@ -19,7 +19,11 @@ package org.apache.spark.mllib.linalg import java.util.{Arrays, Random} +<<<<<<< HEAD +import scala.collection.mutable.{ArrayBuilder => MArrayBuilder, HashSet => MHashSet, ArrayBuffer} +======= import scala.collection.mutable.{ArrayBuffer, ArrayBuilder => MArrayBuilder, HashSet => MHashSet} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import breeze.linalg.{CSCMatrix => BSM, DenseMatrix => BDM, Matrix => BM} diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala index cecfd067bd874..c7afa688fe491 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala @@ -17,8 +17,13 @@ package org.apache.spark.mllib.linalg +<<<<<<< HEAD +import java.util +import java.lang.{Double => JavaDouble, Integer => JavaInteger, Iterable => JavaIterable} +======= import java.lang.{Double => JavaDouble, Integer => JavaInteger, Iterable => JavaIterable} import java.util +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import scala.annotation.varargs import scala.collection.JavaConverters._ @@ -26,7 +31,11 @@ import scala.collection.JavaConverters._ import breeze.linalg.{DenseVector => BDV, SparseVector => BSV, Vector => BV} import org.json4s.DefaultFormats import org.json4s.JsonDSL._ +<<<<<<< HEAD +import org.json4s.jackson.JsonMethods.{compact, render, parse => parseJson} +======= import org.json4s.jackson.JsonMethods.{compact, parse => parseJson, render} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.SparkException import org.apache.spark.annotation.{AlphaComponent, Since} diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala index 97b03b340f20e..62a67ce2dbd0d 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/CoordinateMatrix.scala @@ -20,8 +20,13 @@ package org.apache.spark.mllib.linalg.distributed import breeze.linalg.{DenseMatrix => BDM} import org.apache.spark.annotation.Since +<<<<<<< HEAD +import org.apache.spark.rdd.RDD +import org.apache.spark.mllib.linalg.{Matrix, SparseMatrix, Vectors} +======= import org.apache.spark.mllib.linalg.{Matrix, SparseMatrix, Vectors} import org.apache.spark.rdd.RDD +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * Represents an entry in an distributed matrix. 
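Reviewer note: a few hunks back, the AreaUnderCurve change only reorders imports, but its scaladoc ("computes the area under the curve (AUC) using the trapezoidal rule") is worth a concrete illustration. A local, non-RDD sketch of just the arithmetic — the class itself works over an RDD via `RDDFunctions.sliding`, so this is not Spark's implementation:

```scala
// Trapezoidal rule over a curve given as (x, y) points sorted by x:
// each adjacent pair contributes (x2 - x1) * (y1 + y2) / 2.
def localAreaUnderCurve(curve: Seq[(Double, Double)]): Double =
  curve.sliding(2).collect {
    case Seq((x1, y1), (x2, y2)) => (x2 - x1) * (y1 + y2) / 2.0
  }.sum

// localAreaUnderCurve(Seq((0.0, 0.0), (0.5, 0.5), (1.0, 1.0))) == 0.5
```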
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala index e8de515211a18..41201c96b50e4 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/IndexedRowMatrix.scala @@ -20,9 +20,15 @@ package org.apache.spark.mllib.linalg.distributed import breeze.linalg.{DenseMatrix => BDM} import org.apache.spark.annotation.Since +<<<<<<< HEAD +import org.apache.spark.rdd.RDD +import org.apache.spark.mllib.linalg._ +import org.apache.spark.mllib.linalg.SingularValueDecomposition +======= import org.apache.spark.mllib.linalg._ import org.apache.spark.mllib.linalg.SingularValueDecomposition import org.apache.spark.rdd.RDD +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * Represents a row of [[org.apache.spark.mllib.linalg.distributed.IndexedRowMatrix]]. diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala index 0a36da4101339..f1ec0aab782c7 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala @@ -21,8 +21,13 @@ import java.util.Arrays import scala.collection.mutable.ListBuffer +<<<<<<< HEAD +import breeze.linalg.{DenseMatrix => BDM, DenseVector => BDV, SparseVector => BSV, axpy => brzAxpy, + svd => brzSvd, MatrixSingularException, inv} +======= import breeze.linalg.{axpy => brzAxpy, inv, svd => brzSvd, DenseMatrix => BDM, DenseVector => BDV, MatrixSingularException, SparseVector => BSV} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import breeze.numerics.{sqrt => brzSqrt} import org.apache.spark.Logging @@ -30,8 +35,13 @@ import org.apache.spark.annotation.Since import org.apache.spark.mllib.linalg._ import org.apache.spark.mllib.stat.{MultivariateOnlineSummarizer, MultivariateStatisticalSummary} import org.apache.spark.rdd.RDD +<<<<<<< HEAD +import org.apache.spark.util.random.XORShiftRandom +import org.apache.spark.storage.StorageLevel +======= import org.apache.spark.storage.StorageLevel import org.apache.spark.util.random.XORShiftRandom +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * Represents a row-oriented distributed Matrix with no meaningful row indices. 
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala b/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala index 5873669b37e36..b066a5cd6db9d 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/optimization/GradientDescent.scala @@ -19,12 +19,21 @@ package org.apache.spark.mllib.optimization import scala.collection.mutable.ArrayBuffer +<<<<<<< HEAD +import breeze.linalg.{DenseVector => BDV, norm} + +import org.apache.spark.annotation.{Experimental, DeveloperApi} +import org.apache.spark.Logging +import org.apache.spark.rdd.RDD +import org.apache.spark.mllib.linalg.{Vectors, Vector} +======= import breeze.linalg.{norm, DenseVector => BDV} import org.apache.spark.Logging import org.apache.spark.annotation.{DeveloperApi, Experimental} import org.apache.spark.mllib.linalg.{Vector, Vectors} import org.apache.spark.rdd.RDD +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** diff --git a/mllib/src/main/scala/org/apache/spark/mllib/optimization/Optimizer.scala b/mllib/src/main/scala/org/apache/spark/mllib/optimization/Optimizer.scala index d8e56720967d8..978c15618873c 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/optimization/Optimizer.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/optimization/Optimizer.scala @@ -17,9 +17,16 @@ package org.apache.spark.mllib.optimization +<<<<<<< HEAD +import org.apache.spark.rdd.RDD + +import org.apache.spark.annotation.DeveloperApi +import org.apache.spark.mllib.linalg.Vector +======= import org.apache.spark.annotation.DeveloperApi import org.apache.spark.mllib.linalg.Vector import org.apache.spark.rdd.RDD +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * :: DeveloperApi :: diff --git a/mllib/src/main/scala/org/apache/spark/mllib/optimization/Updater.scala b/mllib/src/main/scala/org/apache/spark/mllib/optimization/Updater.scala index 03c01e0553d78..90ba527e809c6 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/optimization/Updater.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/optimization/Updater.scala @@ -19,10 +19,17 @@ package org.apache.spark.mllib.optimization import scala.math._ +<<<<<<< HEAD +import breeze.linalg.{norm => brzNorm, axpy => brzAxpy, Vector => BV} + +import org.apache.spark.annotation.DeveloperApi +import org.apache.spark.mllib.linalg.{Vectors, Vector} +======= import breeze.linalg.{axpy => brzAxpy, norm => brzNorm, Vector => BV} import org.apache.spark.annotation.DeveloperApi import org.apache.spark.mllib.linalg.{Vector, Vectors} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * :: DeveloperApi :: diff --git a/mllib/src/main/scala/org/apache/spark/mllib/random/RandomDataGenerator.scala b/mllib/src/main/scala/org/apache/spark/mllib/random/RandomDataGenerator.scala index fa04f8eb5e796..39b2c17465c99 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/random/RandomDataGenerator.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/random/RandomDataGenerator.scala @@ -19,8 +19,13 @@ package org.apache.spark.mllib.random import org.apache.commons.math3.distribution._ +<<<<<<< HEAD +import org.apache.spark.annotation.{Since, DeveloperApi} +import org.apache.spark.util.random.{XORShiftRandom, Pseudorandom} +======= import org.apache.spark.annotation.{DeveloperApi, Since} import org.apache.spark.util.random.{Pseudorandom, XORShiftRandom} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * :: DeveloperApi :: diff --git 
a/mllib/src/main/scala/org/apache/spark/mllib/rdd/RandomRDD.scala b/mllib/src/main/scala/org/apache/spark/mllib/rdd/RandomRDD.scala index 92bc66949ae80..b5d3621d75c6d 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/rdd/RandomRDD.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/rdd/RandomRDD.scala @@ -17,15 +17,24 @@ package org.apache.spark.mllib.rdd +<<<<<<< HEAD +======= import scala.reflect.ClassTag import scala.util.Random +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.{Partition, SparkContext, TaskContext} import org.apache.spark.mllib.linalg.{DenseVector, Vector} import org.apache.spark.mllib.random.RandomDataGenerator import org.apache.spark.rdd.RDD import org.apache.spark.util.Utils +<<<<<<< HEAD +import scala.reflect.ClassTag +import scala.util.Random + +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 private[mllib] class RandomRDDPartition[T](override val index: Int, val size: Int, val generator: RandomDataGenerator[T], diff --git a/mllib/src/main/scala/org/apache/spark/mllib/rdd/SlidingRDD.scala b/mllib/src/main/scala/org/apache/spark/mllib/rdd/SlidingRDD.scala index adb5e51947f6d..23aa7b40b9143 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/rdd/SlidingRDD.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/rdd/SlidingRDD.scala @@ -20,7 +20,11 @@ package org.apache.spark.mllib.rdd import scala.collection.mutable import scala.reflect.ClassTag +<<<<<<< HEAD +import org.apache.spark.{TaskContext, Partition} +======= import org.apache.spark.{Partition, TaskContext} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.rdd.RDD private[mllib] diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala index e60edc675c83f..31f9d420cf591 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala @@ -17,6 +17,15 @@ package org.apache.spark.mllib.regression +<<<<<<< HEAD +import org.apache.spark.annotation.{DeveloperApi, Since} +import org.apache.spark.mllib.feature.StandardScaler +import org.apache.spark.{Logging, SparkException} +import org.apache.spark.rdd.RDD +import org.apache.spark.mllib.optimization._ +import org.apache.spark.mllib.linalg.{Vectors, Vector} +import org.apache.spark.mllib.util.MLUtils._ +======= import org.apache.spark.{Logging, SparkException} import org.apache.spark.annotation.{DeveloperApi, Since} import org.apache.spark.mllib.feature.StandardScaler @@ -24,6 +33,7 @@ import org.apache.spark.mllib.linalg.{Vector, Vectors} import org.apache.spark.mllib.optimization._ import org.apache.spark.mllib.util.MLUtils._ import org.apache.spark.rdd.RDD +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.storage.StorageLevel /** diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/LabeledPoint.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/LabeledPoint.scala index 45540f0c5c4ce..0b448e211c1a5 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/LabeledPoint.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/LabeledPoint.scala @@ -20,7 +20,11 @@ package org.apache.spark.mllib.regression import scala.beans.BeanInfo import org.apache.spark.annotation.Since +<<<<<<< HEAD +import org.apache.spark.mllib.linalg.{Vectors, Vector} +======= import 
org.apache.spark.mllib.linalg.{Vector, Vectors} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.mllib.util.NumericParser import org.apache.spark.SparkException diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala index d55e5dfdaaf53..af3e67c0eae7d 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala @@ -23,7 +23,11 @@ import org.apache.spark.mllib.linalg.Vector import org.apache.spark.mllib.optimization._ import org.apache.spark.mllib.pmml.PMMLExportable import org.apache.spark.mllib.regression.impl.GLMRegressionModel +<<<<<<< HEAD +import org.apache.spark.mllib.util.{Saveable, Loader} +======= import org.apache.spark.mllib.util.{Loader, Saveable} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.rdd.RDD /** diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala index 7da82c862a2b1..32ed2e42a5f7f 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala @@ -23,7 +23,11 @@ import org.apache.spark.mllib.linalg.Vector import org.apache.spark.mllib.optimization._ import org.apache.spark.mllib.pmml.PMMLExportable import org.apache.spark.mllib.regression.impl.GLMRegressionModel +<<<<<<< HEAD +import org.apache.spark.mllib.util.{Saveable, Loader} +======= import org.apache.spark.mllib.util.{Loader, Saveable} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.rdd.RDD /** diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala index 98404be2603c7..13c1be03c5841 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/MultivariateOnlineSummarizer.scala @@ -18,7 +18,11 @@ package org.apache.spark.mllib.stat import org.apache.spark.annotation.{DeveloperApi, Since} +<<<<<<< HEAD +import org.apache.spark.mllib.linalg.{Vectors, Vector} +======= import org.apache.spark.mllib.linalg.{Vector, Vectors} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * :: DeveloperApi :: diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/Statistics.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/Statistics.scala index f3159f7e724cc..5a8f8c729a019 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/stat/Statistics.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/Statistics.scala @@ -20,9 +20,15 @@ package org.apache.spark.mllib.stat import scala.annotation.varargs import org.apache.spark.annotation.Since +<<<<<<< HEAD +import org.apache.spark.api.java.{JavaRDD, JavaDoubleRDD} +import org.apache.spark.mllib.linalg.distributed.RowMatrix +import org.apache.spark.mllib.linalg.{Matrix, Vector} +======= import org.apache.spark.api.java.{JavaDoubleRDD, JavaRDD} import org.apache.spark.mllib.linalg.{Matrix, Vector} import org.apache.spark.mllib.linalg.distributed.RowMatrix +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.mllib.stat.correlation.Correlations import 
org.apache.spark.mllib.stat.test.{ChiSqTest, ChiSqTestResult, KolmogorovSmirnovTest, diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/distribution/MultivariateGaussian.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/distribution/MultivariateGaussian.scala index 052b5b1d65b07..8e85eae4ab9a4 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/stat/distribution/MultivariateGaussian.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/distribution/MultivariateGaussian.scala @@ -17,10 +17,17 @@ package org.apache.spark.mllib.stat.distribution +<<<<<<< HEAD +import breeze.linalg.{DenseVector => DBV, DenseMatrix => DBM, diag, max, eigSym, Vector => BV} + +import org.apache.spark.annotation.{DeveloperApi, Since} +import org.apache.spark.mllib.linalg.{Vectors, Vector, Matrices, Matrix} +======= import breeze.linalg.{diag, eigSym, max, DenseMatrix => DBM, DenseVector => DBV, Vector => BV} import org.apache.spark.annotation.{DeveloperApi, Since} import org.apache.spark.mllib.linalg.{Matrices, Matrix, Vector, Vectors} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.mllib.util.MLUtils /** diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/test/ChiSqTest.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/test/ChiSqTest.scala index f22f2df320f0d..8f38bf2835b17 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/stat/test/ChiSqTest.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/test/ChiSqTest.scala @@ -17,16 +17,28 @@ package org.apache.spark.mllib.stat.test +<<<<<<< HEAD +import breeze.linalg.{DenseMatrix => BDM} +import org.apache.commons.math3.distribution.ChiSquaredDistribution + +import org.apache.spark.{SparkException, Logging} +======= import scala.collection.mutable import breeze.linalg.{DenseMatrix => BDM} import org.apache.commons.math3.distribution.ChiSquaredDistribution import org.apache.spark.{Logging, SparkException} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.mllib.linalg.{Matrices, Matrix, Vector, Vectors} import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.rdd.RDD +<<<<<<< HEAD +import scala.collection.mutable + +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * Conduct the chi-squared test for the input RDDs using the specified method. * Goodness-of-fit test is conducted on two `Vectors`, whereas test of independence is conducted diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/BoostingStrategy.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/BoostingStrategy.scala index 0b118a76733fd..a78a7400636ee 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/BoostingStrategy.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/BoostingStrategy.scala @@ -21,7 +21,11 @@ import scala.beans.BeanProperty import org.apache.spark.annotation.Since import org.apache.spark.mllib.tree.configuration.Algo._ +<<<<<<< HEAD +import org.apache.spark.mllib.tree.loss.{LogLoss, SquaredError, Loss} +======= import org.apache.spark.mllib.tree.loss.{LogLoss, Loss, SquaredError} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * Configuration options for [[org.apache.spark.mllib.tree.GradientBoostedTrees]]. 
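Reviewer note: the ChiSqTest hunk above also only shuffles imports, but its scaladoc spells out the dispatch rule — goodness-of-fit runs on `Vector`s, the independence test on a contingency `Matrix`. A small usage sketch through the public `Statistics` facade that this object backs; the frequencies are made-up numbers:

```scala
import org.apache.spark.mllib.linalg.{Matrices, Vectors}
import org.apache.spark.mllib.stat.Statistics

// Goodness-of-fit: observed counts against the (default uniform) expected distribution.
val goodnessOfFit = Statistics.chiSqTest(Vectors.dense(10.0, 20.0, 30.0, 40.0))

// Independence: a 3x2 contingency table, entries given column-major.
val independence = Statistics.chiSqTest(Matrices.dense(3, 2, Array(1.0, 3.0, 5.0, 2.0, 4.0, 6.0)))

println(s"${goodnessOfFit.pValue}, ${independence.pValue}")
```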
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala index 6c04403f1ad75..f152486a4e19e 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/configuration/Strategy.scala @@ -21,9 +21,15 @@ import scala.beans.BeanProperty import scala.collection.JavaConverters._ import org.apache.spark.annotation.Since +<<<<<<< HEAD +import org.apache.spark.mllib.tree.impurity.{Variance, Entropy, Gini, Impurity} +import org.apache.spark.mllib.tree.configuration.Algo._ +import org.apache.spark.mllib.tree.configuration.QuantileStrategy._ +======= import org.apache.spark.mllib.tree.configuration.Algo._ import org.apache.spark.mllib.tree.configuration.QuantileStrategy._ import org.apache.spark.mllib.tree.impurity.{Entropy, Gini, Impurity, Variance} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * Stores all the configuration options for tree construction diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/impl/NodeIdCache.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/impl/NodeIdCache.scala index fbbec1197404a..7f918d175e0b6 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/impl/NodeIdCache.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/impl/NodeIdCache.scala @@ -19,6 +19,15 @@ package org.apache.spark.mllib.tree.impl import scala.collection.mutable +<<<<<<< HEAD +import org.apache.hadoop.fs.{Path, FileSystem} + +import org.apache.spark.rdd.RDD +import org.apache.spark.annotation.DeveloperApi +import org.apache.spark.mllib.tree.configuration.FeatureType._ +import org.apache.spark.storage.StorageLevel +import org.apache.spark.mllib.tree.model.{Bin, Node, Split} +======= import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.spark.annotation.DeveloperApi @@ -26,6 +35,7 @@ import org.apache.spark.mllib.tree.configuration.FeatureType._ import org.apache.spark.mllib.tree.model.{Bin, Node, Split} import org.apache.spark.rdd.RDD import org.apache.spark.storage.StorageLevel +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * :: DeveloperApi :: diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala index 66f0908c1250f..ca34126c767a6 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Node.scala @@ -17,10 +17,17 @@ package org.apache.spark.mllib.tree.model +<<<<<<< HEAD +import org.apache.spark.annotation.{DeveloperApi, Since} +import org.apache.spark.Logging +import org.apache.spark.mllib.tree.configuration.FeatureType._ +import org.apache.spark.mllib.linalg.Vector +======= import org.apache.spark.Logging import org.apache.spark.annotation.{DeveloperApi, Since} import org.apache.spark.mllib.linalg.Vector import org.apache.spark.mllib.tree.configuration.FeatureType._ +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * :: DeveloperApi :: diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Split.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Split.scala index 783a4acb55ce9..9b90c56446776 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Split.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/Split.scala @@ -18,6 +18,10 @@ package org.apache.spark.mllib.tree.model import 
org.apache.spark.annotation.{DeveloperApi, Since} +<<<<<<< HEAD +import org.apache.spark.mllib.tree.configuration.FeatureType.FeatureType +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.mllib.tree.configuration.FeatureType import org.apache.spark.mllib.tree.configuration.FeatureType.FeatureType diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/LogisticRegressionDataGenerator.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/LogisticRegressionDataGenerator.scala index 68835bc79677f..5648019c0628f 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/util/LogisticRegressionDataGenerator.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/util/LogisticRegressionDataGenerator.scala @@ -19,11 +19,19 @@ package org.apache.spark.mllib.util import scala.util.Random +<<<<<<< HEAD +import org.apache.spark.annotation.{Since, DeveloperApi} +import org.apache.spark.SparkContext +import org.apache.spark.rdd.RDD +import org.apache.spark.mllib.regression.LabeledPoint +import org.apache.spark.mllib.linalg.Vectors +======= import org.apache.spark.SparkContext import org.apache.spark.annotation.{DeveloperApi, Since} import org.apache.spark.mllib.linalg.Vectors import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.rdd.RDD +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * :: DeveloperApi :: diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala index 8af6750da4ff3..eadeb95006802 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/util/MFDataGenerator.scala @@ -23,7 +23,11 @@ import scala.language.postfixOps import scala.util.Random import org.apache.spark.SparkContext +<<<<<<< HEAD +import org.apache.spark.annotation.{Since, DeveloperApi} +======= import org.apache.spark.annotation.{DeveloperApi, Since} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.mllib.linalg.{BLAS, DenseMatrix} import org.apache.spark.rdd.RDD diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala index 89186de96988f..c71e39e5157f8 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/util/MLUtils.scala @@ -19,6 +19,17 @@ package org.apache.spark.mllib.util import scala.reflect.ClassTag +<<<<<<< HEAD +import org.apache.spark.annotation.Since +import org.apache.spark.SparkContext +import org.apache.spark.rdd.RDD +import org.apache.spark.rdd.PartitionwiseSampledRDD +import org.apache.spark.util.random.BernoulliCellSampler +import org.apache.spark.mllib.regression.LabeledPoint +import org.apache.spark.mllib.linalg.{SparseVector, DenseVector, Vector, Vectors} +import org.apache.spark.mllib.linalg.BLAS.dot +import org.apache.spark.storage.StorageLevel +======= import org.apache.spark.SparkContext import org.apache.spark.annotation.Since import org.apache.spark.mllib.linalg.{DenseVector, SparseVector, Vector, Vectors} @@ -27,6 +38,7 @@ import org.apache.spark.mllib.regression.LabeledPoint import org.apache.spark.rdd.{PartitionwiseSampledRDD, RDD} import org.apache.spark.storage.StorageLevel import org.apache.spark.util.random.BernoulliCellSampler +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * Helper methods to load, save and pre-process data used in ML Lib. 
diff --git a/mllib/src/test/scala/org/apache/spark/ml/param/ParamsSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/param/ParamsSuite.scala index 748868554fe65..8c311fbf99d3f 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/param/ParamsSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/param/ParamsSuite.scala @@ -17,10 +17,14 @@ package org.apache.spark.ml.param +<<<<<<< HEAD +import org.apache.spark.SparkFunSuite +======= import java.io.{ByteArrayOutputStream, NotSerializableException, ObjectOutputStream} import org.apache.spark.SparkFunSuite import org.apache.spark.ml.util.MyParams +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.mllib.linalg.{Vector, Vectors} class ParamsSuite extends SparkFunSuite { @@ -352,6 +356,8 @@ class ParamsSuite extends SparkFunSuite { val t3 = t.copy(ParamMap(t.maxIter -> 20)) assert(t3.isSet(t3.maxIter)) } +<<<<<<< HEAD +======= test("Filtering ParamMap") { val params1 = new MyParams("my_params1") @@ -377,6 +383,7 @@ class ParamsSuite extends SparkFunSuite { val objOut = new ObjectOutputStream(new ByteArrayOutputStream()) objOut.writeObject(filteredParamMap) } +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 } object ParamsSuite extends SparkFunSuite { diff --git a/pom.xml b/pom.xml index 398fcc92db994..fb7d833bf4833 100644 --- a/pom.xml +++ b/pom.xml @@ -1951,11 +1951,14 @@ +<<<<<<< HEAD +======= org.antlr antlr3-maven-plugin 3.5.2 +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 org.apache.maven.plugins @@ -2118,6 +2121,8 @@ maven-deploy-plugin 2.8.2 +<<<<<<< HEAD +======= org.apache.maven.plugins maven-dependency-plugin @@ -2135,6 +2140,7 @@ +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index 612ddf86ded3e..b4736b9cb7353 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -35,6 +35,10 @@ object MimaExcludes { def excludes(version: String) = version match { case v if v.startsWith("2.0") => Seq( +<<<<<<< HEAD + ProblemFilters.exclude[IncompatibleResultTypeProblem]("org.apache.spark.SparkContext.emptyRDD") + ) ++ +======= // SPARK-7995 Remove AkkaRpcEnv ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.rpc.akka.AkkaFailure"), ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.rpc.akka.AkkaFailure$"), @@ -54,6 +58,7 @@ object MimaExcludes { ProblemFilters.exclude[IncompatibleTemplateDefProblem]( "org.apache.spark.mapred.SparkHadoopMapRedUtil") ) ++ +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 // When 1.6 is officially released, update this exclusion list. Seq( MimaBuild.excludeSparkPackage("deploy"), diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 588e97f64e054..440f13742a3b7 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -414,6 +414,11 @@ object Hive { // Some of our log4j jars make it impossible to submit jobs from this JVM to Hive Map/Reduce // in order to generate golden files. This is only required for developers who are adding new // new query tests. +<<<<<<< HEAD + fullClasspath in Test := (fullClasspath in Test).value.filterNot { f => f.toString.contains("jcl-over") } + ) + +======= fullClasspath in Test := (fullClasspath in Test).value.filterNot { f => f.toString.contains("jcl-over") }, // ANTLR code-generation step. 
// @@ -459,6 +464,7 @@ object Hive { ((sourceManaged in Compile).value ** "*.tokens").get.toSeq }.taskValue ) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 } object Assembly { diff --git a/project/plugins.sbt b/project/plugins.sbt index 15ba3a36d51ca..ec7f0f0fa363f 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -27,5 +27,8 @@ addSbtPlugin("io.spray" % "sbt-revolver" % "0.7.2") libraryDependencies += "org.ow2.asm" % "asm" % "5.0.3" libraryDependencies += "org.ow2.asm" % "asm-commons" % "5.0.3" +<<<<<<< HEAD +======= libraryDependencies += "org.antlr" % "antlr" % "3.5.2" +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 diff --git a/python/pyspark/sql/context.py b/python/pyspark/sql/context.py index ba6915a12347e..ca0d93dfb52a8 100644 --- a/python/pyspark/sql/context.py +++ b/python/pyspark/sql/context.py @@ -18,7 +18,10 @@ import sys import warnings import json +<<<<<<< HEAD +======= from functools import reduce +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 if sys.version >= '3': basestring = unicode = str @@ -237,9 +240,20 @@ def _inferSchemaFromList(self, data): if type(first) is dict: warnings.warn("inferring schema from dict is deprecated," "please use pyspark.sql.Row instead") +<<<<<<< HEAD + schema = _infer_schema(first) + if _has_nulltype(schema): + for r in data: + schema = _merge_type(schema, _infer_schema(r)) + if not _has_nulltype(schema): + break + else: + raise ValueError("Some of types cannot be determined after inferring") +======= schema = reduce(_merge_type, map(_infer_schema, data)) if _has_nulltype(schema): raise ValueError("Some of types cannot be determined after inferring") +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 return schema def _inferSchema(self, rdd, samplingRatio=None): diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py index ad621df91064c..2832493d13497 100644 --- a/python/pyspark/sql/dataframe.py +++ b/python/pyspark/sql/dataframe.py @@ -608,16 +608,23 @@ def join(self, other, on=None, how=None): :param on: a string for join column name, a list of column names, , a join expression (Column) or a list of Columns. If `on` is a string or a list of string indicating the name of the join column(s), +<<<<<<< HEAD + the column(s) must exist on both sides, and this performs an inner equi-join. +======= the column(s) must exist on both sides, and this performs an equi-join. +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 :param how: str, default 'inner'. One of `inner`, `outer`, `left_outer`, `right_outer`, `leftsemi`. 
>>> df.join(df2, df.name == df2.name, 'outer').select(df.name, df2.height).collect() [Row(name=None, height=80), Row(name=u'Alice', height=None), Row(name=u'Bob', height=85)] +<<<<<<< HEAD +======= >>> df.join(df2, 'name', 'outer').select('name', 'height').collect() [Row(name=u'Tom', height=80), Row(name=u'Alice', height=None), Row(name=u'Bob', height=85)] +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 >>> cond = [df.name == df3.name, df.age == df3.age] >>> df.join(df3, cond, 'outer').select(df.name, df3.age).collect() [Row(name=u'Bob', age=5), Row(name=u'Alice', age=2)] diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py index 10b99175ad952..53f20287a3d73 100644 --- a/python/pyspark/sql/tests.py +++ b/python/pyspark/sql/tests.py @@ -353,6 +353,8 @@ def test_apply_schema_to_row(self): df3 = self.sqlCtx.createDataFrame(rdd, df.schema) self.assertEqual(10, df3.count()) +<<<<<<< HEAD +======= def test_infer_schema_to_local(self): input = [{"a": 1}, {"b": "coffee"}] rdd = self.sc.parallelize(input) @@ -364,6 +366,7 @@ def test_infer_schema_to_local(self): df3 = self.sqlCtx.createDataFrame(rdd, df.schema) self.assertEqual(10, df3.count()) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 def test_serialize_nested_array_and_map(self): d = [Row(l=[Row(a=1, b='s')], d={"key": Row(c=1.0, d="2")})] rdd = self.sc.parallelize(d) diff --git a/python/pyspark/streaming/dstream.py b/python/pyspark/streaming/dstream.py index 86447f5e58ecb..7c90b316516de 100644 --- a/python/pyspark/streaming/dstream.py +++ b/python/pyspark/streaming/dstream.py @@ -247,7 +247,11 @@ def countByValue(self): Return a new DStream in which each RDD contains the counts of each distinct value in each RDD of this DStream. """ +<<<<<<< HEAD + return self.map(lambda x: (x, None)).reduceByKey(lambda x, y: None).count() +======= return self.map(lambda x: (x, 1)).reduceByKey(lambda x, y: x+y) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 def saveAsTextFiles(self, prefix, suffix=None): """ @@ -493,7 +497,11 @@ def countByValueAndWindow(self, windowDuration, slideDuration, numPartitions=Non keyed = self.map(lambda x: (x, 1)) counted = keyed.reduceByKeyAndWindow(operator.add, operator.sub, windowDuration, slideDuration, numPartitions) +<<<<<<< HEAD + return counted.filter(lambda kv: kv[1] > 0).count() +======= return counted.filter(lambda kv: kv[1] > 0) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 def groupByKeyAndWindow(self, windowDuration, slideDuration, numPartitions=None): """ diff --git a/python/pyspark/streaming/tests.py b/python/pyspark/streaming/tests.py index 86b05d9fd2424..5f6b45254ed04 100644 --- a/python/pyspark/streaming/tests.py +++ b/python/pyspark/streaming/tests.py @@ -279,10 +279,15 @@ def test_countByValue(self): def func(dstream): return dstream.countByValue() +<<<<<<< HEAD + expected = [[4], [4], [3]] + self._test_func(input, func, expected) +======= expected = [[(1, 2), (2, 2), (3, 2), (4, 2)], [(5, 2), (6, 2), (7, 1), (8, 1)], [("a", 2), ("b", 1), ("", 1)]] self._test_func(input, func, expected, sort=True) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 def test_groupByKey(self): """Basic operation test for DStream.groupByKey.""" @@ -653,6 +658,9 @@ def test_count_by_value_and_window(self): def func(dstream): return dstream.countByValueAndWindow(2.5, .5) +<<<<<<< HEAD + expected = [[1], [2], [3], [4], [5], [6], [6], [6], [6], [6]] +======= expected = [[(0, 1)], [(0, 2), (1, 1)], [(0, 3), (1, 2), (2, 1)], @@ -663,6 +671,7 @@ def func(dstream): [(0, 3), (1, 3), (2, 3), (3, 3), (4, 2), 
(5, 1)], [(0, 2), (1, 2), (2, 2), (3, 2), (4, 2), (5, 1)], [(0, 1), (1, 1), (2, 1), (3, 1), (4, 1), (5, 1)]] +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 self._test_func(input, func, expected) def test_group_by_key_and_window(self): diff --git a/repl/src/main/scala/org/apache/spark/repl/ExecutorClassLoader.scala b/repl/src/main/scala/org/apache/spark/repl/ExecutorClassLoader.scala index 2bf1be1a582b5..a3cb1b4a5aecb 100644 --- a/repl/src/main/scala/org/apache/spark/repl/ExecutorClassLoader.scala +++ b/repl/src/main/scala/org/apache/spark/repl/ExecutorClassLoader.scala @@ -17,7 +17,11 @@ package org.apache.spark.repl +<<<<<<< HEAD +import java.io.{FilterInputStream, ByteArrayOutputStream, InputStream, IOException} +======= import java.io.{ByteArrayOutputStream, FilterInputStream, InputStream, IOException} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import java.net.{HttpURLConnection, URI, URL, URLEncoder} import java.nio.channels.Channels @@ -27,9 +31,16 @@ import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.xbean.asm5._ import org.apache.xbean.asm5.Opcodes._ +<<<<<<< HEAD +import org.apache.spark.{SparkConf, SparkEnv, Logging} +import org.apache.spark.deploy.SparkHadoopUtil +import org.apache.spark.util.Utils +import org.apache.spark.util.ParentClassLoader +======= import org.apache.spark.{Logging, SparkConf, SparkEnv} import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.util.{ParentClassLoader, Utils} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * A ClassLoader that reads classes from a Hadoop FileSystem or HTTP URI, diff --git a/scalastyle-config.xml b/scalastyle-config.xml index ee855ca0e09cb..6b509663013da 100644 --- a/scalastyle-config.xml +++ b/scalastyle-config.xml @@ -187,6 +187,17 @@ This file is divided into 3 sections: scala.collection.JavaConverters._ and use .asScala / .asJava methods +<<<<<<< HEAD + + + ^getConfiguration$|^getTaskAttemptID$ + Instead of calling .getConfiguration() or .getTaskAttemptID() directly, + use SparkHadoopUtil's getConfigurationFromJobContext() and getTaskAttemptIDFromTaskAttemptContext() methods. 
+ + + +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 @@ -211,8 +222,13 @@ This file is divided into 3 sections: java,scala,3rdParty,spark +<<<<<<< HEAD + javax?\..+ + scala\..+ +======= javax?\..* scala\..* +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 (?!org\.apache\.spark\.).* org\.apache\.spark\..* diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeArrayData.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeArrayData.java index 3d80df227151d..846110dd0a258 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeArrayData.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeArrayData.java @@ -270,8 +270,13 @@ public UnsafeRow getStruct(int ordinal, int numFields) { final int offset = getElementOffset(ordinal); if (offset < 0) return null; final int size = getElementSize(offset, ordinal); +<<<<<<< HEAD + final UnsafeRow row = new UnsafeRow(); + row.pointTo(baseObject, baseOffset + offset, numFields, size); +======= final UnsafeRow row = new UnsafeRow(numFields); row.pointTo(baseObject, baseOffset + offset, size); +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 return row; } diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java index 7492b88c471a4..afb013f4ae7dd 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/UnsafeRow.java @@ -17,7 +17,15 @@ package org.apache.spark.sql.catalyst.expressions; +<<<<<<< HEAD +import java.io.Externalizable; +import java.io.IOException; +import java.io.ObjectInput; +import java.io.ObjectOutput; +import java.io.OutputStream; +======= import java.io.*; +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import java.math.BigDecimal; import java.math.BigInteger; import java.nio.ByteBuffer; @@ -26,12 +34,35 @@ import java.util.HashSet; import java.util.Set; +<<<<<<< HEAD +import org.apache.spark.sql.types.ArrayType; +import org.apache.spark.sql.types.BinaryType; +import org.apache.spark.sql.types.BooleanType; +import org.apache.spark.sql.types.ByteType; +import org.apache.spark.sql.types.CalendarIntervalType; +import org.apache.spark.sql.types.DataType; +import org.apache.spark.sql.types.DateType; +import org.apache.spark.sql.types.Decimal; +import org.apache.spark.sql.types.DecimalType; +import org.apache.spark.sql.types.DoubleType; +import org.apache.spark.sql.types.FloatType; +import org.apache.spark.sql.types.IntegerType; +import org.apache.spark.sql.types.LongType; +import org.apache.spark.sql.types.MapType; +import org.apache.spark.sql.types.NullType; +import org.apache.spark.sql.types.ShortType; +import org.apache.spark.sql.types.StringType; +import org.apache.spark.sql.types.StructType; +import org.apache.spark.sql.types.TimestampType; +import org.apache.spark.sql.types.UserDefinedType; +======= import com.esotericsoftware.kryo.Kryo; import com.esotericsoftware.kryo.KryoSerializable; import com.esotericsoftware.kryo.io.Input; import com.esotericsoftware.kryo.io.Output; import org.apache.spark.sql.types.*; +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.unsafe.Platform; import org.apache.spark.unsafe.array.ByteArrayMethods; import org.apache.spark.unsafe.bitset.BitSetMethods; @@ -39,9 +70,29 @@ import 
org.apache.spark.unsafe.types.CalendarInterval; import org.apache.spark.unsafe.types.UTF8String; +<<<<<<< HEAD +import static org.apache.spark.sql.types.DataTypes.BooleanType; +import static org.apache.spark.sql.types.DataTypes.ByteType; +import static org.apache.spark.sql.types.DataTypes.DateType; +import static org.apache.spark.sql.types.DataTypes.DoubleType; +import static org.apache.spark.sql.types.DataTypes.FloatType; +import static org.apache.spark.sql.types.DataTypes.IntegerType; +import static org.apache.spark.sql.types.DataTypes.LongType; +import static org.apache.spark.sql.types.DataTypes.NullType; +import static org.apache.spark.sql.types.DataTypes.ShortType; +import static org.apache.spark.sql.types.DataTypes.TimestampType; +import static org.apache.spark.unsafe.Platform.BYTE_ARRAY_OFFSET; + +import com.esotericsoftware.kryo.Kryo; +import com.esotericsoftware.kryo.KryoSerializable; +import com.esotericsoftware.kryo.io.Input; +import com.esotericsoftware.kryo.io.Output; + +======= import static org.apache.spark.sql.types.DataTypes.*; import static org.apache.spark.unsafe.Platform.BYTE_ARRAY_OFFSET; +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * An Unsafe implementation of Row which is backed by raw memory instead of Java objects. * @@ -135,6 +186,10 @@ private void assertIndexIsValid(int index) { /** * Construct a new UnsafeRow. The resulting row won't be usable until `pointTo()` has been called, * since the value returned by this constructor is equivalent to a null pointer. +<<<<<<< HEAD + */ + public UnsafeRow() { } +======= * * @param numFields the number of fields in this row */ @@ -145,6 +200,7 @@ public UnsafeRow(int numFields) { // for serializer public UnsafeRow() {} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 public Object getBaseObject() { return baseObject; } public long getBaseOffset() { return baseOffset; } @@ -158,12 +214,24 @@ public UnsafeRow() {} * * @param baseObject the base object * @param baseOffset the offset within the base object +<<<<<<< HEAD + * @param numFields the number of fields in this row + * @param sizeInBytes the size of this row's backing data, in bytes + */ + public void pointTo(Object baseObject, long baseOffset, int numFields, int sizeInBytes) { + assert numFields >= 0 : "numFields (" + numFields + ") should >= 0"; + this.bitSetWidthInBytes = calculateBitSetWidthInBytes(numFields); + this.baseObject = baseObject; + this.baseOffset = baseOffset; + this.numFields = numFields; +======= * @param sizeInBytes the size of this row's backing data, in bytes */ public void pointTo(Object baseObject, long baseOffset, int sizeInBytes) { assert numFields >= 0 : "numFields (" + numFields + ") should >= 0"; this.baseObject = baseObject; this.baseOffset = baseOffset; +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 this.sizeInBytes = sizeInBytes; } @@ -171,12 +239,32 @@ public void pointTo(Object baseObject, long baseOffset, int sizeInBytes) { * Update this UnsafeRow to point to the underlying byte array. * * @param buf byte array to point to +<<<<<<< HEAD + * @param numFields the number of fields in this row + * @param sizeInBytes the number of bytes valid in the byte array + */ + public void pointTo(byte[] buf, int numFields, int sizeInBytes) { + pointTo(buf, Platform.BYTE_ARRAY_OFFSET, numFields, sizeInBytes); + } + + /** + * Updates this UnsafeRow preserving the number of fields. 
+ * @param buf byte array to point to + * @param sizeInBytes the number of bytes valid in the byte array + */ + public void pointTo(byte[] buf, int sizeInBytes) { + pointTo(buf, numFields, sizeInBytes); + } + + +======= * @param sizeInBytes the number of bytes valid in the byte array */ public void pointTo(byte[] buf, int sizeInBytes) { pointTo(buf, Platform.BYTE_ARRAY_OFFSET, sizeInBytes); } +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 public void setNotNullAt(int i) { assertIndexIsValid(i); BitSetMethods.unset(baseObject, baseOffset, i); @@ -451,8 +539,13 @@ public UnsafeRow getStruct(int ordinal, int numFields) { final long offsetAndSize = getLong(ordinal); final int offset = (int) (offsetAndSize >> 32); final int size = (int) offsetAndSize; +<<<<<<< HEAD + final UnsafeRow row = new UnsafeRow(); + row.pointTo(baseObject, baseOffset + offset, numFields, size); +======= final UnsafeRow row = new UnsafeRow(numFields); row.pointTo(baseObject, baseOffset + offset, size); +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 return row; } } @@ -491,7 +584,11 @@ public UnsafeMapData getMap(int ordinal) { */ @Override public UnsafeRow copy() { +<<<<<<< HEAD + UnsafeRow rowCopy = new UnsafeRow(); +======= UnsafeRow rowCopy = new UnsafeRow(numFields); +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 final byte[] rowDataCopy = new byte[sizeInBytes]; Platform.copyMemory( baseObject, @@ -500,7 +597,11 @@ public UnsafeRow copy() { Platform.BYTE_ARRAY_OFFSET, sizeInBytes ); +<<<<<<< HEAD + rowCopy.pointTo(rowDataCopy, Platform.BYTE_ARRAY_OFFSET, numFields, sizeInBytes); +======= rowCopy.pointTo(rowDataCopy, Platform.BYTE_ARRAY_OFFSET, sizeInBytes); +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 return rowCopy; } @@ -509,8 +610,13 @@ public UnsafeRow copy() { * The returned row is invalid until we call copyFrom on it. 
*/ public static UnsafeRow createFromByteArray(int numBytes, int numFields) { +<<<<<<< HEAD + final UnsafeRow row = new UnsafeRow(); + row.pointTo(new byte[numBytes], numFields, numBytes); +======= final UnsafeRow row = new UnsafeRow(numFields); row.pointTo(new byte[numBytes], numBytes); +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 return row; } diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/execution/UnsafeExternalRowSorter.java b/sql/catalyst/src/main/java/org/apache/spark/sql/execution/UnsafeExternalRowSorter.java index 27ae62f1212f6..857195c87819d 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/execution/UnsafeExternalRowSorter.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/execution/UnsafeExternalRowSorter.java @@ -26,9 +26,16 @@ import org.apache.spark.SparkEnv; import org.apache.spark.TaskContext; +<<<<<<< HEAD +import org.apache.spark.sql.catalyst.util.AbstractScalaRowIterator; +import org.apache.spark.sql.catalyst.InternalRow; +import org.apache.spark.sql.catalyst.expressions.UnsafeProjection; +import org.apache.spark.sql.catalyst.expressions.UnsafeRow; +======= import org.apache.spark.sql.catalyst.InternalRow; import org.apache.spark.sql.catalyst.expressions.UnsafeRow; import org.apache.spark.sql.catalyst.util.AbstractScalaRowIterator; +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.sql.types.StructType; import org.apache.spark.unsafe.Platform; import org.apache.spark.util.collection.unsafe.sort.PrefixComparator; @@ -122,7 +129,11 @@ Iterator sort() throws IOException { return new AbstractScalaRowIterator() { private final int numFields = schema.length(); +<<<<<<< HEAD + private UnsafeRow row = new UnsafeRow(); +======= private UnsafeRow row = new UnsafeRow(numFields); +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 @Override public boolean hasNext() { @@ -136,6 +147,10 @@ public UnsafeRow next() { row.pointTo( sortedIterator.getBaseObject(), sortedIterator.getBaseOffset(), +<<<<<<< HEAD + numFields, +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 sortedIterator.getRecordLength()); if (!hasNext()) { UnsafeRow copy = row.copy(); // so that we don't have dangling pointers to freed page @@ -171,6 +186,13 @@ public Iterator sort(Iterator inputIterator) throws IOExce private static final class RowComparator extends RecordComparator { private final Ordering ordering; private final int numFields; +<<<<<<< HEAD + private final UnsafeRow row1 = new UnsafeRow(); + private final UnsafeRow row2 = new UnsafeRow(); + + public RowComparator(Ordering ordering, int numFields) { + this.numFields = numFields; +======= private final UnsafeRow row1; private final UnsafeRow row2; @@ -178,14 +200,20 @@ public RowComparator(Ordering ordering, int numFields) { this.numFields = numFields; this.row1 = new UnsafeRow(numFields); this.row2 = new UnsafeRow(numFields); +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 this.ordering = ordering; } @Override public int compare(Object baseObj1, long baseOff1, Object baseObj2, long baseOff2) { // TODO: Why are the sizes -1? 
+<<<<<<< HEAD + row1.pointTo(baseObj1, baseOff1, numFields, -1); + row2.pointTo(baseObj2, baseOff2, numFields, -1); +======= row1.pointTo(baseObj1, baseOff1, -1); row2.pointTo(baseObj2, baseOff2, -1); +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 return ordering.compare(row1, row2); } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala index ed153d1f88945..c599e59c4e2fe 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/JavaTypeInference.scala @@ -178,6 +178,21 @@ object JavaTypeInference { case c if !inferExternalType(c).isInstanceOf[ObjectType] => getPath case c if c == classOf[java.lang.Short] => +<<<<<<< HEAD + NewInstance(c, getPath :: Nil, propagateNull = true, ObjectType(c)) + case c if c == classOf[java.lang.Integer] => + NewInstance(c, getPath :: Nil, propagateNull = true, ObjectType(c)) + case c if c == classOf[java.lang.Long] => + NewInstance(c, getPath :: Nil, propagateNull = true, ObjectType(c)) + case c if c == classOf[java.lang.Double] => + NewInstance(c, getPath :: Nil, propagateNull = true, ObjectType(c)) + case c if c == classOf[java.lang.Byte] => + NewInstance(c, getPath :: Nil, propagateNull = true, ObjectType(c)) + case c if c == classOf[java.lang.Float] => + NewInstance(c, getPath :: Nil, propagateNull = true, ObjectType(c)) + case c if c == classOf[java.lang.Boolean] => + NewInstance(c, getPath :: Nil, propagateNull = true, ObjectType(c)) +======= NewInstance(c, getPath :: Nil, ObjectType(c)) case c if c == classOf[java.lang.Integer] => NewInstance(c, getPath :: Nil, ObjectType(c)) @@ -191,6 +206,7 @@ object JavaTypeInference { NewInstance(c, getPath :: Nil, ObjectType(c)) case c if c == classOf[java.lang.Boolean] => NewInstance(c, getPath :: Nil, ObjectType(c)) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 case c if c == classOf[java.sql.Date] => StaticInvoke( @@ -298,7 +314,11 @@ object JavaTypeInference { p.getWriteMethod.getName -> setter }.toMap +<<<<<<< HEAD + val newInstance = NewInstance(other, Nil, propagateNull = false, ObjectType(other)) +======= val newInstance = NewInstance(other, Nil, ObjectType(other), propagateNull = false) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 val result = InitializeJavaBean(newInstance, setters) if (path.nonEmpty) { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala index 9784c969665de..67b952da3735c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/ScalaReflection.scala @@ -189,37 +189,65 @@ object ScalaReflection extends ScalaReflection { case t if t <:< localTypeOf[java.lang.Integer] => val boxedType = classOf[java.lang.Integer] val objectType = ObjectType(boxedType) +<<<<<<< HEAD + NewInstance(boxedType, getPath :: Nil, propagateNull = true, objectType) +======= NewInstance(boxedType, getPath :: Nil, objectType) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 case t if t <:< localTypeOf[java.lang.Long] => val boxedType = classOf[java.lang.Long] val objectType = ObjectType(boxedType) +<<<<<<< HEAD + NewInstance(boxedType, getPath :: Nil, propagateNull = true, objectType) +======= NewInstance(boxedType, getPath :: Nil, objectType) +>>>>>>> 
15bd73627e04591fd13667b4838c9098342db965 case t if t <:< localTypeOf[java.lang.Double] => val boxedType = classOf[java.lang.Double] val objectType = ObjectType(boxedType) +<<<<<<< HEAD + NewInstance(boxedType, getPath :: Nil, propagateNull = true, objectType) +======= NewInstance(boxedType, getPath :: Nil, objectType) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 case t if t <:< localTypeOf[java.lang.Float] => val boxedType = classOf[java.lang.Float] val objectType = ObjectType(boxedType) +<<<<<<< HEAD + NewInstance(boxedType, getPath :: Nil, propagateNull = true, objectType) +======= NewInstance(boxedType, getPath :: Nil, objectType) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 case t if t <:< localTypeOf[java.lang.Short] => val boxedType = classOf[java.lang.Short] val objectType = ObjectType(boxedType) +<<<<<<< HEAD + NewInstance(boxedType, getPath :: Nil, propagateNull = true, objectType) +======= NewInstance(boxedType, getPath :: Nil, objectType) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 case t if t <:< localTypeOf[java.lang.Byte] => val boxedType = classOf[java.lang.Byte] val objectType = ObjectType(boxedType) +<<<<<<< HEAD + NewInstance(boxedType, getPath :: Nil, propagateNull = true, objectType) +======= NewInstance(boxedType, getPath :: Nil, objectType) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 case t if t <:< localTypeOf[java.lang.Boolean] => val boxedType = classOf[java.lang.Boolean] val objectType = ObjectType(boxedType) +<<<<<<< HEAD + NewInstance(boxedType, getPath :: Nil, propagateNull = true, objectType) +======= NewInstance(boxedType, getPath :: Nil, objectType) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 case t if t <:< localTypeOf[java.sql.Date] => StaticInvoke( @@ -349,7 +377,11 @@ object ScalaReflection extends ScalaReflection { } } +<<<<<<< HEAD + val newInstance = NewInstance(cls, arguments, propagateNull = false, ObjectType(cls)) +======= val newInstance = NewInstance(cls, arguments, ObjectType(cls), propagateNull = false) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 if (path.nonEmpty) { expressions.If( diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala index a1be1473cc80b..3a236c80b8c94 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala @@ -57,7 +57,11 @@ trait CheckAnalysis { operator transformExpressionsUp { case a: Attribute if !a.resolved => val from = operator.inputSet.map(_.name).mkString(", ") +<<<<<<< HEAD + a.failAnalysis(s"cannot resolve '${a.prettyString}' given input columns $from") +======= a.failAnalysis(s"cannot resolve '${a.prettyString}' given input columns: [$from]") +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 case e: Expression if e.checkInputDataTypes().isFailure => e.checkInputDataTypes() match { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala index ad4beda9c4916..063bca4fec970 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoder.scala @@ -133,7 +133,11 @@ object ExpressionEncoder { } val fromRowExpression = +<<<<<<< HEAD + NewInstance(cls, 
fromRowExpressions, propagateNull = false, ObjectType(cls)) +======= NewInstance(cls, fromRowExpressions, ObjectType(cls), propagateNull = false) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 new ExpressionEncoder[Any]( schema, diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/RowEncoder.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/RowEncoder.scala index 6f3d5ba84c9ae..3dcfa9e961e48 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/RowEncoder.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/RowEncoder.scala @@ -55,6 +55,10 @@ object RowEncoder { val obj = NewInstance( udt.userClass.getAnnotation(classOf[SQLUserDefinedType]).udt(), Nil, +<<<<<<< HEAD + false, +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 dataType = ObjectType(udt.userClass.getAnnotation(classOf[SQLUserDefinedType]).udt())) Invoke(obj, "serialize", udt.sqlType, inputObject :: Nil) @@ -165,6 +169,10 @@ object RowEncoder { val obj = NewInstance( udt.userClass.getAnnotation(classOf[SQLUserDefinedType]).udt(), Nil, +<<<<<<< HEAD + false, +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 dataType = ObjectType(udt.userClass.getAnnotation(classOf[SQLUserDefinedType]).udt())) Invoke(obj, "deserialize", ObjectType(udt.userClass), input :: Nil) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala index 64d397bf848a9..52ed0dc443317 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/ScalaUDF.scala @@ -30,10 +30,14 @@ import org.apache.spark.sql.types.DataType * null. Use boxed type or [[Option]] if you wanna do the null-handling yourself. * @param dataType Return type of function. * @param children The input expressions of this UDF. +<<<<<<< HEAD + * @param inputTypes The expected input types of this UDF. +======= * @param inputTypes The expected input types of this UDF, used to perform type coercion. If we do * not want to perform coercion, simply use "Nil". Note that it would've been * better to use Option of Seq[DataType] so we can use "None" as the case for no * type coercion. However, that would require more refactoring of the codebase. 
+>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 */ case class ScalaUDF( function: AnyRef, @@ -46,7 +50,11 @@ case class ScalaUDF( override def toString: String = s"UDF(${children.mkString(",")})" +<<<<<<< HEAD + // scalastyle:off +======= // scalastyle:off line.size.limit +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** This method has been generated by this script @@ -972,7 +980,11 @@ case class ScalaUDF( } } +<<<<<<< HEAD + // scalastyle:on +======= // scalastyle:on line.size.limit +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 // Generate codes used to convert the arguments to Scala type for user-defined funtions private[this] def genCodeForConverter(ctx: CodeGenContext, index: Int): String = { @@ -1013,7 +1025,11 @@ case class ScalaUDF( // This must be called before children expressions' codegen // because ctx.references is used in genCodeForConverter +<<<<<<< HEAD + val converterTerms = (0 until children.size).map(genCodeForConverter(ctx, _)) +======= val converterTerms = children.indices.map(genCodeForConverter(ctx, _)) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 // Initialize user-defined function val funcClassName = s"scala.Function${children.size}" @@ -1057,6 +1073,9 @@ case class ScalaUDF( } private[this] val converter = CatalystTypeConverters.createToCatalystConverter(dataType) +<<<<<<< HEAD +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 override def eval(input: InternalRow): Any = converter(f(input)) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala index d0e031f27990c..1f6f717c275c6 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeProjection.scala @@ -289,7 +289,11 @@ object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafePro val exprTypes = expressions.map(_.dataType) val result = ctx.freshName("result") +<<<<<<< HEAD + ctx.addMutableState("UnsafeRow", result, s"this.$result = new UnsafeRow();") +======= ctx.addMutableState("UnsafeRow", result, s"$result = new UnsafeRow(${expressions.length});") +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 val bufferHolder = ctx.freshName("bufferHolder") val holderClass = classOf[BufferHolder].getName ctx.addMutableState(holderClass, bufferHolder, s"this.$bufferHolder = new $holderClass();") @@ -303,7 +307,11 @@ object GenerateUnsafeProjection extends CodeGenerator[Seq[Expression], UnsafePro $subexprReset ${writeExpressionsToBuffer(ctx, ctx.INPUT_ROW, exprEvals, exprTypes, bufferHolder)} +<<<<<<< HEAD + $result.pointTo($bufferHolder.buffer, ${expressions.length}, $bufferHolder.totalSize()); +======= $result.pointTo($bufferHolder.buffer, $bufferHolder.totalSize()); +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 """ GeneratedExpressionCode(code, "false", result) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeRowJoiner.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeRowJoiner.scala index c9ff357bf3476..927f88c69784c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeRowJoiner.scala +++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeRowJoiner.scala @@ -165,7 +165,11 @@ object GenerateUnsafeRowJoiner extends CodeGenerator[(StructType, StructType), U | |class SpecificUnsafeRowJoiner extends ${classOf[UnsafeRowJoiner].getName} { | private byte[] buf = new byte[64]; +<<<<<<< HEAD + | private UnsafeRow out = new UnsafeRow(); +======= | private UnsafeRow out = new UnsafeRow(${schema1.size + schema2.size}); +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 | | public UnsafeRow join(UnsafeRow row1, UnsafeRow row2) { | // row1: ${schema1.size} fields, $bitset1Words words in bitset @@ -188,7 +192,11 @@ object GenerateUnsafeRowJoiner extends CodeGenerator[(StructType, StructType), U | $copyVariableLengthRow2 | $updateOffset | +<<<<<<< HEAD + | out.pointTo(buf, ${schema1.size + schema2.size}, sizeInBytes - $sizeReduction); +======= | out.pointTo(buf, sizeInBytes - $sizeReduction); +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 | | return out; | } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala index d0ec99b2320df..5fb5b6d2e9b07 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/misc.scala @@ -57,10 +57,16 @@ case class Md5(child: Expression) extends UnaryExpression with ImplicitCastInput * the hash length is not one of the permitted values, the return value is NULL. */ @ExpressionDescription( +<<<<<<< HEAD + usage = "_FUNC_(input, bitLength) - Returns a checksum of SHA-2 family as a hex string of the " + + "input. SHA-224, SHA-256, SHA-384, and SHA-512 are supported. Bit length of 0 is equivalent " + + "to 256", +======= usage = """_FUNC_(input, bitLength) - Returns a checksum of SHA-2 family as a hex string of the input. SHA-224, SHA-256, SHA-384, and SHA-512 are supported. Bit length of 0 is equivalent to 256.""" , +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 extended = "> SELECT _FUNC_('Spark', 0);\n " + "'529bc3b07127ecb7e53a4dcf1991d9152c24537d919178022b2c42657f79a26b'") case class Sha2(left: Expression, right: Expression) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala index fb404c12d5a04..173aaccb0235b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala @@ -165,7 +165,11 @@ case class Invoke( ${obj.code} ${argGen.map(_.code).mkString("\n")} +<<<<<<< HEAD + boolean ${ev.isNull} = ${obj.value} == null; +======= boolean ${ev.isNull} = ${obj.isNull}; +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 $javaType ${ev.value} = ${ev.isNull} ? 
${ctx.defaultValue(dataType)} : ($javaType) $value; @@ -178,8 +182,13 @@ object NewInstance { def apply( cls: Class[_], arguments: Seq[Expression], +<<<<<<< HEAD + propagateNull: Boolean = false, + dataType: DataType): NewInstance = +======= dataType: DataType, propagateNull: Boolean = true): NewInstance = +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 new NewInstance(cls, arguments, propagateNull, dataType, None) } @@ -231,7 +240,11 @@ case class NewInstance( s"new $className($argString)" } +<<<<<<< HEAD + if (propagateNull) { +======= if (propagateNull && argGen.nonEmpty) { +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 val argsNonNull = s"!(${argGen.map(_.isNull).mkString(" || ")})" s""" @@ -248,8 +261,13 @@ case class NewInstance( s""" $setup +<<<<<<< HEAD + $javaType ${ev.value} = $constructorCall; + final boolean ${ev.isNull} = ${ev.value} == null; +======= final $javaType ${ev.value} = $constructorCall; final boolean ${ev.isNull} = false; +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 """ } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala index f1a333b8e56a7..d71295a93ef81 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala @@ -314,8 +314,13 @@ abstract class OffsetWindowFunction val offset: Expression /** +<<<<<<< HEAD + * Direction (above = 1/below = -1) of the number of rows between the current row and the row + * where the input expression is evaluated. +======= * Direction of the number of rows between the current row and the row where the input expression * is evaluated. +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 */ val direction: SortDirection @@ -327,7 +332,11 @@ abstract class OffsetWindowFunction * both the input and the default expression are foldable, the result is still not foldable due to * the frame. */ +<<<<<<< HEAD + override def foldable: Boolean = input.foldable && (default == null || default.foldable) +======= override def foldable: Boolean = false +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 override def nullable: Boolean = default == null || default.nullable @@ -353,6 +362,8 @@ abstract class OffsetWindowFunction override def toString: String = s"$prettyName($input, $offset, $default)" } +<<<<<<< HEAD +======= /** * The Lead function returns the value of 'x' at 'offset' rows after the current row in the window. * Offsets start at 0, which is the current row. The offset must be constant integer value. The @@ -368,6 +379,7 @@ abstract class OffsetWindowFunction @ExpressionDescription(usage = """_FUNC_(input, offset, default) - LEAD returns the value of 'x' at 'offset' rows after the current row in the window""") +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 case class Lead(input: Expression, offset: Expression, default: Expression) extends OffsetWindowFunction { @@ -380,6 +392,8 @@ case class Lead(input: Expression, offset: Expression, default: Expression) override val direction = Ascending } +<<<<<<< HEAD +======= /** * The Lag function returns the value of 'x' at 'offset' rows before the current row in the window. * Offsets start at 0, which is the current row. The offset must be constant integer value. 
The @@ -395,6 +409,7 @@ case class Lead(input: Expression, offset: Expression, default: Expression) @ExpressionDescription(usage = """_FUNC_(input, offset, default) - LAG returns the value of 'x' at 'offset' rows before the current row in the window""") +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 case class Lag(input: Expression, offset: Expression, default: Expression) extends OffsetWindowFunction { @@ -439,6 +454,8 @@ object SizeBasedWindowFunction { val n = AttributeReference("window__partition__size", IntegerType, nullable = false)() } +<<<<<<< HEAD +======= /** * The RowNumber function computes a unique, sequential number to each row, starting with one, * according to the ordering of rows within the window partition. @@ -449,10 +466,13 @@ object SizeBasedWindowFunction { """_FUNC_() - The ROW_NUMBER() function assigns a unique, sequential number to each row, starting with one, according to the ordering of rows within the window partition.""") +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 case class RowNumber() extends RowNumberLike { override val evaluateExpression = rowNumber } +<<<<<<< HEAD +======= /** * The CumeDist function computes the position of a value relative to a all values in the partition. * The result is the number of rows preceding or equal to the current row in the ordering of the @@ -464,6 +484,7 @@ case class RowNumber() extends RowNumberLike { @ExpressionDescription(usage = """_FUNC_() - The CUME_DIST() function computes the position of a value relative to a all values in the partition.""") +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 case class CumeDist() extends RowNumberLike with SizeBasedWindowFunction { override def dataType: DataType = DoubleType // The frame for CUME_DIST is Range based instead of Row based, because CUME_DIST must @@ -472,6 +493,8 @@ case class CumeDist() extends RowNumberLike with SizeBasedWindowFunction { override val evaluateExpression = Divide(Cast(rowNumber, DoubleType), Cast(n, DoubleType)) } +<<<<<<< HEAD +======= /** * The NTile function divides the rows for each window partition into 'n' buckets ranging from 1 to * at most 'n'. Bucket values will differ by at most 1. If the number of rows in the partition does @@ -496,6 +519,7 @@ case class CumeDist() extends RowNumberLike with SizeBasedWindowFunction { @ExpressionDescription(usage = """_FUNC_(x) - The NTILE(n) function divides the rows for each window partition into 'n' buckets ranging from 1 to at most 'n'.""") +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 case class NTile(buckets: Expression) extends RowNumberLike with SizeBasedWindowFunction { def this() = this(Literal(1)) @@ -549,8 +573,11 @@ case class NTile(buckets: Expression) extends RowNumberLike with SizeBasedWindow * the order of the window in which is processed. For instance, when the value of 'x' changes in a * window ordered by 'x' the rank function also changes. The size of the change of the rank function * is (typically) not dependent on the size of the change in 'x'. +<<<<<<< HEAD +======= * * This documentation has been based upon similar documentation for the Hive and Presto projects. +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 */ abstract class RankLike extends AggregateWindowFunction { override def inputTypes: Seq[AbstractDataType] = children.map(_ => AnyDataType) @@ -590,6 +617,8 @@ abstract class RankLike extends AggregateWindowFunction { def withOrder(order: Seq[Expression]): RankLike } +<<<<<<< HEAD +======= /** * The Rank function computes the rank of a value in a group of values. 
The result is one plus the * number of rows preceding or equal to the current row in the ordering of the partition. Tie values @@ -605,11 +634,14 @@ abstract class RankLike extends AggregateWindowFunction { """_FUNC_() - RANK() computes the rank of a value in a group of values. The result is one plus the number of rows preceding or equal to the current row in the ordering of the partition. Tie values will produce gaps in the sequence.""") +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 case class Rank(children: Seq[Expression]) extends RankLike { def this() = this(Nil) override def withOrder(order: Seq[Expression]): Rank = Rank(order) } +<<<<<<< HEAD +======= /** * The DenseRank function computes the rank of a value in a group of values. The result is one plus * the previously assigned rank value. Unlike Rank, DenseRank will not produce gaps in the ranking @@ -625,6 +657,7 @@ case class Rank(children: Seq[Expression]) extends RankLike { """_FUNC_() - The DENSE_RANK() function computes the rank of a value in a group of values. The result is one plus the previously assigned rank value. Unlike Rank, DenseRank will not produce gaps in the ranking sequence.""") +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 case class DenseRank(children: Seq[Expression]) extends RankLike { def this() = this(Nil) override def withOrder(order: Seq[Expression]): DenseRank = DenseRank(order) @@ -634,6 +667,8 @@ case class DenseRank(children: Seq[Expression]) extends RankLike { override val initialValues = zero +: orderInit } +<<<<<<< HEAD +======= /** * The PercentRank function computes the percentage ranking of a value in a group of values. The * result the rank of the minus one divided by the total number of rows in the partitiion minus one: @@ -651,6 +686,7 @@ case class DenseRank(children: Seq[Expression]) extends RankLike { @ExpressionDescription(usage = """_FUNC_() - PERCENT_RANK() The PercentRank function computes the percentage ranking of a value in a group of values.""") +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 case class PercentRank(children: Seq[Expression]) extends RankLike with SizeBasedWindowFunction { def this() = this(Nil) override def withOrder(order: Seq[Expression]): PercentRank = PercentRank(order) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 0b1c74293bb8b..a21dcdc5b3ba0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -28,12 +28,19 @@ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules._ import org.apache.spark.sql.types._ +<<<<<<< HEAD +abstract class Optimizer extends RuleExecutor[LogicalPlan] + +object DefaultOptimizer extends Optimizer { + val batches = +======= /** * Abstract class all optimizers should inherit of, contains the standard batches (extending * Optimizers can override this. */ abstract class Optimizer extends RuleExecutor[LogicalPlan] { def batches: Seq[Batch] = { +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 // SubQueries are only needed for analysis and can be removed before execution. 
Batch("Remove SubQueries", FixedPoint(100), EliminateSubQueries) :: @@ -68,6 +75,11 @@ abstract class Optimizer extends RuleExecutor[LogicalPlan] { DecimalAggregates) :: Batch("LocalRelation", FixedPoint(100), ConvertToLocalRelation) :: Nil +<<<<<<< HEAD +} + +/** +======= } } @@ -80,6 +92,7 @@ abstract class Optimizer extends RuleExecutor[LogicalPlan] { object DefaultOptimizer extends Optimizer /** +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 * Pushes operations down into a Sample. */ object SamplePushDown extends Rule[LogicalPlan] { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala index b43b7ee71e7aa..161c53b1c1b18 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/QueryPlan.scala @@ -44,6 +44,17 @@ abstract class QueryPlan[PlanType <: TreeNode[PlanType]] extends TreeNode[PlanTy AttributeSet(children.flatMap(_.asInstanceOf[QueryPlan[PlanType]].output)) /** +<<<<<<< HEAD + * Attributes that are referenced by expressions but not provided by this nodes children. + * Subclasses should override this method if they produce attributes internally as it is used by + * assertions designed to prevent the construction of invalid plans. + * + * Note that virtual columns should be excluded. Currently, we only support the grouping ID + * virtual column. + */ + def missingInput: AttributeSet = + (references -- inputSet).filter(_.name != VirtualColumn.groupingIdName) +======= * The set of all attributes that are produced by this node. */ def producedAttributes: AttributeSet = AttributeSet.empty @@ -54,6 +65,7 @@ abstract class QueryPlan[PlanType <: TreeNode[PlanType]] extends TreeNode[PlanTy * assertions designed to prevent the construction of invalid plans. */ def missingInput: AttributeSet = references -- inputSet -- producedAttributes +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * Runs [[transform]] with `rule` on all expressions present in this query operator. 
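The QueryPlan hunk above drives several of the conflicts that follow in basicOperators.scala: HEAD computes missingInput per operator and filters out the grouping-ID virtual column, whereas the 15bd73627 side introduces a producedAttributes hook and derives missingInput uniformly as references -- inputSet -- producedAttributes. A minimal sketch of the pattern on the 15bd73627 side, using a hypothetical operator (MyGenerator does not appear in this patch):

import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeSet}
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, UnaryNode}

// A node that emits extra columns declares them via producedAttributes, and
// QueryPlan.missingInput subtracts them automatically (the same pattern
// AppendColumns uses in the basicOperators.scala hunk below).
case class MyGenerator(generated: Seq[Attribute], child: LogicalPlan) extends UnaryNode {
  override def output: Seq[Attribute] = child.output ++ generated
  override def producedAttributes: AttributeSet = AttributeSet(generated)
}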
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LocalRelation.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LocalRelation.scala index 572d7d2f0b537..31b6e873af44a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LocalRelation.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LocalRelation.scala @@ -18,8 +18,13 @@ package org.apache.spark.sql.catalyst.plans.logical import org.apache.spark.sql.Row +<<<<<<< HEAD +import org.apache.spark.sql.catalyst.expressions.Attribute +import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow, analysis} +======= import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeSet} import org.apache.spark.sql.catalyst.{analysis, CatalystTypeConverters, InternalRow} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.sql.types.{StructField, StructType} object LocalRelation { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala index 6d859551f8c52..c8ae18b674fe7 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/LogicalPlan.scala @@ -295,7 +295,10 @@ abstract class LogicalPlan extends QueryPlan[LogicalPlan] with Logging { */ abstract class LeafNode extends LogicalPlan { override def children: Seq[LogicalPlan] = Nil +<<<<<<< HEAD +======= override def producedAttributes: AttributeSet = outputSet +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 } /** diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala index 5f34d4a4eb73c..98a647c1c628b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicOperators.scala @@ -526,7 +526,11 @@ case class MapPartitions[T, U]( uEncoder: ExpressionEncoder[U], output: Seq[Attribute], child: LogicalPlan) extends UnaryNode { +<<<<<<< HEAD + override def missingInput: AttributeSet = AttributeSet.empty +======= override def producedAttributes: AttributeSet = outputSet +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 } /** Factory for constructing new `AppendColumn` nodes. */ @@ -552,7 +556,11 @@ case class AppendColumns[T, U]( newColumns: Seq[Attribute], child: LogicalPlan) extends UnaryNode { override def output: Seq[Attribute] = child.output ++ newColumns +<<<<<<< HEAD + override def missingInput: AttributeSet = super.missingInput -- newColumns +======= override def producedAttributes: AttributeSet = AttributeSet(newColumns) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 } /** Factory for constructing new `MapGroups` nodes. */ @@ -587,7 +595,11 @@ case class MapGroups[K, T, U]( groupingAttributes: Seq[Attribute], output: Seq[Attribute], child: LogicalPlan) extends UnaryNode { +<<<<<<< HEAD + override def missingInput: AttributeSet = AttributeSet.empty +======= override def producedAttributes: AttributeSet = outputSet +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 } /** Factory for constructing new `CoGroup` nodes. 
*/ @@ -630,5 +642,9 @@ case class CoGroup[Key, Left, Right, Result]( rightGroup: Seq[Attribute], left: LogicalPlan, right: LogicalPlan) extends BinaryNode { +<<<<<<< HEAD + override def missingInput: AttributeSet = AttributeSet.empty +======= override def producedAttributes: AttributeSet = outputSet +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleExecutor.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleExecutor.scala index 62ea731ab5f38..8c04098c802c8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleExecutor.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/rules/RuleExecutor.scala @@ -59,7 +59,11 @@ abstract class RuleExecutor[TreeType <: TreeNode[_]] extends Logging { protected case class Batch(name: String, strategy: Strategy, rules: Rule[TreeType]*) /** Defines a sequence of rule batches, to be overridden by the implementation. */ +<<<<<<< HEAD + protected val batches: Seq[Batch] +======= protected def batches: Seq[Batch] +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/EncoderResolutionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/EncoderResolutionSuite.scala index bc36a55ae0ea2..f5ab4b17c5476 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/EncoderResolutionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/EncoderResolutionSuite.scala @@ -46,8 +46,13 @@ class EncoderResolutionSuite extends PlanTest { toExternalString('a.string), AssertNotNull('b.int.cast(LongType), cls.getName, "b", "Long") ), +<<<<<<< HEAD + false, + ObjectType(cls)) +======= ObjectType(cls), propagateNull = false) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 compareExpressions(fromRowExpr, expected) } @@ -60,8 +65,13 @@ class EncoderResolutionSuite extends PlanTest { toExternalString('a.int.cast(StringType)), AssertNotNull('b.long, cls.getName, "b", "Long") ), +<<<<<<< HEAD + false, + ObjectType(cls)) +======= ObjectType(cls), propagateNull = false) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 compareExpressions(fromRowExpr, expected) } } @@ -88,11 +98,19 @@ class EncoderResolutionSuite extends PlanTest { AssertNotNull( GetStructField('b.struct('a.int, 'b.long), 1, Some("b")), innerCls.getName, "b", "Long")), +<<<<<<< HEAD + false, + ObjectType(innerCls)) + )), + false, + ObjectType(cls)) +======= ObjectType(innerCls), propagateNull = false) )), ObjectType(cls), propagateNull = false) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 compareExpressions(fromRowExpr, expected) } @@ -114,11 +132,19 @@ class EncoderResolutionSuite extends PlanTest { AssertNotNull( GetStructField('a.struct('a.string, 'b.byte), 1, Some("b")).cast(LongType), cls.getName, "b", "Long")), +<<<<<<< HEAD + false, + ObjectType(cls)), + 'b.int.cast(LongType)), + false, + ObjectType(classOf[Tuple2[_, _]])) +======= ObjectType(cls), propagateNull = false), 'b.int.cast(LongType)), ObjectType(classOf[Tuple2[_, _]]), propagateNull = false) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 compareExpressions(fromRowExpr, expected) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoderSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoderSuite.scala index 666699e18d4a5..0036067caef67 100644 --- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoderSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/ExpressionEncoderSuite.scala @@ -128,9 +128,12 @@ class ExpressionEncoderSuite extends SparkFunSuite { encodeDecodeTest(Map(1 -> "a", 2 -> null), "map with null") encodeDecodeTest(Map(1 -> Map("a" -> 1), 2 -> Map("b" -> 2)), "map of map") +<<<<<<< HEAD +======= encodeDecodeTest(Tuple1[Seq[Int]](null), "null seq in tuple") encodeDecodeTest(Tuple1[Map[String, String]](null), "null map in tuple") +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 // Kryo encoders encodeDecodeTest("hello", "kryo string")(encoderFor(Encoders.kryo[String])) encodeDecodeTest(new KryoSerializable(15), "kryo object")( diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/RowEncoderSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/RowEncoderSuite.scala index 8f4faab7bace5..42461bf2859ee 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/RowEncoderSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/encoders/RowEncoderSuite.scala @@ -108,8 +108,12 @@ class RowEncoderSuite extends SparkFunSuite { .add("arrayOfArrayOfString", ArrayType(arrayOfString)) .add("arrayOfArrayOfInt", ArrayType(ArrayType(IntegerType))) .add("arrayOfMap", ArrayType(mapOfString)) +<<<<<<< HEAD + .add("arrayOfStruct", ArrayType(structOfString))) +======= .add("arrayOfStruct", ArrayType(structOfString)) .add("arrayOfUDT", arrayOfUDT)) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 encodeDecodeTest( new StructType() @@ -131,6 +135,21 @@ class RowEncoderSuite extends SparkFunSuite { new StructType().add("array", arrayOfString).add("map", mapOfString)) .add("structOfUDT", structOfUDT)) +<<<<<<< HEAD + test(s"encode/decode: arrayOfUDT") { + val schema = new StructType() + .add("arrayOfUDT", arrayOfUDT) + + val encoder = RowEncoder(schema) + + val input: Row = Row(Seq(new ExamplePoint(0.1, 0.2), new ExamplePoint(0.3, 0.4))) + val row = encoder.toRow(input) + val convertedBack = encoder.fromRow(row) + assert(input.getSeq[ExamplePoint](0) == convertedBack.getSeq[ExamplePoint](0)) + } + +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 test(s"encode/decode: Product") { val schema = new StructType() .add("structAsProduct", diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeRowJoinerBitsetSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeRowJoinerBitsetSuite.scala index f8342214d9ae0..5361f69c9cc8a 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeRowJoinerBitsetSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateUnsafeRowJoinerBitsetSuite.scala @@ -90,13 +90,21 @@ class GenerateUnsafeRowJoinerBitsetSuite extends SparkFunSuite { } private def createUnsafeRow(numFields: Int): UnsafeRow = { +<<<<<<< HEAD + val row = new UnsafeRow +======= val row = new UnsafeRow(numFields) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 val sizeInBytes = numFields * 8 + ((numFields + 63) / 64) * 8 // Allocate a larger buffer than needed and point the UnsafeRow to somewhere in the middle. // This way we can test the joiner when the input UnsafeRows are not the entire arrays. 
val offset = numFields * 8 val buf = new Array[Byte](sizeInBytes + offset) +<<<<<<< HEAD + row.pointTo(buf, Platform.BYTE_ARRAY_OFFSET + offset, numFields, sizeInBytes) +======= row.pointTo(buf, Platform.BYTE_ARRAY_OFFSET + offset, sizeInBytes) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 row } diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/UnsafeFixedWidthAggregationMap.java b/sql/core/src/main/java/org/apache/spark/sql/execution/UnsafeFixedWidthAggregationMap.java index 6bf9d7bd0367c..66c059b846bf0 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/UnsafeFixedWidthAggregationMap.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/UnsafeFixedWidthAggregationMap.java @@ -61,7 +61,11 @@ public final class UnsafeFixedWidthAggregationMap { /** * Re-used pointer to the current aggregation buffer */ +<<<<<<< HEAD + private final UnsafeRow currentAggregationBuffer = new UnsafeRow(); +======= private final UnsafeRow currentAggregationBuffer; +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 private final boolean enablePerfMetrics; @@ -98,7 +102,10 @@ public UnsafeFixedWidthAggregationMap( long pageSizeBytes, boolean enablePerfMetrics) { this.aggregationBufferSchema = aggregationBufferSchema; +<<<<<<< HEAD +======= this.currentAggregationBuffer = new UnsafeRow(aggregationBufferSchema.length()); +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 this.groupingKeyProjection = UnsafeProjection.create(groupingKeySchema); this.groupingKeySchema = groupingKeySchema; this.map = @@ -148,6 +155,10 @@ public UnsafeRow getAggregationBufferFromUnsafeRow(UnsafeRow unsafeGroupingKeyRo currentAggregationBuffer.pointTo( address.getBaseObject(), address.getBaseOffset(), +<<<<<<< HEAD + aggregationBufferSchema.length(), +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 loc.getValueLength() ); return currentAggregationBuffer; @@ -165,8 +176,13 @@ public KVIterator iterator() { private final BytesToBytesMap.MapIterator mapLocationIterator = map.destructiveIterator(); +<<<<<<< HEAD + private final UnsafeRow key = new UnsafeRow(); + private final UnsafeRow value = new UnsafeRow(); +======= private final UnsafeRow key = new UnsafeRow(groupingKeySchema.length()); private final UnsafeRow value = new UnsafeRow(aggregationBufferSchema.length()); +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 @Override public boolean next() { @@ -177,11 +193,19 @@ public boolean next() { key.pointTo( keyAddress.getBaseObject(), keyAddress.getBaseOffset(), +<<<<<<< HEAD + groupingKeySchema.length(), +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 loc.getKeyLength() ); value.pointTo( valueAddress.getBaseObject(), valueAddress.getBaseOffset(), +<<<<<<< HEAD + aggregationBufferSchema.length(), +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 loc.getValueLength() ); return true; diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/UnsafeKVExternalSorter.java b/sql/core/src/main/java/org/apache/spark/sql/execution/UnsafeKVExternalSorter.java index 0da26bf376a6a..a88185ad0d3b3 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/UnsafeKVExternalSorter.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/UnsafeKVExternalSorter.java @@ -94,7 +94,11 @@ public UnsafeKVExternalSorter( // The only new memory we are allocating is the pointer/prefix array. 
BytesToBytesMap.MapIterator iter = map.iterator(); final int numKeyFields = keySchema.size(); +<<<<<<< HEAD + UnsafeRow row = new UnsafeRow(); +======= UnsafeRow row = new UnsafeRow(numKeyFields); +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 while (iter.hasNext()) { final BytesToBytesMap.Location loc = iter.next(); final Object baseObject = loc.getKeyAddress().getBaseObject(); @@ -107,7 +111,11 @@ public UnsafeKVExternalSorter( long address = taskMemoryManager.encodePageNumberAndOffset(page, baseOffset - 8); // Compute prefix +<<<<<<< HEAD + row.pointTo(baseObject, baseOffset, numKeyFields, loc.getKeyLength()); +======= row.pointTo(baseObject, baseOffset, loc.getKeyLength()); +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 final long prefix = prefixComputer.computePrefix(row); inMemSorter.insertRecord(address, prefix); @@ -194,14 +202,22 @@ public void cleanupResources() { private static final class KVComparator extends RecordComparator { private final BaseOrdering ordering; +<<<<<<< HEAD + private final UnsafeRow row1 = new UnsafeRow(); + private final UnsafeRow row2 = new UnsafeRow(); +======= private final UnsafeRow row1; private final UnsafeRow row2; +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 private final int numKeyFields; public KVComparator(BaseOrdering ordering, int numKeyFields) { this.numKeyFields = numKeyFields; +<<<<<<< HEAD +======= this.row1 = new UnsafeRow(numKeyFields); this.row2 = new UnsafeRow(numKeyFields); +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 this.ordering = ordering; } @@ -209,15 +225,27 @@ public KVComparator(BaseOrdering ordering, int numKeyFields) { public int compare(Object baseObj1, long baseOff1, Object baseObj2, long baseOff2) { // Note that since ordering doesn't need the total length of the record, we just pass -1 // into the row. 
+<<<<<<< HEAD + row1.pointTo(baseObj1, baseOff1 + 4, numKeyFields, -1); + row2.pointTo(baseObj2, baseOff2 + 4, numKeyFields, -1); +======= row1.pointTo(baseObj1, baseOff1 + 4, -1); row2.pointTo(baseObj2, baseOff2 + 4, -1); +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 return ordering.compare(row1, row2); } } public class KVSorterIterator extends KVIterator { +<<<<<<< HEAD + private UnsafeRow key = new UnsafeRow(); + private UnsafeRow value = new UnsafeRow(); + private final int numKeyFields = keySchema.size(); + private final int numValueFields = valueSchema.size(); +======= private UnsafeRow key = new UnsafeRow(keySchema.size()); private UnsafeRow value = new UnsafeRow(valueSchema.size()); +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 private final UnsafeSorterIterator underlying; private KVSorterIterator(UnsafeSorterIterator underlying) { @@ -237,8 +265,13 @@ public boolean next() throws IOException { // Note that recordLen = keyLen + valueLen + 4 bytes (for the keyLen itself) int keyLen = Platform.getInt(baseObj, recordOffset); int valueLen = recordLen - keyLen - 4; +<<<<<<< HEAD + key.pointTo(baseObj, recordOffset + 4, numKeyFields, keyLen); + value.pointTo(baseObj, recordOffset + 4 + keyLen, numValueFields, valueLen); +======= key.pointTo(baseObj, recordOffset + 4, keyLen); value.pointTo(baseObj, recordOffset + 4 + keyLen, valueLen); +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 return true; } else { diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/UnsafeRowParquetRecordReader.java b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/UnsafeRowParquetRecordReader.java index a6758bddfa7d0..404db8d28515c 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/UnsafeRowParquetRecordReader.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/datasources/parquet/UnsafeRowParquetRecordReader.java @@ -21,19 +21,44 @@ import java.nio.ByteBuffer; import java.util.List; +<<<<<<< HEAD +import org.apache.spark.sql.catalyst.expressions.UnsafeRow; +import org.apache.spark.sql.catalyst.expressions.codegen.BufferHolder; +import org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter; +import org.apache.spark.sql.types.Decimal; +import org.apache.spark.unsafe.Platform; +import org.apache.spark.unsafe.types.UTF8String; + +import static org.apache.parquet.column.ValuesType.DEFINITION_LEVEL; +import static org.apache.parquet.column.ValuesType.REPETITION_LEVEL; +import static org.apache.parquet.column.ValuesType.VALUES; + +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.hadoop.mapreduce.InputSplit; import org.apache.hadoop.mapreduce.TaskAttemptContext; import org.apache.parquet.Preconditions; import org.apache.parquet.column.ColumnDescriptor; import org.apache.parquet.column.Dictionary; import org.apache.parquet.column.Encoding; +<<<<<<< HEAD +import org.apache.parquet.column.page.DataPage; +import org.apache.parquet.column.page.DataPageV1; +import org.apache.parquet.column.page.DataPageV2; +import org.apache.parquet.column.page.DictionaryPage; +import org.apache.parquet.column.page.PageReadStore; +import org.apache.parquet.column.page.PageReader; +======= import org.apache.parquet.column.page.*; +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.parquet.column.values.ValuesReader; import org.apache.parquet.io.api.Binary; import org.apache.parquet.schema.OriginalType; import org.apache.parquet.schema.PrimitiveType; import 
org.apache.parquet.schema.Type; +<<<<<<< HEAD +======= import org.apache.spark.sql.catalyst.expressions.UnsafeRow; import org.apache.spark.sql.catalyst.expressions.codegen.BufferHolder; import org.apache.spark.sql.catalyst.expressions.codegen.UnsafeRowWriter; @@ -43,6 +68,7 @@ import static org.apache.parquet.column.ValuesType.*; +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * A specialized RecordReader that reads into UnsafeRows directly using the Parquet column APIs. * @@ -174,11 +200,20 @@ public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptCont rowWriters = new UnsafeRowWriter[rows.length]; for (int i = 0; i < rows.length; ++i) { +<<<<<<< HEAD + rows[i] = new UnsafeRow(); + rowWriters[i] = new UnsafeRowWriter(); + BufferHolder holder = new BufferHolder(rowByteSize); + rowWriters[i].initialize(rows[i], holder, requestedSchema.getFieldCount()); + rows[i].pointTo(holder.buffer, Platform.BYTE_ARRAY_OFFSET, requestedSchema.getFieldCount(), + holder.buffer.length); +======= rows[i] = new UnsafeRow(requestedSchema.getFieldCount()); rowWriters[i] = new UnsafeRowWriter(); BufferHolder holder = new BufferHolder(rowByteSize); rowWriters[i].initialize(rows[i], holder, requestedSchema.getFieldCount()); rows[i].pointTo(holder.buffer, Platform.BYTE_ARRAY_OFFSET, holder.buffer.length); +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala index 0acea95344c22..680a1c7bd6048 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala @@ -154,6 +154,15 @@ class DataFrameReader private[sql](sqlContext: SQLContext) extends Logging { * Don't create too many partitions in parallel on a large cluster; otherwise Spark might crash * your external database systems. * +<<<<<<< HEAD + * @param url JDBC database url of the form `jdbc:subprotocol:subname` + * @param table Name of the table in the external database. + * @param columnName the name of a column of integral type that will be used for partitioning. + * @param lowerBound the minimum value of `columnName` used to decide partition stride + * @param upperBound the maximum value of `columnName` used to decide partition stride + * @param numPartitions the number of partitions. the range `minValue`-`maxValue` will be split + * evenly into this many partitions +======= * @param url JDBC database url of the form `jdbc:subprotocol:subname`. * @param table Name of the table in the external database. * @param columnName the name of a column of integral type that will be used for partitioning. @@ -162,6 +171,7 @@ class DataFrameReader private[sql](sqlContext: SQLContext) extends Logging { * @param numPartitions the number of partitions. This, along with `lowerBound` (inclusive), * `upperBound` (exclusive), form partition strides for generated WHERE * clause expressions used to split the column `columnName` evenly. +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 * @param connectionProperties JDBC database connection arguments, a list of arbitrary string * tag/value. Normally at least a "user" and "password" property * should be included. 
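The UnsafeRow changes running through the hunks above (GenerateUnsafeRowJoinerBitsetSuite, UnsafeFixedWidthAggregationMap, UnsafeKVExternalSorter, UnsafeRowParquetRecordReader) are one API move repeated: on the incoming side the field count becomes a constructor argument, so pointTo no longer takes it per call. The Scala sketch below restates the bitset-suite helper against the post-merge signature; the helper name is illustrative, the sizing arithmetic is taken from that test.

// Sketch of the UnsafeRow API change: field count fixed at construction,
// pointTo() without a numFields argument. Assumes the post-merge Spark
// classes shown in the hunks above are on the classpath.
import org.apache.spark.sql.catalyst.expressions.UnsafeRow
import org.apache.spark.unsafe.Platform

object UnsafeRowPointToSketch {
  def createRow(numFields: Int): UnsafeRow = {
    // 8 bytes per field plus one 64-bit null-tracking word per 64 fields.
    val sizeInBytes = numFields * 8 + ((numFields + 63) / 64) * 8
    // Over-allocate and point into the middle, as the bitset test does.
    val offset = numFields * 8
    val buf = new Array[Byte](sizeInBytes + offset)

    // Post-merge API: field count is a constructor argument ...
    val row = new UnsafeRow(numFields)
    // ... so pointTo only needs buffer, offset and length.
    row.pointTo(buf, Platform.BYTE_ARRAY_OFFSET + offset, sizeInBytes)

    // Pre-merge equivalent (the HEAD side of the conflicts) would have been:
    //   val row = new UnsafeRow()
    //   row.pointTo(buf, Platform.BYTE_ARRAY_OFFSET + offset, numFields, sizeInBytes)
    row
  }
}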
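The reworked jdbc() documentation just above describes lowerBound (inclusive), upperBound (exclusive) and numPartitions as stride parameters for generated WHERE clauses rather than row filters. The snippet below shows roughly what that splitting amounts to; it is an illustration of the documented contract, not a copy of Spark's JDBC partitioning code, and the column and bound values are made up.

// Illustration of how lowerBound / upperBound / numPartitions translate into
// per-partition WHERE clauses for a partitioned JDBC read. Sketch only.
object JdbcStrideSketch {
  def wherePredicates(
      columnName: String,
      lowerBound: Long,
      upperBound: Long,
      numPartitions: Int): Seq[String] = {
    val stride = (upperBound - lowerBound) / numPartitions
    (0 until numPartitions).map { i =>
      val start = lowerBound + i * stride
      val end = start + stride
      if (i == 0) {
        // First partition is open at the bottom so rows below lowerBound
        // (and NULLs) are not silently dropped.
        s"$columnName < $end OR $columnName IS NULL"
      } else if (i == numPartitions - 1) {
        // Last partition is open-ended for rows at or above upperBound.
        s"$columnName >= $start"
      } else {
        s"$columnName >= $start AND $columnName < $end"
      }
    }
  }

  def main(args: Array[String]): Unit = {
    // e.g. an integral "id" column in [0, 1000) split across 4 partitions.
    wherePredicates("id", lowerBound = 0L, upperBound = 1000L, numPartitions = 4)
      .foreach(println)
  }
}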
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala index ab362539e2982..0355bccc8c806 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala @@ -297,12 +297,16 @@ final class DataFrameWriter private[sql](df: DataFrame) { if (!tableExists) { val schema = JdbcUtils.schemaString(df, url) val sql = s"CREATE TABLE $table ($schema)" +<<<<<<< HEAD + conn.createStatement.executeUpdate(sql) +======= val statement = conn.createStatement try { statement.executeUpdate(sql) } finally { statement.close() } +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 } } finally { conn.close() diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala index b58a3739912bc..f85a751bbd8b1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLConf.scala @@ -334,8 +334,12 @@ private[spark] object SQLConf { val HIVE_VERIFY_PARTITION_PATH = booleanConf("spark.sql.hive.verifyPartitionPath", defaultValue = Some(false), +<<<<<<< HEAD + doc = "") +======= doc = "When true, check all the partition paths under the table\'s root directory " + "when reading data stored in HDFS.") +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 val HIVE_METASTORE_PARTITION_PRUNING = booleanConf("spark.sql.hive.metastorePartitionPruning", defaultValue = Some(false), @@ -353,7 +357,11 @@ private[spark] object SQLConf { val COLUMN_NAME_OF_CORRUPT_RECORD = stringConf("spark.sql.columnNameOfCorruptRecord", defaultValue = Some("_corrupt_record"), +<<<<<<< HEAD + doc = "") +======= doc = "The name of internal column for storing raw/un-parsed JSON records that fail to parse.") +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 val BROADCAST_TIMEOUT = intConf("spark.sql.broadcastTimeout", defaultValue = Some(5 * 60), @@ -414,8 +422,12 @@ private[spark] object SQLConf { val PARALLEL_PARTITION_DISCOVERY_THRESHOLD = intConf( key = "spark.sql.sources.parallelPartitionDiscovery.threshold", defaultValue = Some(32), +<<<<<<< HEAD + doc = "") +======= doc = "The degree of parallelism for schema merging and partition discovery of " + "Parquet data sources.") +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 // Whether to perform eager analysis when constructing a dataframe. // Set to false when debugging requires the ability to look at invalid query plans. 
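In the DataFrameWriter.scala hunk above, the incoming side wraps the CREATE TABLE statement so it is always closed, where the HEAD side could leak the Statement if executeUpdate threw. The same close-in-finally shape, reduced to a self-contained sketch; the connection handling and the url/table/schema values around it are placeholders, not the surrounding Spark code.

// Minimal sketch of the Statement-handling fix: always close the JDBC
// Statement, even when executeUpdate fails. Values are illustrative.
import java.sql.{Connection, DriverManager}

object JdbcCreateTableSketch {
  def createTable(url: String, table: String, schemaDdl: String): Unit = {
    val conn: Connection = DriverManager.getConnection(url)
    try {
      val sql = s"CREATE TABLE $table ($schemaDdl)"
      val statement = conn.createStatement()
      try {
        statement.executeUpdate(sql)
      } finally {
        statement.close() // the pre-merge code skipped this, leaking the statement
      }
    } finally {
      conn.close()
    }
  }
}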
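The UDFRegistration.scala hunks that follow repeat one change across register[...] for 0 through 22 arguments: the HEAD side collapses the reflected input types with Try(...).getOrElse(Nil) and hands the same Seq to both ScalaUDF and UserDefinedFunction, while the incoming side keeps the Try(...).toOption result so that "types could not be derived" (None) stays distinguishable from "no arguments" (Some(Nil)), falling back to Nil only where ScalaUDF needs a concrete Seq. A stripped-down sketch of that distinction, with toy types standing in for ScalaReflection and Catalyst:

// Toy illustration of the Try(...).toOption pattern from the incoming side of
// the UDFRegistration hunks below. DataType and lookupType are hypothetical.
import scala.util.Try

object InputTypesSketch {
  sealed trait DataType
  case object IntType extends DataType
  case object StringType extends DataType

  // Pretend reflection step that may fail for unsupported types.
  def lookupType(name: String): DataType = name match {
    case "Int"    => IntType
    case "String" => StringType
    case other    => throw new IllegalArgumentException(s"unsupported type: $other")
  }

  def register(argTypeNames: Seq[String]): Unit = {
    // Incoming side: keep the Option, so None ("unknown") is not conflated
    // with Some(Nil) ("zero arguments").
    val inputTypes: Option[Seq[DataType]] =
      Try(argTypeNames.map(lookupType)).toOption

    // Only the execution path needs a concrete Seq; fall back to Nil here.
    val typesForUdf: Seq[DataType] = inputTypes.getOrElse(Nil)

    println(s"declared=$inputTypes used=$typesForUdf")
  }

  def main(args: Array[String]): Unit = {
    register(Seq("Int", "String"))      // declared=Some(List(IntType, StringType))
    register(Seq("Int", "Unsupported")) // declared=None, used=List()
  }
}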
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala b/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala index f87a88d49744f..d89547722aa3b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/UDFRegistration.scala @@ -17,6 +17,11 @@ package org.apache.spark.sql +<<<<<<< HEAD +import java.util.{List => JList, Map => JMap} + +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import scala.reflect.runtime.universe.TypeTag import scala.util.Try @@ -67,7 +72,11 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { udaf } +<<<<<<< HEAD + // scalastyle:off +======= // scalastyle:off line.size.limit +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /* register 0-22 were generated by this script @@ -83,8 +92,13 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { */ def register[$typeTags](name: String, func: Function$x[$types]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType +<<<<<<< HEAD + val inputTypes = Try($inputTypes).getOrElse(Nil) + def builder(e: Seq[Expression]) = ScalaUDF(func, dataType, e, inputTypes) +======= val inputTypes = Try($inputTypes).toOption def builder(e: Seq[Expression]) = ScalaUDF(func, dataType, e, inputTypes.getOrElse(Nil)) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 functionRegistry.registerFunction(name, builder) UserDefinedFunction(func, dataType, inputTypes) }""") @@ -100,7 +114,11 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { | * Register a user-defined function with ${i} arguments. | * @since 1.3.0 | */ +<<<<<<< HEAD + |def register(name: String, f: UDF$i[$extTypeArgs, _], returnType: DataType) = { +======= |def register(name: String, f: UDF$i[$extTypeArgs, _], returnType: DataType): Unit = { +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 | functionRegistry.registerFunction( | name, | (e: Seq[Expression]) => ScalaUDF(f$anyCast.call($anyParams), returnType, e)) @@ -115,8 +133,13 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { */ def register[RT: TypeTag](name: String, func: Function0[RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType +<<<<<<< HEAD + val inputTypes = Try(Nil).getOrElse(Nil) + def builder(e: Seq[Expression]) = ScalaUDF(func, dataType, e, inputTypes) +======= val inputTypes = Try(Nil).toOption def builder(e: Seq[Expression]) = ScalaUDF(func, dataType, e, inputTypes.getOrElse(Nil)) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 functionRegistry.registerFunction(name, builder) UserDefinedFunction(func, dataType, inputTypes) } @@ -128,8 +151,13 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { */ def register[RT: TypeTag, A1: TypeTag](name: String, func: Function1[A1, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType +<<<<<<< HEAD + val inputTypes = Try(ScalaReflection.schemaFor[A1].dataType :: Nil).getOrElse(Nil) + def builder(e: Seq[Expression]) = ScalaUDF(func, dataType, e, inputTypes) +======= val inputTypes = Try(ScalaReflection.schemaFor[A1].dataType :: Nil).toOption def builder(e: Seq[Expression]) = ScalaUDF(func, dataType, e, inputTypes.getOrElse(Nil)) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 functionRegistry.registerFunction(name, builder) UserDefinedFunction(func, dataType, inputTypes) } @@ -141,8 +169,13 @@ class UDFRegistration private[sql] 
(sqlContext: SQLContext) extends Logging { */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag](name: String, func: Function2[A1, A2, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType +<<<<<<< HEAD + val inputTypes = Try(ScalaReflection.schemaFor[A1].dataType :: ScalaReflection.schemaFor[A2].dataType :: Nil).getOrElse(Nil) + def builder(e: Seq[Expression]) = ScalaUDF(func, dataType, e, inputTypes) +======= val inputTypes = Try(ScalaReflection.schemaFor[A1].dataType :: ScalaReflection.schemaFor[A2].dataType :: Nil).toOption def builder(e: Seq[Expression]) = ScalaUDF(func, dataType, e, inputTypes.getOrElse(Nil)) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 functionRegistry.registerFunction(name, builder) UserDefinedFunction(func, dataType, inputTypes) } @@ -154,8 +187,13 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag](name: String, func: Function3[A1, A2, A3, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType +<<<<<<< HEAD + val inputTypes = Try(ScalaReflection.schemaFor[A1].dataType :: ScalaReflection.schemaFor[A2].dataType :: ScalaReflection.schemaFor[A3].dataType :: Nil).getOrElse(Nil) + def builder(e: Seq[Expression]) = ScalaUDF(func, dataType, e, inputTypes) +======= val inputTypes = Try(ScalaReflection.schemaFor[A1].dataType :: ScalaReflection.schemaFor[A2].dataType :: ScalaReflection.schemaFor[A3].dataType :: Nil).toOption def builder(e: Seq[Expression]) = ScalaUDF(func, dataType, e, inputTypes.getOrElse(Nil)) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 functionRegistry.registerFunction(name, builder) UserDefinedFunction(func, dataType, inputTypes) } @@ -167,8 +205,13 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag](name: String, func: Function4[A1, A2, A3, A4, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType +<<<<<<< HEAD + val inputTypes = Try(ScalaReflection.schemaFor[A1].dataType :: ScalaReflection.schemaFor[A2].dataType :: ScalaReflection.schemaFor[A3].dataType :: ScalaReflection.schemaFor[A4].dataType :: Nil).getOrElse(Nil) + def builder(e: Seq[Expression]) = ScalaUDF(func, dataType, e, inputTypes) +======= val inputTypes = Try(ScalaReflection.schemaFor[A1].dataType :: ScalaReflection.schemaFor[A2].dataType :: ScalaReflection.schemaFor[A3].dataType :: ScalaReflection.schemaFor[A4].dataType :: Nil).toOption def builder(e: Seq[Expression]) = ScalaUDF(func, dataType, e, inputTypes.getOrElse(Nil)) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 functionRegistry.registerFunction(name, builder) UserDefinedFunction(func, dataType, inputTypes) } @@ -180,8 +223,13 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag](name: String, func: Function5[A1, A2, A3, A4, A5, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType +<<<<<<< HEAD + val inputTypes = Try(ScalaReflection.schemaFor[A1].dataType :: ScalaReflection.schemaFor[A2].dataType :: ScalaReflection.schemaFor[A3].dataType :: ScalaReflection.schemaFor[A4].dataType :: ScalaReflection.schemaFor[A5].dataType :: Nil).getOrElse(Nil) + def builder(e: Seq[Expression]) = ScalaUDF(func, dataType, e, inputTypes) +======= val inputTypes = 
Try(ScalaReflection.schemaFor[A1].dataType :: ScalaReflection.schemaFor[A2].dataType :: ScalaReflection.schemaFor[A3].dataType :: ScalaReflection.schemaFor[A4].dataType :: ScalaReflection.schemaFor[A5].dataType :: Nil).toOption def builder(e: Seq[Expression]) = ScalaUDF(func, dataType, e, inputTypes.getOrElse(Nil)) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 functionRegistry.registerFunction(name, builder) UserDefinedFunction(func, dataType, inputTypes) } @@ -193,8 +241,13 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag](name: String, func: Function6[A1, A2, A3, A4, A5, A6, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType +<<<<<<< HEAD + val inputTypes = Try(ScalaReflection.schemaFor[A1].dataType :: ScalaReflection.schemaFor[A2].dataType :: ScalaReflection.schemaFor[A3].dataType :: ScalaReflection.schemaFor[A4].dataType :: ScalaReflection.schemaFor[A5].dataType :: ScalaReflection.schemaFor[A6].dataType :: Nil).getOrElse(Nil) + def builder(e: Seq[Expression]) = ScalaUDF(func, dataType, e, inputTypes) +======= val inputTypes = Try(ScalaReflection.schemaFor[A1].dataType :: ScalaReflection.schemaFor[A2].dataType :: ScalaReflection.schemaFor[A3].dataType :: ScalaReflection.schemaFor[A4].dataType :: ScalaReflection.schemaFor[A5].dataType :: ScalaReflection.schemaFor[A6].dataType :: Nil).toOption def builder(e: Seq[Expression]) = ScalaUDF(func, dataType, e, inputTypes.getOrElse(Nil)) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 functionRegistry.registerFunction(name, builder) UserDefinedFunction(func, dataType, inputTypes) } @@ -206,8 +259,13 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag](name: String, func: Function7[A1, A2, A3, A4, A5, A6, A7, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType +<<<<<<< HEAD + val inputTypes = Try(ScalaReflection.schemaFor[A1].dataType :: ScalaReflection.schemaFor[A2].dataType :: ScalaReflection.schemaFor[A3].dataType :: ScalaReflection.schemaFor[A4].dataType :: ScalaReflection.schemaFor[A5].dataType :: ScalaReflection.schemaFor[A6].dataType :: ScalaReflection.schemaFor[A7].dataType :: Nil).getOrElse(Nil) + def builder(e: Seq[Expression]) = ScalaUDF(func, dataType, e, inputTypes) +======= val inputTypes = Try(ScalaReflection.schemaFor[A1].dataType :: ScalaReflection.schemaFor[A2].dataType :: ScalaReflection.schemaFor[A3].dataType :: ScalaReflection.schemaFor[A4].dataType :: ScalaReflection.schemaFor[A5].dataType :: ScalaReflection.schemaFor[A6].dataType :: ScalaReflection.schemaFor[A7].dataType :: Nil).toOption def builder(e: Seq[Expression]) = ScalaUDF(func, dataType, e, inputTypes.getOrElse(Nil)) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 functionRegistry.registerFunction(name, builder) UserDefinedFunction(func, dataType, inputTypes) } @@ -219,8 +277,13 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag](name: String, func: Function8[A1, A2, A3, A4, A5, A6, A7, A8, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType +<<<<<<< HEAD + val inputTypes = Try(ScalaReflection.schemaFor[A1].dataType :: 
ScalaReflection.schemaFor[A2].dataType :: ScalaReflection.schemaFor[A3].dataType :: ScalaReflection.schemaFor[A4].dataType :: ScalaReflection.schemaFor[A5].dataType :: ScalaReflection.schemaFor[A6].dataType :: ScalaReflection.schemaFor[A7].dataType :: ScalaReflection.schemaFor[A8].dataType :: Nil).getOrElse(Nil) + def builder(e: Seq[Expression]) = ScalaUDF(func, dataType, e, inputTypes) +======= val inputTypes = Try(ScalaReflection.schemaFor[A1].dataType :: ScalaReflection.schemaFor[A2].dataType :: ScalaReflection.schemaFor[A3].dataType :: ScalaReflection.schemaFor[A4].dataType :: ScalaReflection.schemaFor[A5].dataType :: ScalaReflection.schemaFor[A6].dataType :: ScalaReflection.schemaFor[A7].dataType :: ScalaReflection.schemaFor[A8].dataType :: Nil).toOption def builder(e: Seq[Expression]) = ScalaUDF(func, dataType, e, inputTypes.getOrElse(Nil)) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 functionRegistry.registerFunction(name, builder) UserDefinedFunction(func, dataType, inputTypes) } @@ -232,8 +295,13 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag](name: String, func: Function9[A1, A2, A3, A4, A5, A6, A7, A8, A9, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType +<<<<<<< HEAD + val inputTypes = Try(ScalaReflection.schemaFor[A1].dataType :: ScalaReflection.schemaFor[A2].dataType :: ScalaReflection.schemaFor[A3].dataType :: ScalaReflection.schemaFor[A4].dataType :: ScalaReflection.schemaFor[A5].dataType :: ScalaReflection.schemaFor[A6].dataType :: ScalaReflection.schemaFor[A7].dataType :: ScalaReflection.schemaFor[A8].dataType :: ScalaReflection.schemaFor[A9].dataType :: Nil).getOrElse(Nil) + def builder(e: Seq[Expression]) = ScalaUDF(func, dataType, e, inputTypes) +======= val inputTypes = Try(ScalaReflection.schemaFor[A1].dataType :: ScalaReflection.schemaFor[A2].dataType :: ScalaReflection.schemaFor[A3].dataType :: ScalaReflection.schemaFor[A4].dataType :: ScalaReflection.schemaFor[A5].dataType :: ScalaReflection.schemaFor[A6].dataType :: ScalaReflection.schemaFor[A7].dataType :: ScalaReflection.schemaFor[A8].dataType :: ScalaReflection.schemaFor[A9].dataType :: Nil).toOption def builder(e: Seq[Expression]) = ScalaUDF(func, dataType, e, inputTypes.getOrElse(Nil)) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 functionRegistry.registerFunction(name, builder) UserDefinedFunction(func, dataType, inputTypes) } @@ -245,8 +313,13 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag](name: String, func: Function10[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType +<<<<<<< HEAD + val inputTypes = Try(ScalaReflection.schemaFor[A1].dataType :: ScalaReflection.schemaFor[A2].dataType :: ScalaReflection.schemaFor[A3].dataType :: ScalaReflection.schemaFor[A4].dataType :: ScalaReflection.schemaFor[A5].dataType :: ScalaReflection.schemaFor[A6].dataType :: ScalaReflection.schemaFor[A7].dataType :: ScalaReflection.schemaFor[A8].dataType :: ScalaReflection.schemaFor[A9].dataType :: ScalaReflection.schemaFor[A10].dataType :: Nil).getOrElse(Nil) + def builder(e: Seq[Expression]) = ScalaUDF(func, dataType, e, inputTypes) +======= 
val inputTypes = Try(ScalaReflection.schemaFor[A1].dataType :: ScalaReflection.schemaFor[A2].dataType :: ScalaReflection.schemaFor[A3].dataType :: ScalaReflection.schemaFor[A4].dataType :: ScalaReflection.schemaFor[A5].dataType :: ScalaReflection.schemaFor[A6].dataType :: ScalaReflection.schemaFor[A7].dataType :: ScalaReflection.schemaFor[A8].dataType :: ScalaReflection.schemaFor[A9].dataType :: ScalaReflection.schemaFor[A10].dataType :: Nil).toOption def builder(e: Seq[Expression]) = ScalaUDF(func, dataType, e, inputTypes.getOrElse(Nil)) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 functionRegistry.registerFunction(name, builder) UserDefinedFunction(func, dataType, inputTypes) } @@ -258,8 +331,13 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag, A11: TypeTag](name: String, func: Function11[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType +<<<<<<< HEAD + val inputTypes = Try(ScalaReflection.schemaFor[A1].dataType :: ScalaReflection.schemaFor[A2].dataType :: ScalaReflection.schemaFor[A3].dataType :: ScalaReflection.schemaFor[A4].dataType :: ScalaReflection.schemaFor[A5].dataType :: ScalaReflection.schemaFor[A6].dataType :: ScalaReflection.schemaFor[A7].dataType :: ScalaReflection.schemaFor[A8].dataType :: ScalaReflection.schemaFor[A9].dataType :: ScalaReflection.schemaFor[A10].dataType :: ScalaReflection.schemaFor[A11].dataType :: Nil).getOrElse(Nil) + def builder(e: Seq[Expression]) = ScalaUDF(func, dataType, e, inputTypes) +======= val inputTypes = Try(ScalaReflection.schemaFor[A1].dataType :: ScalaReflection.schemaFor[A2].dataType :: ScalaReflection.schemaFor[A3].dataType :: ScalaReflection.schemaFor[A4].dataType :: ScalaReflection.schemaFor[A5].dataType :: ScalaReflection.schemaFor[A6].dataType :: ScalaReflection.schemaFor[A7].dataType :: ScalaReflection.schemaFor[A8].dataType :: ScalaReflection.schemaFor[A9].dataType :: ScalaReflection.schemaFor[A10].dataType :: ScalaReflection.schemaFor[A11].dataType :: Nil).toOption def builder(e: Seq[Expression]) = ScalaUDF(func, dataType, e, inputTypes.getOrElse(Nil)) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 functionRegistry.registerFunction(name, builder) UserDefinedFunction(func, dataType, inputTypes) } @@ -271,8 +349,13 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag, A11: TypeTag, A12: TypeTag](name: String, func: Function12[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType +<<<<<<< HEAD + val inputTypes = Try(ScalaReflection.schemaFor[A1].dataType :: ScalaReflection.schemaFor[A2].dataType :: ScalaReflection.schemaFor[A3].dataType :: ScalaReflection.schemaFor[A4].dataType :: ScalaReflection.schemaFor[A5].dataType :: ScalaReflection.schemaFor[A6].dataType :: ScalaReflection.schemaFor[A7].dataType :: ScalaReflection.schemaFor[A8].dataType :: ScalaReflection.schemaFor[A9].dataType :: ScalaReflection.schemaFor[A10].dataType :: ScalaReflection.schemaFor[A11].dataType :: ScalaReflection.schemaFor[A12].dataType :: Nil).getOrElse(Nil) + def builder(e: Seq[Expression]) = ScalaUDF(func, dataType, e, 
inputTypes) +======= val inputTypes = Try(ScalaReflection.schemaFor[A1].dataType :: ScalaReflection.schemaFor[A2].dataType :: ScalaReflection.schemaFor[A3].dataType :: ScalaReflection.schemaFor[A4].dataType :: ScalaReflection.schemaFor[A5].dataType :: ScalaReflection.schemaFor[A6].dataType :: ScalaReflection.schemaFor[A7].dataType :: ScalaReflection.schemaFor[A8].dataType :: ScalaReflection.schemaFor[A9].dataType :: ScalaReflection.schemaFor[A10].dataType :: ScalaReflection.schemaFor[A11].dataType :: ScalaReflection.schemaFor[A12].dataType :: Nil).toOption def builder(e: Seq[Expression]) = ScalaUDF(func, dataType, e, inputTypes.getOrElse(Nil)) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 functionRegistry.registerFunction(name, builder) UserDefinedFunction(func, dataType, inputTypes) } @@ -284,8 +367,13 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag, A11: TypeTag, A12: TypeTag, A13: TypeTag](name: String, func: Function13[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType +<<<<<<< HEAD + val inputTypes = Try(ScalaReflection.schemaFor[A1].dataType :: ScalaReflection.schemaFor[A2].dataType :: ScalaReflection.schemaFor[A3].dataType :: ScalaReflection.schemaFor[A4].dataType :: ScalaReflection.schemaFor[A5].dataType :: ScalaReflection.schemaFor[A6].dataType :: ScalaReflection.schemaFor[A7].dataType :: ScalaReflection.schemaFor[A8].dataType :: ScalaReflection.schemaFor[A9].dataType :: ScalaReflection.schemaFor[A10].dataType :: ScalaReflection.schemaFor[A11].dataType :: ScalaReflection.schemaFor[A12].dataType :: ScalaReflection.schemaFor[A13].dataType :: Nil).getOrElse(Nil) + def builder(e: Seq[Expression]) = ScalaUDF(func, dataType, e, inputTypes) +======= val inputTypes = Try(ScalaReflection.schemaFor[A1].dataType :: ScalaReflection.schemaFor[A2].dataType :: ScalaReflection.schemaFor[A3].dataType :: ScalaReflection.schemaFor[A4].dataType :: ScalaReflection.schemaFor[A5].dataType :: ScalaReflection.schemaFor[A6].dataType :: ScalaReflection.schemaFor[A7].dataType :: ScalaReflection.schemaFor[A8].dataType :: ScalaReflection.schemaFor[A9].dataType :: ScalaReflection.schemaFor[A10].dataType :: ScalaReflection.schemaFor[A11].dataType :: ScalaReflection.schemaFor[A12].dataType :: ScalaReflection.schemaFor[A13].dataType :: Nil).toOption def builder(e: Seq[Expression]) = ScalaUDF(func, dataType, e, inputTypes.getOrElse(Nil)) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 functionRegistry.registerFunction(name, builder) UserDefinedFunction(func, dataType, inputTypes) } @@ -297,8 +385,13 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag, A11: TypeTag, A12: TypeTag, A13: TypeTag, A14: TypeTag](name: String, func: Function14[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType +<<<<<<< HEAD + val inputTypes = Try(ScalaReflection.schemaFor[A1].dataType :: ScalaReflection.schemaFor[A2].dataType :: ScalaReflection.schemaFor[A3].dataType :: ScalaReflection.schemaFor[A4].dataType :: ScalaReflection.schemaFor[A5].dataType :: 
ScalaReflection.schemaFor[A6].dataType :: ScalaReflection.schemaFor[A7].dataType :: ScalaReflection.schemaFor[A8].dataType :: ScalaReflection.schemaFor[A9].dataType :: ScalaReflection.schemaFor[A10].dataType :: ScalaReflection.schemaFor[A11].dataType :: ScalaReflection.schemaFor[A12].dataType :: ScalaReflection.schemaFor[A13].dataType :: ScalaReflection.schemaFor[A14].dataType :: Nil).getOrElse(Nil) + def builder(e: Seq[Expression]) = ScalaUDF(func, dataType, e, inputTypes) +======= val inputTypes = Try(ScalaReflection.schemaFor[A1].dataType :: ScalaReflection.schemaFor[A2].dataType :: ScalaReflection.schemaFor[A3].dataType :: ScalaReflection.schemaFor[A4].dataType :: ScalaReflection.schemaFor[A5].dataType :: ScalaReflection.schemaFor[A6].dataType :: ScalaReflection.schemaFor[A7].dataType :: ScalaReflection.schemaFor[A8].dataType :: ScalaReflection.schemaFor[A9].dataType :: ScalaReflection.schemaFor[A10].dataType :: ScalaReflection.schemaFor[A11].dataType :: ScalaReflection.schemaFor[A12].dataType :: ScalaReflection.schemaFor[A13].dataType :: ScalaReflection.schemaFor[A14].dataType :: Nil).toOption def builder(e: Seq[Expression]) = ScalaUDF(func, dataType, e, inputTypes.getOrElse(Nil)) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 functionRegistry.registerFunction(name, builder) UserDefinedFunction(func, dataType, inputTypes) } @@ -310,8 +403,13 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag, A11: TypeTag, A12: TypeTag, A13: TypeTag, A14: TypeTag, A15: TypeTag](name: String, func: Function15[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType +<<<<<<< HEAD + val inputTypes = Try(ScalaReflection.schemaFor[A1].dataType :: ScalaReflection.schemaFor[A2].dataType :: ScalaReflection.schemaFor[A3].dataType :: ScalaReflection.schemaFor[A4].dataType :: ScalaReflection.schemaFor[A5].dataType :: ScalaReflection.schemaFor[A6].dataType :: ScalaReflection.schemaFor[A7].dataType :: ScalaReflection.schemaFor[A8].dataType :: ScalaReflection.schemaFor[A9].dataType :: ScalaReflection.schemaFor[A10].dataType :: ScalaReflection.schemaFor[A11].dataType :: ScalaReflection.schemaFor[A12].dataType :: ScalaReflection.schemaFor[A13].dataType :: ScalaReflection.schemaFor[A14].dataType :: ScalaReflection.schemaFor[A15].dataType :: Nil).getOrElse(Nil) + def builder(e: Seq[Expression]) = ScalaUDF(func, dataType, e, inputTypes) +======= val inputTypes = Try(ScalaReflection.schemaFor[A1].dataType :: ScalaReflection.schemaFor[A2].dataType :: ScalaReflection.schemaFor[A3].dataType :: ScalaReflection.schemaFor[A4].dataType :: ScalaReflection.schemaFor[A5].dataType :: ScalaReflection.schemaFor[A6].dataType :: ScalaReflection.schemaFor[A7].dataType :: ScalaReflection.schemaFor[A8].dataType :: ScalaReflection.schemaFor[A9].dataType :: ScalaReflection.schemaFor[A10].dataType :: ScalaReflection.schemaFor[A11].dataType :: ScalaReflection.schemaFor[A12].dataType :: ScalaReflection.schemaFor[A13].dataType :: ScalaReflection.schemaFor[A14].dataType :: ScalaReflection.schemaFor[A15].dataType :: Nil).toOption def builder(e: Seq[Expression]) = ScalaUDF(func, dataType, e, inputTypes.getOrElse(Nil)) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 functionRegistry.registerFunction(name, builder) UserDefinedFunction(func, dataType, inputTypes) } @@ 
-323,8 +421,13 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag, A11: TypeTag, A12: TypeTag, A13: TypeTag, A14: TypeTag, A15: TypeTag, A16: TypeTag](name: String, func: Function16[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15, A16, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType +<<<<<<< HEAD + val inputTypes = Try(ScalaReflection.schemaFor[A1].dataType :: ScalaReflection.schemaFor[A2].dataType :: ScalaReflection.schemaFor[A3].dataType :: ScalaReflection.schemaFor[A4].dataType :: ScalaReflection.schemaFor[A5].dataType :: ScalaReflection.schemaFor[A6].dataType :: ScalaReflection.schemaFor[A7].dataType :: ScalaReflection.schemaFor[A8].dataType :: ScalaReflection.schemaFor[A9].dataType :: ScalaReflection.schemaFor[A10].dataType :: ScalaReflection.schemaFor[A11].dataType :: ScalaReflection.schemaFor[A12].dataType :: ScalaReflection.schemaFor[A13].dataType :: ScalaReflection.schemaFor[A14].dataType :: ScalaReflection.schemaFor[A15].dataType :: ScalaReflection.schemaFor[A16].dataType :: Nil).getOrElse(Nil) + def builder(e: Seq[Expression]) = ScalaUDF(func, dataType, e, inputTypes) +======= val inputTypes = Try(ScalaReflection.schemaFor[A1].dataType :: ScalaReflection.schemaFor[A2].dataType :: ScalaReflection.schemaFor[A3].dataType :: ScalaReflection.schemaFor[A4].dataType :: ScalaReflection.schemaFor[A5].dataType :: ScalaReflection.schemaFor[A6].dataType :: ScalaReflection.schemaFor[A7].dataType :: ScalaReflection.schemaFor[A8].dataType :: ScalaReflection.schemaFor[A9].dataType :: ScalaReflection.schemaFor[A10].dataType :: ScalaReflection.schemaFor[A11].dataType :: ScalaReflection.schemaFor[A12].dataType :: ScalaReflection.schemaFor[A13].dataType :: ScalaReflection.schemaFor[A14].dataType :: ScalaReflection.schemaFor[A15].dataType :: ScalaReflection.schemaFor[A16].dataType :: Nil).toOption def builder(e: Seq[Expression]) = ScalaUDF(func, dataType, e, inputTypes.getOrElse(Nil)) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 functionRegistry.registerFunction(name, builder) UserDefinedFunction(func, dataType, inputTypes) } @@ -336,8 +439,13 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag, A11: TypeTag, A12: TypeTag, A13: TypeTag, A14: TypeTag, A15: TypeTag, A16: TypeTag, A17: TypeTag](name: String, func: Function17[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15, A16, A17, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType +<<<<<<< HEAD + val inputTypes = Try(ScalaReflection.schemaFor[A1].dataType :: ScalaReflection.schemaFor[A2].dataType :: ScalaReflection.schemaFor[A3].dataType :: ScalaReflection.schemaFor[A4].dataType :: ScalaReflection.schemaFor[A5].dataType :: ScalaReflection.schemaFor[A6].dataType :: ScalaReflection.schemaFor[A7].dataType :: ScalaReflection.schemaFor[A8].dataType :: ScalaReflection.schemaFor[A9].dataType :: ScalaReflection.schemaFor[A10].dataType :: ScalaReflection.schemaFor[A11].dataType :: ScalaReflection.schemaFor[A12].dataType :: ScalaReflection.schemaFor[A13].dataType :: ScalaReflection.schemaFor[A14].dataType :: ScalaReflection.schemaFor[A15].dataType :: 
ScalaReflection.schemaFor[A16].dataType :: ScalaReflection.schemaFor[A17].dataType :: Nil).getOrElse(Nil) + def builder(e: Seq[Expression]) = ScalaUDF(func, dataType, e, inputTypes) +======= val inputTypes = Try(ScalaReflection.schemaFor[A1].dataType :: ScalaReflection.schemaFor[A2].dataType :: ScalaReflection.schemaFor[A3].dataType :: ScalaReflection.schemaFor[A4].dataType :: ScalaReflection.schemaFor[A5].dataType :: ScalaReflection.schemaFor[A6].dataType :: ScalaReflection.schemaFor[A7].dataType :: ScalaReflection.schemaFor[A8].dataType :: ScalaReflection.schemaFor[A9].dataType :: ScalaReflection.schemaFor[A10].dataType :: ScalaReflection.schemaFor[A11].dataType :: ScalaReflection.schemaFor[A12].dataType :: ScalaReflection.schemaFor[A13].dataType :: ScalaReflection.schemaFor[A14].dataType :: ScalaReflection.schemaFor[A15].dataType :: ScalaReflection.schemaFor[A16].dataType :: ScalaReflection.schemaFor[A17].dataType :: Nil).toOption def builder(e: Seq[Expression]) = ScalaUDF(func, dataType, e, inputTypes.getOrElse(Nil)) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 functionRegistry.registerFunction(name, builder) UserDefinedFunction(func, dataType, inputTypes) } @@ -349,8 +457,13 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag, A11: TypeTag, A12: TypeTag, A13: TypeTag, A14: TypeTag, A15: TypeTag, A16: TypeTag, A17: TypeTag, A18: TypeTag](name: String, func: Function18[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15, A16, A17, A18, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType +<<<<<<< HEAD + val inputTypes = Try(ScalaReflection.schemaFor[A1].dataType :: ScalaReflection.schemaFor[A2].dataType :: ScalaReflection.schemaFor[A3].dataType :: ScalaReflection.schemaFor[A4].dataType :: ScalaReflection.schemaFor[A5].dataType :: ScalaReflection.schemaFor[A6].dataType :: ScalaReflection.schemaFor[A7].dataType :: ScalaReflection.schemaFor[A8].dataType :: ScalaReflection.schemaFor[A9].dataType :: ScalaReflection.schemaFor[A10].dataType :: ScalaReflection.schemaFor[A11].dataType :: ScalaReflection.schemaFor[A12].dataType :: ScalaReflection.schemaFor[A13].dataType :: ScalaReflection.schemaFor[A14].dataType :: ScalaReflection.schemaFor[A15].dataType :: ScalaReflection.schemaFor[A16].dataType :: ScalaReflection.schemaFor[A17].dataType :: ScalaReflection.schemaFor[A18].dataType :: Nil).getOrElse(Nil) + def builder(e: Seq[Expression]) = ScalaUDF(func, dataType, e, inputTypes) +======= val inputTypes = Try(ScalaReflection.schemaFor[A1].dataType :: ScalaReflection.schemaFor[A2].dataType :: ScalaReflection.schemaFor[A3].dataType :: ScalaReflection.schemaFor[A4].dataType :: ScalaReflection.schemaFor[A5].dataType :: ScalaReflection.schemaFor[A6].dataType :: ScalaReflection.schemaFor[A7].dataType :: ScalaReflection.schemaFor[A8].dataType :: ScalaReflection.schemaFor[A9].dataType :: ScalaReflection.schemaFor[A10].dataType :: ScalaReflection.schemaFor[A11].dataType :: ScalaReflection.schemaFor[A12].dataType :: ScalaReflection.schemaFor[A13].dataType :: ScalaReflection.schemaFor[A14].dataType :: ScalaReflection.schemaFor[A15].dataType :: ScalaReflection.schemaFor[A16].dataType :: ScalaReflection.schemaFor[A17].dataType :: ScalaReflection.schemaFor[A18].dataType :: Nil).toOption def builder(e: Seq[Expression]) = ScalaUDF(func, dataType, e, inputTypes.getOrElse(Nil)) 
+>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 functionRegistry.registerFunction(name, builder) UserDefinedFunction(func, dataType, inputTypes) } @@ -362,8 +475,13 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag, A11: TypeTag, A12: TypeTag, A13: TypeTag, A14: TypeTag, A15: TypeTag, A16: TypeTag, A17: TypeTag, A18: TypeTag, A19: TypeTag](name: String, func: Function19[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15, A16, A17, A18, A19, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType +<<<<<<< HEAD + val inputTypes = Try(ScalaReflection.schemaFor[A1].dataType :: ScalaReflection.schemaFor[A2].dataType :: ScalaReflection.schemaFor[A3].dataType :: ScalaReflection.schemaFor[A4].dataType :: ScalaReflection.schemaFor[A5].dataType :: ScalaReflection.schemaFor[A6].dataType :: ScalaReflection.schemaFor[A7].dataType :: ScalaReflection.schemaFor[A8].dataType :: ScalaReflection.schemaFor[A9].dataType :: ScalaReflection.schemaFor[A10].dataType :: ScalaReflection.schemaFor[A11].dataType :: ScalaReflection.schemaFor[A12].dataType :: ScalaReflection.schemaFor[A13].dataType :: ScalaReflection.schemaFor[A14].dataType :: ScalaReflection.schemaFor[A15].dataType :: ScalaReflection.schemaFor[A16].dataType :: ScalaReflection.schemaFor[A17].dataType :: ScalaReflection.schemaFor[A18].dataType :: ScalaReflection.schemaFor[A19].dataType :: Nil).getOrElse(Nil) + def builder(e: Seq[Expression]) = ScalaUDF(func, dataType, e, inputTypes) +======= val inputTypes = Try(ScalaReflection.schemaFor[A1].dataType :: ScalaReflection.schemaFor[A2].dataType :: ScalaReflection.schemaFor[A3].dataType :: ScalaReflection.schemaFor[A4].dataType :: ScalaReflection.schemaFor[A5].dataType :: ScalaReflection.schemaFor[A6].dataType :: ScalaReflection.schemaFor[A7].dataType :: ScalaReflection.schemaFor[A8].dataType :: ScalaReflection.schemaFor[A9].dataType :: ScalaReflection.schemaFor[A10].dataType :: ScalaReflection.schemaFor[A11].dataType :: ScalaReflection.schemaFor[A12].dataType :: ScalaReflection.schemaFor[A13].dataType :: ScalaReflection.schemaFor[A14].dataType :: ScalaReflection.schemaFor[A15].dataType :: ScalaReflection.schemaFor[A16].dataType :: ScalaReflection.schemaFor[A17].dataType :: ScalaReflection.schemaFor[A18].dataType :: ScalaReflection.schemaFor[A19].dataType :: Nil).toOption def builder(e: Seq[Expression]) = ScalaUDF(func, dataType, e, inputTypes.getOrElse(Nil)) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 functionRegistry.registerFunction(name, builder) UserDefinedFunction(func, dataType, inputTypes) } @@ -375,8 +493,13 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag, A11: TypeTag, A12: TypeTag, A13: TypeTag, A14: TypeTag, A15: TypeTag, A16: TypeTag, A17: TypeTag, A18: TypeTag, A19: TypeTag, A20: TypeTag](name: String, func: Function20[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15, A16, A17, A18, A19, A20, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType +<<<<<<< HEAD + val inputTypes = Try(ScalaReflection.schemaFor[A1].dataType :: ScalaReflection.schemaFor[A2].dataType :: ScalaReflection.schemaFor[A3].dataType :: 
ScalaReflection.schemaFor[A4].dataType :: ScalaReflection.schemaFor[A5].dataType :: ScalaReflection.schemaFor[A6].dataType :: ScalaReflection.schemaFor[A7].dataType :: ScalaReflection.schemaFor[A8].dataType :: ScalaReflection.schemaFor[A9].dataType :: ScalaReflection.schemaFor[A10].dataType :: ScalaReflection.schemaFor[A11].dataType :: ScalaReflection.schemaFor[A12].dataType :: ScalaReflection.schemaFor[A13].dataType :: ScalaReflection.schemaFor[A14].dataType :: ScalaReflection.schemaFor[A15].dataType :: ScalaReflection.schemaFor[A16].dataType :: ScalaReflection.schemaFor[A17].dataType :: ScalaReflection.schemaFor[A18].dataType :: ScalaReflection.schemaFor[A19].dataType :: ScalaReflection.schemaFor[A20].dataType :: Nil).getOrElse(Nil) + def builder(e: Seq[Expression]) = ScalaUDF(func, dataType, e, inputTypes) +======= val inputTypes = Try(ScalaReflection.schemaFor[A1].dataType :: ScalaReflection.schemaFor[A2].dataType :: ScalaReflection.schemaFor[A3].dataType :: ScalaReflection.schemaFor[A4].dataType :: ScalaReflection.schemaFor[A5].dataType :: ScalaReflection.schemaFor[A6].dataType :: ScalaReflection.schemaFor[A7].dataType :: ScalaReflection.schemaFor[A8].dataType :: ScalaReflection.schemaFor[A9].dataType :: ScalaReflection.schemaFor[A10].dataType :: ScalaReflection.schemaFor[A11].dataType :: ScalaReflection.schemaFor[A12].dataType :: ScalaReflection.schemaFor[A13].dataType :: ScalaReflection.schemaFor[A14].dataType :: ScalaReflection.schemaFor[A15].dataType :: ScalaReflection.schemaFor[A16].dataType :: ScalaReflection.schemaFor[A17].dataType :: ScalaReflection.schemaFor[A18].dataType :: ScalaReflection.schemaFor[A19].dataType :: ScalaReflection.schemaFor[A20].dataType :: Nil).toOption def builder(e: Seq[Expression]) = ScalaUDF(func, dataType, e, inputTypes.getOrElse(Nil)) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 functionRegistry.registerFunction(name, builder) UserDefinedFunction(func, dataType, inputTypes) } @@ -388,8 +511,13 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag, A11: TypeTag, A12: TypeTag, A13: TypeTag, A14: TypeTag, A15: TypeTag, A16: TypeTag, A17: TypeTag, A18: TypeTag, A19: TypeTag, A20: TypeTag, A21: TypeTag](name: String, func: Function21[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15, A16, A17, A18, A19, A20, A21, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType +<<<<<<< HEAD + val inputTypes = Try(ScalaReflection.schemaFor[A1].dataType :: ScalaReflection.schemaFor[A2].dataType :: ScalaReflection.schemaFor[A3].dataType :: ScalaReflection.schemaFor[A4].dataType :: ScalaReflection.schemaFor[A5].dataType :: ScalaReflection.schemaFor[A6].dataType :: ScalaReflection.schemaFor[A7].dataType :: ScalaReflection.schemaFor[A8].dataType :: ScalaReflection.schemaFor[A9].dataType :: ScalaReflection.schemaFor[A10].dataType :: ScalaReflection.schemaFor[A11].dataType :: ScalaReflection.schemaFor[A12].dataType :: ScalaReflection.schemaFor[A13].dataType :: ScalaReflection.schemaFor[A14].dataType :: ScalaReflection.schemaFor[A15].dataType :: ScalaReflection.schemaFor[A16].dataType :: ScalaReflection.schemaFor[A17].dataType :: ScalaReflection.schemaFor[A18].dataType :: ScalaReflection.schemaFor[A19].dataType :: ScalaReflection.schemaFor[A20].dataType :: ScalaReflection.schemaFor[A21].dataType :: Nil).getOrElse(Nil) + def builder(e: 
Seq[Expression]) = ScalaUDF(func, dataType, e, inputTypes) +======= val inputTypes = Try(ScalaReflection.schemaFor[A1].dataType :: ScalaReflection.schemaFor[A2].dataType :: ScalaReflection.schemaFor[A3].dataType :: ScalaReflection.schemaFor[A4].dataType :: ScalaReflection.schemaFor[A5].dataType :: ScalaReflection.schemaFor[A6].dataType :: ScalaReflection.schemaFor[A7].dataType :: ScalaReflection.schemaFor[A8].dataType :: ScalaReflection.schemaFor[A9].dataType :: ScalaReflection.schemaFor[A10].dataType :: ScalaReflection.schemaFor[A11].dataType :: ScalaReflection.schemaFor[A12].dataType :: ScalaReflection.schemaFor[A13].dataType :: ScalaReflection.schemaFor[A14].dataType :: ScalaReflection.schemaFor[A15].dataType :: ScalaReflection.schemaFor[A16].dataType :: ScalaReflection.schemaFor[A17].dataType :: ScalaReflection.schemaFor[A18].dataType :: ScalaReflection.schemaFor[A19].dataType :: ScalaReflection.schemaFor[A20].dataType :: ScalaReflection.schemaFor[A21].dataType :: Nil).toOption def builder(e: Seq[Expression]) = ScalaUDF(func, dataType, e, inputTypes.getOrElse(Nil)) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 functionRegistry.registerFunction(name, builder) UserDefinedFunction(func, dataType, inputTypes) } @@ -401,8 +529,13 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { */ def register[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag, A11: TypeTag, A12: TypeTag, A13: TypeTag, A14: TypeTag, A15: TypeTag, A16: TypeTag, A17: TypeTag, A18: TypeTag, A19: TypeTag, A20: TypeTag, A21: TypeTag, A22: TypeTag](name: String, func: Function22[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15, A16, A17, A18, A19, A20, A21, A22, RT]): UserDefinedFunction = { val dataType = ScalaReflection.schemaFor[RT].dataType +<<<<<<< HEAD + val inputTypes = Try(ScalaReflection.schemaFor[A1].dataType :: ScalaReflection.schemaFor[A2].dataType :: ScalaReflection.schemaFor[A3].dataType :: ScalaReflection.schemaFor[A4].dataType :: ScalaReflection.schemaFor[A5].dataType :: ScalaReflection.schemaFor[A6].dataType :: ScalaReflection.schemaFor[A7].dataType :: ScalaReflection.schemaFor[A8].dataType :: ScalaReflection.schemaFor[A9].dataType :: ScalaReflection.schemaFor[A10].dataType :: ScalaReflection.schemaFor[A11].dataType :: ScalaReflection.schemaFor[A12].dataType :: ScalaReflection.schemaFor[A13].dataType :: ScalaReflection.schemaFor[A14].dataType :: ScalaReflection.schemaFor[A15].dataType :: ScalaReflection.schemaFor[A16].dataType :: ScalaReflection.schemaFor[A17].dataType :: ScalaReflection.schemaFor[A18].dataType :: ScalaReflection.schemaFor[A19].dataType :: ScalaReflection.schemaFor[A20].dataType :: ScalaReflection.schemaFor[A21].dataType :: ScalaReflection.schemaFor[A22].dataType :: Nil).getOrElse(Nil) + def builder(e: Seq[Expression]) = ScalaUDF(func, dataType, e, inputTypes) +======= val inputTypes = Try(ScalaReflection.schemaFor[A1].dataType :: ScalaReflection.schemaFor[A2].dataType :: ScalaReflection.schemaFor[A3].dataType :: ScalaReflection.schemaFor[A4].dataType :: ScalaReflection.schemaFor[A5].dataType :: ScalaReflection.schemaFor[A6].dataType :: ScalaReflection.schemaFor[A7].dataType :: ScalaReflection.schemaFor[A8].dataType :: ScalaReflection.schemaFor[A9].dataType :: ScalaReflection.schemaFor[A10].dataType :: ScalaReflection.schemaFor[A11].dataType :: ScalaReflection.schemaFor[A12].dataType :: ScalaReflection.schemaFor[A13].dataType :: 
ScalaReflection.schemaFor[A14].dataType :: ScalaReflection.schemaFor[A15].dataType :: ScalaReflection.schemaFor[A16].dataType :: ScalaReflection.schemaFor[A17].dataType :: ScalaReflection.schemaFor[A18].dataType :: ScalaReflection.schemaFor[A19].dataType :: ScalaReflection.schemaFor[A20].dataType :: ScalaReflection.schemaFor[A21].dataType :: ScalaReflection.schemaFor[A22].dataType :: Nil).toOption def builder(e: Seq[Expression]) = ScalaUDF(func, dataType, e, inputTypes.getOrElse(Nil)) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 functionRegistry.registerFunction(name, builder) UserDefinedFunction(func, dataType, inputTypes) } @@ -414,7 +547,11 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { * Register a user-defined function with 1 arguments. * @since 1.3.0 */ +<<<<<<< HEAD + def register(name: String, f: UDF1[_, _], returnType: DataType) = { +======= def register(name: String, f: UDF1[_, _], returnType: DataType): Unit = { +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 functionRegistry.registerFunction( name, (e: Seq[Expression]) => ScalaUDF(f.asInstanceOf[UDF1[Any, Any]].call(_: Any), returnType, e)) @@ -424,7 +561,11 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { * Register a user-defined function with 2 arguments. * @since 1.3.0 */ +<<<<<<< HEAD + def register(name: String, f: UDF2[_, _, _], returnType: DataType) = { +======= def register(name: String, f: UDF2[_, _, _], returnType: DataType): Unit = { +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 functionRegistry.registerFunction( name, (e: Seq[Expression]) => ScalaUDF(f.asInstanceOf[UDF2[Any, Any, Any]].call(_: Any, _: Any), returnType, e)) @@ -434,7 +575,11 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { * Register a user-defined function with 3 arguments. * @since 1.3.0 */ +<<<<<<< HEAD + def register(name: String, f: UDF3[_, _, _, _], returnType: DataType) = { +======= def register(name: String, f: UDF3[_, _, _, _], returnType: DataType): Unit = { +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 functionRegistry.registerFunction( name, (e: Seq[Expression]) => ScalaUDF(f.asInstanceOf[UDF3[Any, Any, Any, Any]].call(_: Any, _: Any, _: Any), returnType, e)) @@ -444,7 +589,11 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { * Register a user-defined function with 4 arguments. * @since 1.3.0 */ +<<<<<<< HEAD + def register(name: String, f: UDF4[_, _, _, _, _], returnType: DataType) = { +======= def register(name: String, f: UDF4[_, _, _, _, _], returnType: DataType): Unit = { +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 functionRegistry.registerFunction( name, (e: Seq[Expression]) => ScalaUDF(f.asInstanceOf[UDF4[Any, Any, Any, Any, Any]].call(_: Any, _: Any, _: Any, _: Any), returnType, e)) @@ -454,7 +603,11 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { * Register a user-defined function with 5 arguments. 
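The same conflict repeats for every arity of the typed `register` overloads above: the HEAD side collapses a failed reflection lookup to `Nil` on the spot (`Try(...).getOrElse(Nil)`), while the incoming side keeps the failure as an `Option` (`Try(...).toOption`) and defers the `Nil` fallback to the `ScalaUDF` builder. A minimal, self-contained sketch of the two styles, using a stubbed `schemaFor` rather than the real `ScalaReflection` API:

import scala.util.Try

object InputTypesSketch extends App {
  // Stand-in for ScalaReflection.schemaFor[T].dataType, which may throw for
  // types it cannot handle.
  def schemaFor(supported: Boolean): String =
    if (supported) "IntegerType" else sys.error("unsupported type")

  // HEAD style: a failure anywhere in the list collapses to Nil immediately,
  // so downstream code only ever sees a Seq.
  val eager: Seq[String] =
    Try(schemaFor(true) :: schemaFor(false) :: Nil).getOrElse(Nil)

  // Incoming style: the failure is kept as None; the Nil fallback happens only
  // where a plain Seq is actually required (inside the builder).
  val deferred: Option[Seq[String]] =
    Try(schemaFor(true) :: schemaFor(false) :: Nil).toOption
  val forBuilder: Seq[String] = deferred.getOrElse(Nil)

  println(s"eager = $eager, deferred = $deferred, forBuilder = $forBuilder")
}

Either way the builder ends up with a Seq; the difference is whether callers can still tell that reflection failed, which is why the `inputTypes` field changes type in the UserDefinedFunction.scala hunk further down.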
* @since 1.3.0 */ +<<<<<<< HEAD + def register(name: String, f: UDF5[_, _, _, _, _, _], returnType: DataType) = { +======= def register(name: String, f: UDF5[_, _, _, _, _, _], returnType: DataType): Unit = { +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 functionRegistry.registerFunction( name, (e: Seq[Expression]) => ScalaUDF(f.asInstanceOf[UDF5[Any, Any, Any, Any, Any, Any]].call(_: Any, _: Any, _: Any, _: Any, _: Any), returnType, e)) @@ -464,7 +617,11 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { * Register a user-defined function with 6 arguments. * @since 1.3.0 */ +<<<<<<< HEAD + def register(name: String, f: UDF6[_, _, _, _, _, _, _], returnType: DataType) = { +======= def register(name: String, f: UDF6[_, _, _, _, _, _, _], returnType: DataType): Unit = { +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 functionRegistry.registerFunction( name, (e: Seq[Expression]) => ScalaUDF(f.asInstanceOf[UDF6[Any, Any, Any, Any, Any, Any, Any]].call(_: Any, _: Any, _: Any, _: Any, _: Any, _: Any), returnType, e)) @@ -474,7 +631,11 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { * Register a user-defined function with 7 arguments. * @since 1.3.0 */ +<<<<<<< HEAD + def register(name: String, f: UDF7[_, _, _, _, _, _, _, _], returnType: DataType) = { +======= def register(name: String, f: UDF7[_, _, _, _, _, _, _, _], returnType: DataType): Unit = { +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 functionRegistry.registerFunction( name, (e: Seq[Expression]) => ScalaUDF(f.asInstanceOf[UDF7[Any, Any, Any, Any, Any, Any, Any, Any]].call(_: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any), returnType, e)) @@ -484,7 +645,11 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { * Register a user-defined function with 8 arguments. * @since 1.3.0 */ +<<<<<<< HEAD + def register(name: String, f: UDF8[_, _, _, _, _, _, _, _, _], returnType: DataType) = { +======= def register(name: String, f: UDF8[_, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 functionRegistry.registerFunction( name, (e: Seq[Expression]) => ScalaUDF(f.asInstanceOf[UDF8[Any, Any, Any, Any, Any, Any, Any, Any, Any]].call(_: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any), returnType, e)) @@ -494,7 +659,11 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { * Register a user-defined function with 9 arguments. * @since 1.3.0 */ +<<<<<<< HEAD + def register(name: String, f: UDF9[_, _, _, _, _, _, _, _, _, _], returnType: DataType) = { +======= def register(name: String, f: UDF9[_, _, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 functionRegistry.registerFunction( name, (e: Seq[Expression]) => ScalaUDF(f.asInstanceOf[UDF9[Any, Any, Any, Any, Any, Any, Any, Any, Any, Any]].call(_: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any), returnType, e)) @@ -504,7 +673,11 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { * Register a user-defined function with 10 arguments. 
* @since 1.3.0 */ +<<<<<<< HEAD + def register(name: String, f: UDF10[_, _, _, _, _, _, _, _, _, _, _], returnType: DataType) = { +======= def register(name: String, f: UDF10[_, _, _, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 functionRegistry.registerFunction( name, (e: Seq[Expression]) => ScalaUDF(f.asInstanceOf[UDF10[Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any]].call(_: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any), returnType, e)) @@ -514,7 +687,11 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { * Register a user-defined function with 11 arguments. * @since 1.3.0 */ +<<<<<<< HEAD + def register(name: String, f: UDF11[_, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType) = { +======= def register(name: String, f: UDF11[_, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 functionRegistry.registerFunction( name, (e: Seq[Expression]) => ScalaUDF(f.asInstanceOf[UDF11[Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any]].call(_: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any), returnType, e)) @@ -524,7 +701,11 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { * Register a user-defined function with 12 arguments. * @since 1.3.0 */ +<<<<<<< HEAD + def register(name: String, f: UDF12[_, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType) = { +======= def register(name: String, f: UDF12[_, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 functionRegistry.registerFunction( name, (e: Seq[Expression]) => ScalaUDF(f.asInstanceOf[UDF12[Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any]].call(_: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any), returnType, e)) @@ -534,7 +715,11 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { * Register a user-defined function with 13 arguments. * @since 1.3.0 */ +<<<<<<< HEAD + def register(name: String, f: UDF13[_, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType) = { +======= def register(name: String, f: UDF13[_, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 functionRegistry.registerFunction( name, (e: Seq[Expression]) => ScalaUDF(f.asInstanceOf[UDF13[Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any]].call(_: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any), returnType, e)) @@ -544,7 +729,11 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { * Register a user-defined function with 14 arguments. 
* @since 1.3.0 */ +<<<<<<< HEAD + def register(name: String, f: UDF14[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType) = { +======= def register(name: String, f: UDF14[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 functionRegistry.registerFunction( name, (e: Seq[Expression]) => ScalaUDF(f.asInstanceOf[UDF14[Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any]].call(_: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any), returnType, e)) @@ -554,7 +743,11 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { * Register a user-defined function with 15 arguments. * @since 1.3.0 */ +<<<<<<< HEAD + def register(name: String, f: UDF15[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType) = { +======= def register(name: String, f: UDF15[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 functionRegistry.registerFunction( name, (e: Seq[Expression]) => ScalaUDF(f.asInstanceOf[UDF15[Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any]].call(_: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any), returnType, e)) @@ -564,7 +757,11 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { * Register a user-defined function with 16 arguments. * @since 1.3.0 */ +<<<<<<< HEAD + def register(name: String, f: UDF16[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType) = { +======= def register(name: String, f: UDF16[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 functionRegistry.registerFunction( name, (e: Seq[Expression]) => ScalaUDF(f.asInstanceOf[UDF16[Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any]].call(_: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any), returnType, e)) @@ -574,7 +771,11 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { * Register a user-defined function with 17 arguments. * @since 1.3.0 */ +<<<<<<< HEAD + def register(name: String, f: UDF17[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType) = { +======= def register(name: String, f: UDF17[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 functionRegistry.registerFunction( name, (e: Seq[Expression]) => ScalaUDF(f.asInstanceOf[UDF17[Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any]].call(_: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any), returnType, e)) @@ -584,7 +785,11 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { * Register a user-defined function with 18 arguments. 
* @since 1.3.0 */ +<<<<<<< HEAD + def register(name: String, f: UDF18[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType) = { +======= def register(name: String, f: UDF18[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 functionRegistry.registerFunction( name, (e: Seq[Expression]) => ScalaUDF(f.asInstanceOf[UDF18[Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any]].call(_: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any), returnType, e)) @@ -594,7 +799,11 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { * Register a user-defined function with 19 arguments. * @since 1.3.0 */ +<<<<<<< HEAD + def register(name: String, f: UDF19[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType) = { +======= def register(name: String, f: UDF19[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 functionRegistry.registerFunction( name, (e: Seq[Expression]) => ScalaUDF(f.asInstanceOf[UDF19[Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any]].call(_: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any), returnType, e)) @@ -604,7 +813,11 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { * Register a user-defined function with 20 arguments. * @since 1.3.0 */ +<<<<<<< HEAD + def register(name: String, f: UDF20[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType) = { +======= def register(name: String, f: UDF20[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 functionRegistry.registerFunction( name, (e: Seq[Expression]) => ScalaUDF(f.asInstanceOf[UDF20[Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any]].call(_: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any), returnType, e)) @@ -614,7 +827,11 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { * Register a user-defined function with 21 arguments. * @since 1.3.0 */ +<<<<<<< HEAD + def register(name: String, f: UDF21[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType) = { +======= def register(name: String, f: UDF21[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 functionRegistry.registerFunction( name, (e: Seq[Expression]) => ScalaUDF(f.asInstanceOf[UDF21[Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any]].call(_: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any), returnType, e)) @@ -624,12 +841,20 @@ class UDFRegistration private[sql] (sqlContext: SQLContext) extends Logging { * Register a user-defined function with 22 arguments. 
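Each of the `UDF1`–`UDF22` overloads in this stretch (the last one continues just below) carries the same small conflict: `def register(...) = {` on the HEAD side versus an explicit `def register(...): Unit = {` on the incoming side. A small sketch, with hypothetical method names, of why spelling out `Unit` is the safer choice for a public API:

object ReturnTypeSketch extends App {
  var registry = Map.empty[String, Int]

  // Inferred result type: this is Unit today only because the body ends in an
  // assignment; if the body ever ends in an expression, the public signature
  // silently changes with it.
  def registerInferred(name: String) = { registry += (name -> registry.size) }

  // Explicit Unit: the signature stays fixed no matter what the body returns.
  def registerExplicit(name: String): Unit = { registry += (name -> registry.size) }

  registerInferred("f1")
  registerExplicit("f2")
  println(registry)   // Map(f1 -> 0, f2 -> 1)
}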
* @since 1.3.0 */ +<<<<<<< HEAD + def register(name: String, f: UDF22[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType) = { +======= def register(name: String, f: UDF22[_, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _, _], returnType: DataType): Unit = { +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 functionRegistry.registerFunction( name, (e: Seq[Expression]) => ScalaUDF(f.asInstanceOf[UDF22[Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any]].call(_: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any, _: Any), returnType, e)) } +<<<<<<< HEAD + // scalastyle:on +======= // scalastyle:on line.size.limit +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/UserDefinedFunction.scala b/sql/core/src/main/scala/org/apache/spark/sql/UserDefinedFunction.scala index 2fb3bf07aa60b..145d0cd83869d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/UserDefinedFunction.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/UserDefinedFunction.scala @@ -44,10 +44,17 @@ import org.apache.spark.sql.types.DataType case class UserDefinedFunction protected[sql] ( f: AnyRef, dataType: DataType, +<<<<<<< HEAD + inputTypes: Seq[DataType] = Nil) { + + def apply(exprs: Column*): Column = { + Column(ScalaUDF(f, dataType, exprs.map(_.expr), inputTypes)) +======= inputTypes: Option[Seq[DataType]]) { def apply(exprs: Column*): Column = { Column(ScalaUDF(f, dataType, exprs.map(_.expr), inputTypes.getOrElse(Nil))) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala index 67da7b808bf59..375078530450d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/api/r/SQLUtils.scala @@ -32,7 +32,11 @@ private[r] object SQLUtils { SerDe.registerSqlSerDe((readSqlObject, writeSqlObject)) def createSQLContext(jsc: JavaSparkContext): SQLContext = { +<<<<<<< HEAD + new SQLContext(jsc) +======= SQLContext.getOrCreate(jsc.sc) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 } def getJavaSparkContext(sqlCtx: SQLContext): JavaSparkContext = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ExistingRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ExistingRDD.scala index 5c01af011d306..5cb9a9d4aa01d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/ExistingRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ExistingRDD.scala @@ -18,11 +18,19 @@ package org.apache.spark.sql.execution import org.apache.spark.rdd.RDD +<<<<<<< HEAD +import org.apache.spark.sql.catalyst.{InternalRow, CatalystTypeConverters} +import org.apache.spark.sql.catalyst.analysis.MultiInstanceRelation +import org.apache.spark.sql.catalyst.expressions.{Attribute, GenericMutableRow} +import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Statistics} +import org.apache.spark.sql.sources.{HadoopFsRelation, BaseRelation} +======= import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow} import org.apache.spark.sql.catalyst.analysis.MultiInstanceRelation import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeSet, GenericMutableRow} import 
org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Statistics} import org.apache.spark.sql.sources.{BaseRelation, HadoopFsRelation} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.sql.types.DataType import org.apache.spark.sql.{Row, SQLContext} @@ -84,8 +92,11 @@ private[sql] case class LogicalRDD( case _ => false } +<<<<<<< HEAD +======= override def producedAttributes: AttributeSet = outputSet +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 @transient override lazy val statistics: Statistics = Statistics( // TODO: Instead of returning a default value here, find a way to return a meaningful size // estimate for RDDs. See PR 1238 for more discussions. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/Generate.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/Generate.scala index 0c613e91b979f..8e345ba936142 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/Generate.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/Generate.scala @@ -54,8 +54,11 @@ case class Generate( child: SparkPlan) extends UnaryNode { +<<<<<<< HEAD +======= override def expressions: Seq[Expression] = generator :: Nil +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 val boundGenerator = BindReferences.bindReference(generator, child.output) protected override def doExecute(): RDD[InternalRow] = { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala index fe9b2ad4a0bc3..4a15c3dfed506 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkPlan.scala @@ -188,11 +188,19 @@ abstract class SparkPlan extends QueryPlan[SparkPlan] with Logging with Serializ val buf = new ArrayBuffer[InternalRow] val totalParts = childRDD.partitions.length +<<<<<<< HEAD + var partsScanned = 0L + while (buf.size < n && partsScanned < totalParts) { + // The number of partitions to try in this iteration. It is ok for this number to be + // greater than totalParts because we actually cap it at totalParts in runJob. + var numPartsToTry = 1L +======= var partsScanned = 0 while (buf.size < n && partsScanned < totalParts) { // The number of partitions to try in this iteration. It is ok for this number to be // greater than totalParts because we actually cap it at totalParts in runJob. var numPartsToTry = 1 +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 if (partsScanned > 0) { // If we didn't find any rows after the first iteration, just try all partitions next. 
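In the SparkPlan.executeTake hunk above, the two sides disagree on whether `partsScanned`/`numPartsToTry` are `Int` or `Long`, and (just below) on whether the scanned count advances by `numPartsToTry` or by `p.size`, the number of partitions actually submitted. A simplified, self-contained version of the take-a-few-partitions-at-a-time loop, written against plain collections instead of an RDD and following the `Long`/`p.size` side:

object TakeLoopSketch extends App {
  // Pretend partitions, each yielding some rows.
  val partitions: Array[Seq[Int]] = Array(Seq(1, 2), Seq(), Seq(3), Seq(4, 5, 6))
  val n = 4
  val totalParts = partitions.length

  val buf = scala.collection.mutable.ArrayBuffer.empty[Int]
  var partsScanned = 0L
  while (buf.size < n && partsScanned < totalParts) {
    // Long arithmetic so repeatedly growing the estimate cannot overflow
    // before it is capped at totalParts.
    var numPartsToTry = 1L
    if (partsScanned > 0) {
      // If the first pass found nothing, try everything; otherwise grow the
      // estimate (the real code interpolates from the rows found so far).
      numPartsToTry = if (buf.isEmpty) totalParts.toLong else (4L * partsScanned).max(1L)
    }
    val p = partsScanned.toInt until math.min(partsScanned + numPartsToTry, totalParts).toInt
    p.foreach(i => buf ++= partitions(i).take(n - buf.size))
    partsScanned += p.size   // advance by what was actually scanned
  }
  println(buf.take(n).toList)   // List(1, 2, 3, 4)
}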
// Otherwise, interpolate the number of partitions we need to try, but overestimate it @@ -206,13 +214,21 @@ abstract class SparkPlan extends QueryPlan[SparkPlan] with Logging with Serializ numPartsToTry = math.max(0, numPartsToTry) // guard against negative num of partitions val left = n - buf.size +<<<<<<< HEAD + val p = partsScanned.toInt until math.min(partsScanned + numPartsToTry, totalParts).toInt +======= val p = partsScanned until math.min(partsScanned + numPartsToTry, totalParts) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 val sc = sqlContext.sparkContext val res = sc.runJob(childRDD, (it: Iterator[InternalRow]) => it.take(left).toArray, p) res.foreach(buf ++= _.take(n - buf.size)) +<<<<<<< HEAD + partsScanned += p.size +======= partsScanned += numPartsToTry +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 } buf.toArray @@ -279,7 +295,10 @@ abstract class SparkPlan extends QueryPlan[SparkPlan] with Logging with Serializ private[sql] trait LeafNode extends SparkPlan { override def children: Seq[SparkPlan] = Nil +<<<<<<< HEAD +======= override def producedAttributes: AttributeSet = outputSet +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 } private[sql] trait UnaryNode extends SparkPlan { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/UnsafeRowSerializer.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/UnsafeRowSerializer.scala index 4730647c4be9c..9aa2c9f8b8714 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/UnsafeRowSerializer.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/UnsafeRowSerializer.scala @@ -94,7 +94,11 @@ private class UnsafeRowSerializerInstance(numFields: Int) extends SerializerInst private[this] val dIn: DataInputStream = new DataInputStream(new BufferedInputStream(in)) // 1024 is a default buffer size; this buffer will grow to accommodate larger rows private[this] var rowBuffer: Array[Byte] = new Array[Byte](1024) +<<<<<<< HEAD + private[this] var row: UnsafeRow = new UnsafeRow() +======= private[this] var row: UnsafeRow = new UnsafeRow(numFields) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 private[this] var rowTuple: (Int, UnsafeRow) = (0, row) private[this] val EOF: Int = -1 @@ -117,7 +121,11 @@ private class UnsafeRowSerializerInstance(numFields: Int) extends SerializerInst rowBuffer = new Array[Byte](rowSize) } ByteStreams.readFully(dIn, rowBuffer, 0, rowSize) +<<<<<<< HEAD + row.pointTo(rowBuffer, Platform.BYTE_ARRAY_OFFSET, numFields, rowSize) +======= row.pointTo(rowBuffer, Platform.BYTE_ARRAY_OFFSET, rowSize) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 rowSize = readSize() if (rowSize == EOF) { // We are returning the last row in this stream dIn.close() @@ -152,7 +160,11 @@ private class UnsafeRowSerializerInstance(numFields: Int) extends SerializerInst rowBuffer = new Array[Byte](rowSize) } ByteStreams.readFully(dIn, rowBuffer, 0, rowSize) +<<<<<<< HEAD + row.pointTo(rowBuffer, Platform.BYTE_ARRAY_OFFSET, numFields, rowSize) +======= row.pointTo(rowBuffer, Platform.BYTE_ARRAY_OFFSET, rowSize) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 row.asInstanceOf[T] } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortBasedAggregate.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortBasedAggregate.scala index c4587ba677b2f..efda10d44b35b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortBasedAggregate.scala +++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/SortBasedAggregate.scala @@ -36,6 +36,8 @@ case class SortBasedAggregate( child: SparkPlan) extends UnaryNode { +<<<<<<< HEAD +======= private[this] val aggregateBufferAttributes = { aggregateExpressions.flatMap(_.aggregateFunction.aggBufferAttributes) } @@ -45,6 +47,7 @@ case class SortBasedAggregate( AttributeSet(resultExpressions.diff(groupingExpressions).map(_.toAttribute)) ++ AttributeSet(aggregateBufferAttributes) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 override private[sql] lazy val metrics = Map( "numInputRows" -> SQLMetrics.createLongMetric(sparkContext, "number of input rows"), "numOutputRows" -> SQLMetrics.createLongMetric(sparkContext, "number of output rows")) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/TungstenAggregate.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/TungstenAggregate.scala index 9d758eb3b7c32..59027c072b76a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/TungstenAggregate.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/aggregate/TungstenAggregate.scala @@ -55,11 +55,14 @@ case class TungstenAggregate( override def output: Seq[Attribute] = resultExpressions.map(_.toAttribute) +<<<<<<< HEAD +======= override def producedAttributes: AttributeSet = AttributeSet(aggregateAttributes) ++ AttributeSet(resultExpressions.diff(groupingExpressions).map(_.toAttribute)) ++ AttributeSet(aggregateBufferAttributes) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 override def requiredChildDistribution: List[Distribution] = { requiredChildDistributionExpressions match { case Some(exprs) if exprs.length == 0 => AllTuples :: Nil diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicOperators.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicOperators.scala index f19d72f067218..444c07cee9859 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/basicOperators.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/basicOperators.scala @@ -369,11 +369,14 @@ case class MapPartitions[T, U]( uEncoder: ExpressionEncoder[U], output: Seq[Attribute], child: SparkPlan) extends UnaryNode { +<<<<<<< HEAD +======= override def producedAttributes: AttributeSet = outputSet override def canProcessSafeRows: Boolean = true override def canProcessUnsafeRows: Boolean = true override def outputsUnsafeRows: Boolean = true +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 override protected def doExecute(): RDD[InternalRow] = { child.execute().mapPartitionsInternal { iter => @@ -392,12 +395,18 @@ case class AppendColumns[T, U]( uEncoder: ExpressionEncoder[U], newColumns: Seq[Attribute], child: SparkPlan) extends UnaryNode { +<<<<<<< HEAD +======= override def producedAttributes: AttributeSet = AttributeSet(newColumns) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 // We are using an unsafe combiner. 
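The recurring `producedAttributes` additions on the incoming side (SortBasedAggregate, TungstenAggregate, MapPartitions, AppendColumns above, and more below) all serve the same purpose: declaring which attributes an operator creates itself, so that consistency checks do not treat them as missing from the child's output. A toy model, not the real QueryPlan API, of what that bookkeeping buys:

object ProducedAttributesSketch extends App {
  // A plan node knows what it outputs, what its child provides, and what it
  // produces on its own (e.g. aggregate buffers or appended columns).
  case class Node(output: Set[String], childOutput: Set[String], produced: Set[String]) {
    // Anything in the output that neither comes from the child nor is produced
    // locally would be flagged as an unresolved/missing reference.
    def missingInput: Set[String] = output -- childOutput -- produced
  }

  // An AppendColumns-like node: child columns plus a column it generates.
  val append = Node(
    output = Set("a", "b", "serialized"),
    childOutput = Set("a", "b"),
    produced = Set("serialized"))

  println(append.missingInput)                              // Set() — accounted for
  println(append.copy(produced = Set.empty).missingInput)   // Set(serialized) — looks like a bug
}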
override def canProcessSafeRows: Boolean = false override def canProcessUnsafeRows: Boolean = true +<<<<<<< HEAD +======= override def outputsUnsafeRows: Boolean = true +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 override def output: Seq[Attribute] = child.output ++ newColumns @@ -426,11 +435,14 @@ case class MapGroups[K, T, U]( groupingAttributes: Seq[Attribute], output: Seq[Attribute], child: SparkPlan) extends UnaryNode { +<<<<<<< HEAD +======= override def producedAttributes: AttributeSet = outputSet override def canProcessSafeRows: Boolean = true override def canProcessUnsafeRows: Boolean = true override def outputsUnsafeRows: Boolean = true +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 override def requiredChildDistribution: Seq[Distribution] = ClusteredDistribution(groupingAttributes) :: Nil @@ -470,11 +482,14 @@ case class CoGroup[Key, Left, Right, Result]( rightGroup: Seq[Attribute], left: SparkPlan, right: SparkPlan) extends BinaryNode { +<<<<<<< HEAD +======= override def producedAttributes: AttributeSet = outputSet override def canProcessSafeRows: Boolean = true override def canProcessUnsafeRows: Boolean = true override def outputsUnsafeRows: Boolean = true +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 override def requiredChildDistribution: Seq[Distribution] = ClusteredDistribution(leftGroup) :: ClusteredDistribution(rightGroup) :: Nil diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnType.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnType.scala index 9c908b2877e79..390969e591ed3 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnType.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/ColumnType.scala @@ -574,10 +574,18 @@ private[columnar] case class STRUCT(dataType: StructType) assert(buffer.hasArray) val cursor = buffer.position() buffer.position(cursor + sizeInBytes) +<<<<<<< HEAD + val unsafeRow = new UnsafeRow + unsafeRow.pointTo( + buffer.array(), + Platform.BYTE_ARRAY_OFFSET + buffer.arrayOffset() + cursor, + numOfFields, +======= val unsafeRow = new UnsafeRow(numOfFields) unsafeRow.pointTo( buffer.array(), Platform.BYTE_ARRAY_OFFSET + buffer.arrayOffset() + cursor, +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 sizeInBytes) unsafeRow } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/GenerateColumnAccessor.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/GenerateColumnAccessor.scala index b208425ffc3c3..311ba6d4dc4a2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/GenerateColumnAccessor.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/GenerateColumnAccessor.scala @@ -131,7 +131,11 @@ object GenerateColumnAccessor extends CodeGenerator[Seq[DataType], ColumnarItera private ByteOrder nativeOrder = null; private byte[][] buffers = null; +<<<<<<< HEAD + private UnsafeRow unsafeRow = new UnsafeRow(); +======= private UnsafeRow unsafeRow = new UnsafeRow($numFields); +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 private BufferHolder bufferHolder = new BufferHolder(); private UnsafeRowWriter rowWriter = new UnsafeRowWriter(); private MutableUnsafeRow mutableRow = null; @@ -183,7 +187,11 @@ object GenerateColumnAccessor extends CodeGenerator[Seq[DataType], ColumnarItera bufferHolder.reset(); rowWriter.initialize(bufferHolder, $numFields); ${extractors.mkString("\n")} +<<<<<<< HEAD + unsafeRow.pointTo(bufferHolder.buffer, $numFields, 
bufferHolder.totalSize()); +======= unsafeRow.pointTo(bufferHolder.buffer, bufferHolder.totalSize()); +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 return unsafeRow; } }""" diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarTableScan.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarTableScan.scala index aa7a668e0e938..71841ecae77a7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarTableScan.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryColumnarTableScan.scala @@ -66,8 +66,11 @@ private[sql] case class InMemoryRelation( private[sql] var _batchStats: Accumulable[ArrayBuffer[InternalRow], InternalRow] = null) extends LogicalPlan with MultiInstanceRelation { +<<<<<<< HEAD +======= override def producedAttributes: AttributeSet = outputSet +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 private val batchStats: Accumulable[ArrayBuffer[InternalRow], InternalRow] = if (_batchStats == null) { child.sqlContext.sparkContext.accumulableCollection(ArrayBuffer.empty[InternalRow]) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala index 6ec4cadeeb072..fb9af8160541b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/commands.scala @@ -148,6 +148,11 @@ case class SetCommand(kv: Option[(String, Option[String])]) extends RunnableComm } (keyValueOutput, runFunc) +<<<<<<< HEAD + (keyValueOutput, runFunc) + +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 case Some((SQLConf.Deprecated.SORTMERGE_JOIN, Some(value))) => val runFunc = (sqlContext: SQLContext) => { logWarning( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala index 3741a9cb32fd4..f0df379c6aa14 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala @@ -77,8 +77,12 @@ private[sql] object DataSourceStrategy extends Strategy with Logging { val pushedFilters = filters.filter(_.references.intersect(partitionColumns).isEmpty) // Predicates with both partition keys and attributes +<<<<<<< HEAD + val combineFilters = filters.toSet -- partitionFilters.toSet -- pushedFilters.toSet +======= val partitionAndNormalColumnFilters = filters.toSet -- partitionFilters.toSet -- pushedFilters.toSet +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 val selectedPartitions = prunePartitions(partitionFilters, t.partitionSpec).toArray @@ -89,6 +93,11 @@ private[sql] object DataSourceStrategy extends Strategy with Logging { s"Selected $selected partitions out of $total, pruned $percentPruned% partitions." 
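In the DataSourceStrategy hunk above, both sides split the pushed-down `filters` into three buckets; they differ only in the name of the third bucket (`combineFilters` versus `partitionAndNormalColumnFilters`) and in how it is applied on top of the scan (a plain `Filter` on the HEAD side versus `Filter` plus a restoring `Project` on the incoming side, visible just below). A toy sketch of the three-way split, assuming `partitionFilters` means filters that touch only partition columns (its definition sits outside this diff):

object FilterSplitSketch extends App {
  // Toy stand-in: a filter is just its SQL text plus the columns it references.
  case class Filter(sql: String, references: Set[String])

  val partitionColumns = Set("year", "month")
  val filters = Seq(
    Filter("year = 2015", Set("year")),
    Filter("value > 10", Set("value")),
    Filter("year = someUdf(value)", Set("year", "value")))

  // Only partition columns: usable for pruning partitions before the scan.
  val partitionFilters =
    filters.filter(f => f.references.nonEmpty && f.references.subsetOf(partitionColumns))
  // No partition columns at all: candidates for pushdown into the data source.
  val pushedFilters =
    filters.filter(_.references.intersect(partitionColumns).isEmpty)
  // Everything else mixes both kinds and has to be evaluated above the scan.
  val mixed = filters.toSet -- partitionFilters.toSet -- pushedFilters.toSet

  println(s"prune with:      $partitionFilters")
  println(s"push down:       $pushedFilters")
  println(s"evaluate on top: $mixed")
}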
} +<<<<<<< HEAD + val scan = buildPartitionedTableScan( + l, + projects, +======= // need to add projections from "partitionAndNormalColumnAttrs" in if it is not empty val partitionAndNormalColumnAttrs = AttributeSet(partitionAndNormalColumnFilters) val partitionAndNormalColumnProjs = if (partitionAndNormalColumnAttrs.isEmpty) { @@ -100,10 +109,16 @@ private[sql] object DataSourceStrategy extends Strategy with Logging { val scan = buildPartitionedTableScan( l, partitionAndNormalColumnProjs, +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 pushedFilters, t.partitionSpec.partitionColumns, selectedPartitions) +<<<<<<< HEAD + combineFilters + .reduceLeftOption(expressions.And) + .map(execution.Filter(_, scan)).getOrElse(scan) :: Nil +======= // Add a Projection to guarantee the original projection: // this is because "partitionAndNormalColumnAttrs" may be different // from the original "projects", in elements or their ordering @@ -116,6 +131,7 @@ private[sql] object DataSourceStrategy extends Strategy with Logging { execution.Project(projects, execution.Filter(cf, scan)) } ).getOrElse(scan) :: Nil +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 // Scanning non-partitioned HadoopFsRelation case PhysicalOperation(projects, filters, l @ LogicalRelation(t: HadoopFsRelation, _)) => diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoHadoopFsRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoHadoopFsRelation.scala index 758bcd706a8c3..93a411771c587 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoHadoopFsRelation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InsertIntoHadoopFsRelation.scala @@ -93,7 +93,11 @@ private[sql] case class InsertIntoHadoopFsRelation( val isAppend = pathExists && (mode == SaveMode.Append) if (doInsertion) { +<<<<<<< HEAD + val job = new Job(hadoopConf) +======= val job = Job.getInstance(hadoopConf) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 job.setOutputKeyClass(classOf[Void]) job.setOutputValueClass(classOf[InternalRow]) FileOutputFormat.setOutputPath(job, qualifiedOutputPath) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SqlNewHadoopRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SqlNewHadoopRDD.scala index 12f8783f846d6..3c19347ccbdaa 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SqlNewHadoopRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/SqlNewHadoopRDD.scala @@ -26,10 +26,17 @@ import org.apache.hadoop.conf.{Configurable, Configuration} import org.apache.hadoop.io.Writable import org.apache.hadoop.mapreduce._ import org.apache.hadoop.mapreduce.lib.input.{CombineFileSplit, FileSplit} +<<<<<<< HEAD +import org.apache.spark.broadcast.Broadcast +import org.apache.spark.deploy.SparkHadoopUtil +import org.apache.spark.executor.DataReadMethod +import org.apache.spark.mapreduce.SparkHadoopMapReduceUtil +======= import org.apache.hadoop.mapreduce.task.{JobContextImpl, TaskAttemptContextImpl} import org.apache.spark.broadcast.Broadcast import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.executor.DataReadMethod +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.sql.{SQLConf, SQLContext} import org.apache.spark.sql.execution.datasources.parquet.UnsafeRowParquetRecordReader import org.apache.spark.storage.StorageLevel @@ -68,6 +75,18 @@ 
private[spark] class SqlNewHadoopRDD[V: ClassTag]( initLocalJobFuncOpt: Option[Job => Unit], inputFormatClass: Class[_ <: InputFormat[Void, V]], valueClass: Class[V]) +<<<<<<< HEAD + extends RDD[V](sqlContext.sparkContext, Nil) + with SparkHadoopMapReduceUtil + with Logging { + + protected def getJob(): Job = { + val conf: Configuration = broadcastedConf.value.value + // "new Job" will make a copy of the conf. Then, it is + // safe to mutate conf properties with initLocalJobFuncOpt + // and initDriverSideJobFuncOpt. + val newJob = new Job(conf) +======= extends RDD[V](sqlContext.sparkContext, Nil) with Logging { protected def getJob(): Job = { @@ -76,6 +95,7 @@ private[spark] class SqlNewHadoopRDD[V: ClassTag]( // safe to mutate conf properties with initLocalJobFuncOpt // and initDriverSideJobFuncOpt. val newJob = Job.getInstance(conf) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 initLocalJobFuncOpt.map(f => f(newJob)) newJob } @@ -85,7 +105,11 @@ private[spark] class SqlNewHadoopRDD[V: ClassTag]( if (isDriverSide) { initDriverSideJobFuncOpt.map(f => f(job)) } +<<<<<<< HEAD + SparkHadoopUtil.get.getConfigurationFromJobContext(job) +======= job.getConfiguration +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 } private val jobTrackerId: String = { @@ -108,7 +132,11 @@ private[spark] class SqlNewHadoopRDD[V: ClassTag]( configurable.setConf(conf) case _ => } +<<<<<<< HEAD + val jobContext = newJobContext(conf, jobId) +======= val jobContext = new JobContextImpl(conf, jobId) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 val rawSplits = inputFormat.getSplits(jobContext).toArray val result = new Array[SparkPartition](rawSplits.size) for (i <- 0 until rawSplits.size) { @@ -152,8 +180,13 @@ private[spark] class SqlNewHadoopRDD[V: ClassTag]( configurable.setConf(conf) case _ => } +<<<<<<< HEAD + val attemptId = newTaskAttemptID(jobTrackerId, id, isMap = true, split.index, 0) + val hadoopAttemptContext = newTaskAttemptContext(conf, attemptId) +======= val attemptId = new TaskAttemptID(jobTrackerId, id, TaskType.MAP, split.index, 0) val hadoopAttemptContext = new TaskAttemptContextImpl(conf, attemptId) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 private[this] var reader: RecordReader[Void, V] = null /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/WriterContainer.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/WriterContainer.scala index 8b0b647744559..60346691283f0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/WriterContainer.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/WriterContainer.scala @@ -24,10 +24,17 @@ import scala.collection.JavaConverters._ import org.apache.hadoop.fs.Path import org.apache.hadoop.mapreduce._ import org.apache.hadoop.mapreduce.lib.output.{FileOutputCommitter => MapReduceFileOutputCommitter} +<<<<<<< HEAD +import org.apache.spark._ +import org.apache.spark.deploy.SparkHadoopUtil +import org.apache.spark.mapred.SparkHadoopMapRedUtil +import org.apache.spark.mapreduce.SparkHadoopMapReduceUtil +======= import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl import org.apache.spark._ import org.apache.spark.mapred.SparkHadoopMapRedUtil +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.InternalRow @@ -41,12 +48,22 @@ private[sql] abstract class BaseWriterContainer( @transient val relation: HadoopFsRelation, 
@transient private val job: Job, isAppend: Boolean) +<<<<<<< HEAD + extends SparkHadoopMapReduceUtil + with Logging + with Serializable { +======= extends Logging with Serializable { +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 protected val dataSchema = relation.dataSchema protected val serializableConf = +<<<<<<< HEAD + new SerializableConfiguration(SparkHadoopUtil.get.getConfigurationFromJobContext(job)) +======= new SerializableConfiguration(job.getConfiguration) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 // This UUID is used to avoid output file name collision between different appending write jobs. // These jobs may belong to different SparkContext instances. Concrete data source implementations @@ -88,7 +105,12 @@ private[sql] abstract class BaseWriterContainer( // This UUID is sent to executor side together with the serialized `Configuration` object within // the `Job` instance. `OutputWriters` on the executor side should use this UUID to generate // unique task output files. +<<<<<<< HEAD + SparkHadoopUtil.get.getConfigurationFromJobContext(job). + set("spark.sql.sources.writeJobUUID", uniqueWriteJobId.toString) +======= job.getConfiguration.set("spark.sql.sources.writeJobUUID", uniqueWriteJobId.toString) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 // Order of the following two lines is important. For Hadoop 1, TaskAttemptContext constructor // clones the Configuration object passed in. If we initialize the TaskAttemptContext first, @@ -98,7 +120,11 @@ private[sql] abstract class BaseWriterContainer( // committer, since their initialization involve the job configuration, which can be potentially // decorated in `prepareJobForWrite`. outputWriterFactory = relation.prepareJobForWrite(job) +<<<<<<< HEAD + taskAttemptContext = newTaskAttemptContext(serializableConf.value, taskAttemptId) +======= taskAttemptContext = new TaskAttemptContextImpl(serializableConf.value, taskAttemptId) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 outputFormatClass = job.getOutputFormatClass outputCommitter = newOutputCommitter(taskAttemptContext) @@ -108,7 +134,11 @@ private[sql] abstract class BaseWriterContainer( def executorSideSetup(taskContext: TaskContext): Unit = { setupIDs(taskContext.stageId(), taskContext.partitionId(), taskContext.attemptNumber()) setupConf() +<<<<<<< HEAD + taskAttemptContext = newTaskAttemptContext(serializableConf.value, taskAttemptId) +======= taskAttemptContext = new TaskAttemptContextImpl(serializableConf.value, taskAttemptId) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 outputCommitter = newOutputCommitter(taskAttemptContext) outputCommitter.setupTask(taskAttemptContext) } @@ -163,7 +193,11 @@ private[sql] abstract class BaseWriterContainer( "because spark.speculation is configured to be true.") defaultOutputCommitter } else { +<<<<<<< HEAD + val configuration = SparkHadoopUtil.get.getConfigurationFromJobContext(context) +======= val configuration = context.getConfiguration +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 val committerClass = configuration.getClass( SQLConf.OUTPUT_COMMITTER_CLASS.key, null, classOf[OutputCommitter]) @@ -198,8 +232,15 @@ private[sql] abstract class BaseWriterContainer( private def setupIDs(jobId: Int, splitId: Int, attemptId: Int): Unit = { this.jobId = SparkHadoopWriter.createJobID(new Date, jobId) +<<<<<<< HEAD + this.taskId = new TaskID(this.jobId, true, splitId) + // scalastyle:off jobcontext + this.taskAttemptId = new TaskAttemptID(taskId, attemptId) + // scalastyle:on jobcontext +======= this.taskId = new 
TaskID(this.jobId, TaskType.MAP, splitId) this.taskAttemptId = new TaskAttemptID(taskId, attemptId) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 } private def setupConf(): Unit = { @@ -245,7 +286,11 @@ private[sql] class DefaultWriterContainer( def writeRows(taskContext: TaskContext, iterator: Iterator[InternalRow]): Unit = { executorSideSetup(taskContext) +<<<<<<< HEAD + val configuration = SparkHadoopUtil.get.getConfigurationFromJobContext(taskAttemptContext) +======= val configuration = taskAttemptContext.getConfiguration +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 configuration.set("spark.sql.sources.output.path", outputPath) val writer = newOutputWriter(getWorkPath) writer.initConverter(dataSchema) @@ -327,10 +372,14 @@ private[sql] class DynamicPartitionWriterContainer( val partitionStringExpression = partitionColumns.zipWithIndex.flatMap { case (c, i) => val escaped = ScalaUDF( +<<<<<<< HEAD + PartitioningUtils.escapePathName _, StringType, Seq(Cast(c, StringType)), Seq(StringType)) +======= PartitioningUtils.escapePathName _, StringType, Seq(Cast(c, StringType)), Seq(StringType)) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 val str = If(IsNull(c), Literal(defaultPartitionName), escaped) val partitionName = Literal(c.name + "=") :: str :: Nil if (i == 0) partitionName else Literal(Path.SEPARATOR) :: partitionName @@ -416,7 +465,11 @@ private[sql] class DynamicPartitionWriterContainer( def newOutputWriter(key: InternalRow): OutputWriter = { val partitionPath = getPartitionString(key).getString(0) val path = new Path(getWorkPath, partitionPath) +<<<<<<< HEAD + val configuration = SparkHadoopUtil.get.getConfigurationFromJobContext(taskAttemptContext) +======= val configuration = taskAttemptContext.getConfiguration +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 configuration.set( "spark.sql.sources.output.path", new Path(outputPath, partitionPath).toString) val newWriter = super.newOutputWriter(path.toString) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala index 87d43addd36ce..adc52c4a12219 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCRDD.scala @@ -122,6 +122,32 @@ private[sql] object JDBCRDD extends Logging { val dialect = JdbcDialects.get(url) val conn: Connection = getConnector(properties.getProperty("driver"), url, properties)() try { +<<<<<<< HEAD + val rs = conn.prepareStatement(s"SELECT * FROM $table WHERE 1=0").executeQuery() + try { + val rsmd = rs.getMetaData + val ncols = rsmd.getColumnCount + val fields = new Array[StructField](ncols) + var i = 0 + while (i < ncols) { + val columnName = rsmd.getColumnLabel(i + 1) + val dataType = rsmd.getColumnType(i + 1) + val typeName = rsmd.getColumnTypeName(i + 1) + val fieldSize = rsmd.getPrecision(i + 1) + val fieldScale = rsmd.getScale(i + 1) + val isSigned = rsmd.isSigned(i + 1) + val nullable = rsmd.isNullable(i + 1) != ResultSetMetaData.columnNoNulls + val metadata = new MetadataBuilder().putString("name", columnName) + val columnType = + dialect.getCatalystType(dataType, typeName, fieldSize, metadata).getOrElse( + getCatalystType(dataType, fieldSize, fieldScale, isSigned)) + fields(i) = StructField(columnName, columnType, nullable, metadata.build()) + i = i + 1 + } + return new StructType(fields) + } finally { + rs.close() +======= val 
statement = conn.prepareStatement(s"SELECT * FROM $table WHERE 1=0") try { val rs = statement.executeQuery() @@ -151,6 +177,7 @@ private[sql] object JDBCRDD extends Logging { } } finally { statement.close() +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 } } finally { conn.close() @@ -179,7 +206,10 @@ private[sql] object JDBCRDD extends Logging { case stringValue: String => s"'${escapeSql(stringValue)}'" case timestampValue: Timestamp => "'" + timestampValue + "'" case dateValue: Date => "'" + dateValue + "'" +<<<<<<< HEAD +======= case arrayValue: Array[Any] => arrayValue.map(compileValue).mkString(", ") +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 case _ => value } @@ -188,6 +218,20 @@ private[sql] object JDBCRDD extends Logging { /** * Turns a single Filter into a String representing a SQL expression. +<<<<<<< HEAD + * Returns null for an unhandled filter. + */ + private def compileFilter(f: Filter): String = f match { + case EqualTo(attr, value) => s"$attr = ${compileValue(value)}" + case Not(EqualTo(attr, value)) => s"$attr != ${compileValue(value)}" + case LessThan(attr, value) => s"$attr < ${compileValue(value)}" + case GreaterThan(attr, value) => s"$attr > ${compileValue(value)}" + case LessThanOrEqual(attr, value) => s"$attr <= ${compileValue(value)}" + case GreaterThanOrEqual(attr, value) => s"$attr >= ${compileValue(value)}" + case IsNull(attr) => s"$attr IS NULL" + case IsNotNull(attr) => s"$attr IS NOT NULL" + case _ => null +======= * Returns None for an unhandled filter. */ private def compileFilter(f: Filter): Option[String] = { @@ -226,6 +270,7 @@ private[sql] object JDBCRDD extends Logging { } case _ => null }) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 } /** @@ -323,17 +368,39 @@ private[sql] class JDBCRDD( if (sb.length == 0) "1" else sb.substring(1) } +<<<<<<< HEAD + + /** + * `filters`, but as a WHERE clause suitable for injection into a SQL query. + */ + private val filterWhereClause: String = { + val filterStrings = filters.map(JDBCRDD.compileFilter).filter(_ != null) + if (filterStrings.size > 0) { + val sb = new StringBuilder("WHERE ") + filterStrings.foreach(x => sb.append(x).append(" AND ")) + sb.substring(0, sb.length - 5) + } else "" + } +======= /** * `filters`, but as a WHERE clause suitable for injection into a SQL query. */ private val filterWhereClause: String = filters.map(JDBCRDD.compileFilter).flatten.mkString(" AND ") +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * A WHERE clause representing both `filters`, if any, and the current partition. 
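The JDBCRDD conflict above comes down to how an unsupported filter is represented: the HEAD side has `compileFilter` return `null` and strips the nulls out with a StringBuilder loop, while the incoming side returns `Option[String]` so the WHERE fragment is a flatten plus `mkString(" AND ")`. A toy sketch of the Option-based style, with a simplified filter ADT and no real SQL value escaping:

object JdbcFilterSketch extends App {
  sealed trait Filter
  case class EqualTo(attr: String, value: Any) extends Filter
  case class IsNull(attr: String) extends Filter
  case class Unknown(desc: String) extends Filter   // something we cannot push down

  // None means "cannot be compiled"; no nulls to remember to filter out later.
  def compileFilter(f: Filter): Option[String] = f match {
    case EqualTo(a, v) => Some(s"$a = $v")
    case IsNull(a)     => Some(s"$a IS NULL")
    case _             => None
  }

  val filters: Seq[Filter] = Seq(EqualTo("id", 1), Unknown("someUdf(x) > 0"), IsNull("name"))
  val clause = filters.flatMap(compileFilter).mkString(" AND ")
  println(if (clause.isEmpty) "<nothing pushed down>" else s"WHERE $clause")
  // prints: WHERE id = 1 AND name IS NULL
}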
*/ private def getWhereClause(part: JDBCPartition): String = { if (part.whereClause != null && filterWhereClause.length > 0) { +<<<<<<< HEAD + filterWhereClause + " AND " + part.whereClause + } else if (part.whereClause != null) { + "WHERE " + part.whereClause + } else { + filterWhereClause +======= "WHERE " + filterWhereClause + " AND " + part.whereClause } else if (part.whereClause != null) { "WHERE " + part.whereClause @@ -341,6 +408,7 @@ private[sql] class JDBCRDD( "WHERE " + filterWhereClause } else { "" +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala index 46f2670eee010..c15f4c8a09a6d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala @@ -49,6 +49,9 @@ object JdbcUtils extends Logging { // Somewhat hacky, but there isn't a good way to identify whether a table exists for all // SQL database systems using JDBC meta data calls, considering "table" could also include // the database name. Query used to find table exists can be overriden by the dialects. +<<<<<<< HEAD + Try(conn.prepareStatement(dialect.getTableExistsQuery(table)).executeQuery()).isSuccess +======= Try { val statement = conn.prepareStatement(dialect.getTableExistsQuery(table)) try { @@ -57,18 +60,23 @@ object JdbcUtils extends Logging { statement.close() } }.isSuccess +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 } /** * Drops a table from the JDBC database. */ def dropTable(conn: Connection, table: String): Unit = { +<<<<<<< HEAD + conn.createStatement.executeUpdate(s"DROP TABLE $table") +======= val statement = conn.createStatement try { statement.executeUpdate(s"DROP TABLE $table") } finally { statement.close() } +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 } /** diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JSONRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JSONRelation.scala index 54a8552134c82..5977327d9a115 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JSONRelation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/json/JSONRelation.scala @@ -30,6 +30,11 @@ import org.apache.hadoop.mapreduce.{Job, RecordWriter, TaskAttemptContext} import org.apache.spark.Logging import org.apache.spark.broadcast.Broadcast +<<<<<<< HEAD +import org.apache.spark.deploy.SparkHadoopUtil +import org.apache.spark.mapred.SparkHadoopMapRedUtil +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.UnsafeProjection @@ -87,8 +92,13 @@ private[sql] class JSONRelation( override val needConversion: Boolean = false private def createBaseRdd(inputPaths: Array[FileStatus]): RDD[String] = { +<<<<<<< HEAD + val job = new Job(sqlContext.sparkContext.hadoopConfiguration) + val conf = SparkHadoopUtil.get.getConfigurationFromJobContext(job) +======= val job = Job.getInstance(sqlContext.sparkContext.hadoopConfiguration) val conf = job.getConfiguration +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 val paths = inputPaths.map(_.getPath) @@ -174,7 +184,11 @@ private[json] class JsonOutputWriter( path: String, dataSchema: StructType, 
context: TaskAttemptContext) +<<<<<<< HEAD + extends OutputWriter with SparkHadoopMapRedUtil with Logging { +======= extends OutputWriter with Logging { +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 private[this] val writer = new CharArrayWriter() // create the Generator without separator inserted between 2 records @@ -184,9 +198,15 @@ private[json] class JsonOutputWriter( private val recordWriter: RecordWriter[NullWritable, Text] = { new TextOutputFormat[NullWritable, Text]() { override def getDefaultWorkFile(context: TaskAttemptContext, extension: String): Path = { +<<<<<<< HEAD + val configuration = SparkHadoopUtil.get.getConfigurationFromJobContext(context) + val uniqueWriteJobId = configuration.get("spark.sql.sources.writeJobUUID") + val taskAttemptId = SparkHadoopUtil.get.getTaskAttemptIDFromTaskAttemptContext(context) +======= val configuration = context.getConfiguration val uniqueWriteJobId = configuration.get("spark.sql.sources.writeJobUUID") val taskAttemptId = context.getTaskAttemptID +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 val split = taskAttemptId.getTaskID.getId new Path(path, f"part-r-$split%05d-$uniqueWriteJobId$extension") } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/CatalystReadSupport.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/CatalystReadSupport.scala index e5d8e6088b395..2c7aa2c499825 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/CatalystReadSupport.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/CatalystReadSupport.scala @@ -58,7 +58,13 @@ private[parquet] class CatalystReadSupport extends ReadSupport[InternalRow] with */ override def init(context: InitContext): ReadContext = { catalystRequestedSchema = { +<<<<<<< HEAD + // scalastyle:off jobcontext val conf = context.getConfiguration + // scalastyle:on jobcontext +======= + val conf = context.getConfiguration +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 val schemaString = conf.get(CatalystReadSupport.SPARK_ROW_REQUESTED_SCHEMA) assert(schemaString != null, "Parquet requested schema not set.") StructType.fromString(schemaString) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/DirectParquetOutputCommitter.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/DirectParquetOutputCommitter.scala index e54f51e3830f3..e8d6fd28c4515 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/DirectParquetOutputCommitter.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/DirectParquetOutputCommitter.scala @@ -54,7 +54,15 @@ private[datasources] class DirectParquetOutputCommitter( override def setupTask(taskContext: TaskAttemptContext): Unit = {} override def commitJob(jobContext: JobContext) { +<<<<<<< HEAD + val configuration = { + // scalastyle:off jobcontext + ContextUtil.getConfiguration(jobContext) + // scalastyle:on jobcontext + } +======= val configuration = ContextUtil.getConfiguration(jobContext) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 val fileSystem = outputPath.getFileSystem(configuration) if (configuration.getBoolean(ParquetOutputFormat.ENABLE_JOB_SUMMARY, true)) { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRelation.scala index 
af964b4d35611..a983c5034ff53 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRelation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRelation.scala @@ -31,7 +31,10 @@ import org.apache.hadoop.fs.{FileStatus, Path} import org.apache.hadoop.io.Writable import org.apache.hadoop.mapreduce._ import org.apache.hadoop.mapreduce.lib.input.FileInputFormat +<<<<<<< HEAD +======= import org.apache.hadoop.mapreduce.task.JobContextImpl +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.parquet.filter2.predicate.FilterApi import org.apache.parquet.hadoop._ import org.apache.parquet.hadoop.metadata.CompressionCodecName @@ -41,6 +44,10 @@ import org.apache.parquet.{Log => ApacheParquetLog} import org.slf4j.bridge.SLF4JBridgeHandler import org.apache.spark.broadcast.Broadcast +<<<<<<< HEAD +import org.apache.spark.deploy.SparkHadoopUtil +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.rdd.{RDD, SqlNewHadoopPartition, SqlNewHadoopRDD} import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.InternalRow @@ -82,9 +89,15 @@ private[sql] class ParquetOutputWriter(path: String, context: TaskAttemptContext // `FileOutputCommitter.getWorkPath()`, which points to the base directory of all // partitions in the case of dynamic partitioning. override def getDefaultWorkFile(context: TaskAttemptContext, extension: String): Path = { +<<<<<<< HEAD + val configuration = SparkHadoopUtil.get.getConfigurationFromJobContext(context) + val uniqueWriteJobId = configuration.get("spark.sql.sources.writeJobUUID") + val taskAttemptId = SparkHadoopUtil.get.getTaskAttemptIDFromTaskAttemptContext(context) +======= val configuration = context.getConfiguration val uniqueWriteJobId = configuration.get("spark.sql.sources.writeJobUUID") val taskAttemptId = context.getTaskAttemptID +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 val split = taskAttemptId.getTaskID.getId new Path(path, f"part-r-$split%05d-$uniqueWriteJobId$extension") } @@ -217,7 +230,15 @@ private[sql] class ParquetRelation( override def sizeInBytes: Long = metadataCache.dataStatuses.map(_.getLen).sum override def prepareJobForWrite(job: Job): OutputWriterFactory = { +<<<<<<< HEAD + val conf = { + // scalastyle:off jobcontext + ContextUtil.getConfiguration(job) + // scalastyle:on jobcontext + } +======= val conf = ContextUtil.getConfiguration(job) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 // SPARK-9849 DirectParquetOutputCommitter qualified name should be backward compatible val committerClassName = conf.get(SQLConf.PARQUET_OUTPUT_COMMITTER_CLASS.key) @@ -336,7 +357,11 @@ private[sql] class ParquetRelation( // URI of the path to create a new Path. 
val pathWithEscapedAuthority = escapePathUserInfo(f.getPath) new FileStatus( +<<<<<<< HEAD + f.getLen, f.isDir, f.getReplication, f.getBlockSize, f.getModificationTime, +======= f.getLen, f.isDirectory, f.getReplication, f.getBlockSize, f.getModificationTime, +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 f.getAccessTime, f.getPermission, f.getOwner, f.getGroup, pathWithEscapedAuthority) }.toSeq @@ -355,7 +380,11 @@ private[sql] class ParquetRelation( } } +<<<<<<< HEAD + val jobContext = newJobContext(getConf(isDriverSide = true), jobId) +======= val jobContext = new JobContextImpl(getConf(isDriverSide = true), jobId) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 val rawSplits = inputFormat.getSplits(jobContext) Array.tabulate[SparkPartition](rawSplits.size) { i => @@ -560,7 +589,11 @@ private[sql] object ParquetRelation extends Logging { parquetFilterPushDown: Boolean, assumeBinaryIsString: Boolean, assumeInt96IsTimestamp: Boolean)(job: Job): Unit = { +<<<<<<< HEAD + val conf = SparkHadoopUtil.get.getConfigurationFromJobContext(job) +======= val conf = job.getConfiguration +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 conf.set(ParquetInputFormat.READ_SUPPORT_CLASS, classOf[CatalystReadSupport].getName) // Try to push down filters when filter push-down is enabled. @@ -603,7 +636,11 @@ private[sql] object ParquetRelation extends Logging { FileInputFormat.setInputPaths(job, inputFiles.map(_.getPath): _*) } +<<<<<<< HEAD + overrideMinSplitSize(parquetBlockSize, SparkHadoopUtil.get.getConfigurationFromJobContext(job)) +======= overrideMinSplitSize(parquetBlockSize, job.getConfiguration) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 } private[parquet] def readSchema( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/text/DefaultSource.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/text/DefaultSource.scala index 248467abe9f50..255a70b78495c 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/text/DefaultSource.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/text/DefaultSource.scala @@ -26,6 +26,11 @@ import org.apache.hadoop.mapreduce.{RecordWriter, TaskAttemptContext, Job} import org.apache.hadoop.mapreduce.lib.input.FileInputFormat import org.apache.spark.broadcast.Broadcast +<<<<<<< HEAD +import org.apache.spark.deploy.SparkHadoopUtil +import org.apache.spark.mapred.SparkHadoopMapRedUtil +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions.UnsafeRow @@ -86,8 +91,13 @@ private[sql] class TextRelation( filters: Array[Filter], inputPaths: Array[FileStatus], broadcastedConf: Broadcast[SerializableConfiguration]): RDD[InternalRow] = { +<<<<<<< HEAD + val job = new Job(sqlContext.sparkContext.hadoopConfiguration) + val conf = SparkHadoopUtil.get.getConfigurationFromJobContext(job) +======= val job = Job.getInstance(sqlContext.sparkContext.hadoopConfiguration) val conf = job.getConfiguration +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 val paths = inputPaths.map(_.getPath).sortBy(_.toUri) if (paths.nonEmpty) { @@ -99,14 +109,22 @@ private[sql] class TextRelation( .mapPartitions { iter => val bufferHolder = new BufferHolder val unsafeRowWriter = new UnsafeRowWriter +<<<<<<< HEAD + val unsafeRow = new UnsafeRow +======= val unsafeRow = new UnsafeRow(1) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 iter.map { case (_, line) => // 
Writes to an UnsafeRow directly bufferHolder.reset() unsafeRowWriter.initialize(bufferHolder, 1) unsafeRowWriter.write(0, line.getBytes, 0, line.getLength) +<<<<<<< HEAD + unsafeRow.pointTo(bufferHolder.buffer, 1, bufferHolder.totalSize()) +======= unsafeRow.pointTo(bufferHolder.buffer, bufferHolder.totalSize()) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 unsafeRow } } @@ -136,16 +154,27 @@ private[sql] class TextRelation( } class TextOutputWriter(path: String, dataSchema: StructType, context: TaskAttemptContext) +<<<<<<< HEAD + extends OutputWriter + with SparkHadoopMapRedUtil { +======= extends OutputWriter { +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 private[this] val buffer = new Text() private val recordWriter: RecordWriter[NullWritable, Text] = { new TextOutputFormat[NullWritable, Text]() { override def getDefaultWorkFile(context: TaskAttemptContext, extension: String): Path = { +<<<<<<< HEAD + val configuration = SparkHadoopUtil.get.getConfigurationFromJobContext(context) + val uniqueWriteJobId = configuration.get("spark.sql.sources.writeJobUUID") + val taskAttemptId = SparkHadoopUtil.get.getTaskAttemptIDFromTaskAttemptContext(context) +======= val configuration = context.getConfiguration val uniqueWriteJobId = configuration.get("spark.sql.sources.writeJobUUID") val taskAttemptId = context.getTaskAttemptID +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 val split = taskAttemptId.getTaskID.getId new Path(path, f"part-r-$split%05d-$uniqueWriteJobId$extension") } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/CartesianProduct.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/CartesianProduct.scala index 81bfe4e67ca73..f7d6c32d0458e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/CartesianProduct.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/CartesianProduct.scala @@ -56,14 +56,23 @@ class UnsafeCartesianRDD(left : RDD[UnsafeRow], right : RDD[UnsafeRow], numField // Create an iterator from sorter and wrapper it as Iterator[UnsafeRow] def createIter(): Iterator[UnsafeRow] = { val iter = sorter.getIterator +<<<<<<< HEAD + val unsafeRow = new UnsafeRow +======= val unsafeRow = new UnsafeRow(numFieldsOfRight) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 new Iterator[UnsafeRow] { override def hasNext: Boolean = { iter.hasNext } override def next(): UnsafeRow = { iter.loadNext() +<<<<<<< HEAD + unsafeRow.pointTo(iter.getBaseObject, iter.getBaseOffset, numFieldsOfRight, + iter.getRecordLength) +======= unsafeRow.pointTo(iter.getBaseObject, iter.getBaseOffset, iter.getRecordLength) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 unsafeRow } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala index c6f56cfaed22c..9b65fde73819e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/HashedRelation.scala @@ -245,8 +245,13 @@ private[joins] final class UnsafeHashedRelation( val sizeInBytes = Platform.getInt(base, offset + 4) offset += 8 +<<<<<<< HEAD + val row = new UnsafeRow + row.pointTo(base, offset, numFields, sizeInBytes) +======= val row = new UnsafeRow(numFields) row.pointTo(base, offset, sizeInBytes) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 buffer += row offset += sizeInBytes } diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index 97c5aed6da9c4..94f64c03cdef7 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -2512,8 +2512,12 @@ object functions extends LegacyFunctions { ////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////// +<<<<<<< HEAD + // scalastyle:off +======= // scalastyle:off line.size.limit // scalastyle:off parameter.number +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /* Use the following code to generate: (0 to 10).map { x => @@ -2529,7 +2533,11 @@ object functions extends LegacyFunctions { * @since 1.3.0 */ def udf[$typeTags](f: Function$x[$types]): UserDefinedFunction = { +<<<<<<< HEAD + val inputTypes = Try($inputTypes).getOrElse(Nil) +======= val inputTypes = Try($inputTypes).toOption +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 UserDefinedFunction(f, ScalaReflection.schemaFor(typeTag[RT]).dataType, inputTypes) }""") } @@ -2549,7 +2557,11 @@ object functions extends LegacyFunctions { */ @deprecated("Use udf", "1.5.0") def callUDF(f: Function$x[$fTypes], returnType: DataType${if (args.length > 0) ", " + args else ""}): Column = withExpr { +<<<<<<< HEAD + ScalaUDF(f, returnType, Seq($argsInUDF)) +======= ScalaUDF(f, returnType, Option(Seq($argsInUDF))) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 }""") } */ @@ -2561,7 +2573,11 @@ object functions extends LegacyFunctions { * @since 1.3.0 */ def udf[RT: TypeTag](f: Function0[RT]): UserDefinedFunction = { +<<<<<<< HEAD + val inputTypes = Try(Nil).getOrElse(Nil) +======= val inputTypes = Try(Nil).toOption +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 UserDefinedFunction(f, ScalaReflection.schemaFor(typeTag[RT]).dataType, inputTypes) } @@ -2573,7 +2589,11 @@ object functions extends LegacyFunctions { * @since 1.3.0 */ def udf[RT: TypeTag, A1: TypeTag](f: Function1[A1, RT]): UserDefinedFunction = { +<<<<<<< HEAD + val inputTypes = Try(ScalaReflection.schemaFor(typeTag[A1]).dataType :: Nil).getOrElse(Nil) +======= val inputTypes = Try(ScalaReflection.schemaFor(typeTag[A1]).dataType :: Nil).toOption +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 UserDefinedFunction(f, ScalaReflection.schemaFor(typeTag[RT]).dataType, inputTypes) } @@ -2585,7 +2605,11 @@ object functions extends LegacyFunctions { * @since 1.3.0 */ def udf[RT: TypeTag, A1: TypeTag, A2: TypeTag](f: Function2[A1, A2, RT]): UserDefinedFunction = { +<<<<<<< HEAD + val inputTypes = Try(ScalaReflection.schemaFor(typeTag[A1]).dataType :: ScalaReflection.schemaFor(typeTag[A2]).dataType :: Nil).getOrElse(Nil) +======= val inputTypes = Try(ScalaReflection.schemaFor(typeTag[A1]).dataType :: ScalaReflection.schemaFor(typeTag[A2]).dataType :: Nil).toOption +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 UserDefinedFunction(f, ScalaReflection.schemaFor(typeTag[RT]).dataType, inputTypes) } @@ -2597,7 +2621,11 @@ object functions extends LegacyFunctions { * @since 1.3.0 */ def udf[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag](f: Function3[A1, A2, A3, RT]): UserDefinedFunction = { +<<<<<<< HEAD + val inputTypes = Try(ScalaReflection.schemaFor(typeTag[A1]).dataType :: ScalaReflection.schemaFor(typeTag[A2]).dataType :: ScalaReflection.schemaFor(typeTag[A3]).dataType :: Nil).getOrElse(Nil) +======= val inputTypes = 
Try(ScalaReflection.schemaFor(typeTag[A1]).dataType :: ScalaReflection.schemaFor(typeTag[A2]).dataType :: ScalaReflection.schemaFor(typeTag[A3]).dataType :: Nil).toOption +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 UserDefinedFunction(f, ScalaReflection.schemaFor(typeTag[RT]).dataType, inputTypes) } @@ -2609,7 +2637,11 @@ object functions extends LegacyFunctions { * @since 1.3.0 */ def udf[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag](f: Function4[A1, A2, A3, A4, RT]): UserDefinedFunction = { +<<<<<<< HEAD + val inputTypes = Try(ScalaReflection.schemaFor(typeTag[A1]).dataType :: ScalaReflection.schemaFor(typeTag[A2]).dataType :: ScalaReflection.schemaFor(typeTag[A3]).dataType :: ScalaReflection.schemaFor(typeTag[A4]).dataType :: Nil).getOrElse(Nil) +======= val inputTypes = Try(ScalaReflection.schemaFor(typeTag[A1]).dataType :: ScalaReflection.schemaFor(typeTag[A2]).dataType :: ScalaReflection.schemaFor(typeTag[A3]).dataType :: ScalaReflection.schemaFor(typeTag[A4]).dataType :: Nil).toOption +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 UserDefinedFunction(f, ScalaReflection.schemaFor(typeTag[RT]).dataType, inputTypes) } @@ -2621,7 +2653,11 @@ object functions extends LegacyFunctions { * @since 1.3.0 */ def udf[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag](f: Function5[A1, A2, A3, A4, A5, RT]): UserDefinedFunction = { +<<<<<<< HEAD + val inputTypes = Try(ScalaReflection.schemaFor(typeTag[A1]).dataType :: ScalaReflection.schemaFor(typeTag[A2]).dataType :: ScalaReflection.schemaFor(typeTag[A3]).dataType :: ScalaReflection.schemaFor(typeTag[A4]).dataType :: ScalaReflection.schemaFor(typeTag[A5]).dataType :: Nil).getOrElse(Nil) +======= val inputTypes = Try(ScalaReflection.schemaFor(typeTag[A1]).dataType :: ScalaReflection.schemaFor(typeTag[A2]).dataType :: ScalaReflection.schemaFor(typeTag[A3]).dataType :: ScalaReflection.schemaFor(typeTag[A4]).dataType :: ScalaReflection.schemaFor(typeTag[A5]).dataType :: Nil).toOption +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 UserDefinedFunction(f, ScalaReflection.schemaFor(typeTag[RT]).dataType, inputTypes) } @@ -2633,7 +2669,11 @@ object functions extends LegacyFunctions { * @since 1.3.0 */ def udf[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag](f: Function6[A1, A2, A3, A4, A5, A6, RT]): UserDefinedFunction = { +<<<<<<< HEAD + val inputTypes = Try(ScalaReflection.schemaFor(typeTag[A1]).dataType :: ScalaReflection.schemaFor(typeTag[A2]).dataType :: ScalaReflection.schemaFor(typeTag[A3]).dataType :: ScalaReflection.schemaFor(typeTag[A4]).dataType :: ScalaReflection.schemaFor(typeTag[A5]).dataType :: ScalaReflection.schemaFor(typeTag[A6]).dataType :: Nil).getOrElse(Nil) +======= val inputTypes = Try(ScalaReflection.schemaFor(typeTag[A1]).dataType :: ScalaReflection.schemaFor(typeTag[A2]).dataType :: ScalaReflection.schemaFor(typeTag[A3]).dataType :: ScalaReflection.schemaFor(typeTag[A4]).dataType :: ScalaReflection.schemaFor(typeTag[A5]).dataType :: ScalaReflection.schemaFor(typeTag[A6]).dataType :: Nil).toOption +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 UserDefinedFunction(f, ScalaReflection.schemaFor(typeTag[RT]).dataType, inputTypes) } @@ -2645,7 +2685,11 @@ object functions extends LegacyFunctions { * @since 1.3.0 */ def udf[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag](f: Function7[A1, A2, A3, A4, A5, A6, A7, RT]): UserDefinedFunction = { +<<<<<<< HEAD + val inputTypes = 
Try(ScalaReflection.schemaFor(typeTag[A1]).dataType :: ScalaReflection.schemaFor(typeTag[A2]).dataType :: ScalaReflection.schemaFor(typeTag[A3]).dataType :: ScalaReflection.schemaFor(typeTag[A4]).dataType :: ScalaReflection.schemaFor(typeTag[A5]).dataType :: ScalaReflection.schemaFor(typeTag[A6]).dataType :: ScalaReflection.schemaFor(typeTag[A7]).dataType :: Nil).getOrElse(Nil) +======= val inputTypes = Try(ScalaReflection.schemaFor(typeTag[A1]).dataType :: ScalaReflection.schemaFor(typeTag[A2]).dataType :: ScalaReflection.schemaFor(typeTag[A3]).dataType :: ScalaReflection.schemaFor(typeTag[A4]).dataType :: ScalaReflection.schemaFor(typeTag[A5]).dataType :: ScalaReflection.schemaFor(typeTag[A6]).dataType :: ScalaReflection.schemaFor(typeTag[A7]).dataType :: Nil).toOption +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 UserDefinedFunction(f, ScalaReflection.schemaFor(typeTag[RT]).dataType, inputTypes) } @@ -2657,7 +2701,11 @@ object functions extends LegacyFunctions { * @since 1.3.0 */ def udf[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag](f: Function8[A1, A2, A3, A4, A5, A6, A7, A8, RT]): UserDefinedFunction = { +<<<<<<< HEAD + val inputTypes = Try(ScalaReflection.schemaFor(typeTag[A1]).dataType :: ScalaReflection.schemaFor(typeTag[A2]).dataType :: ScalaReflection.schemaFor(typeTag[A3]).dataType :: ScalaReflection.schemaFor(typeTag[A4]).dataType :: ScalaReflection.schemaFor(typeTag[A5]).dataType :: ScalaReflection.schemaFor(typeTag[A6]).dataType :: ScalaReflection.schemaFor(typeTag[A7]).dataType :: ScalaReflection.schemaFor(typeTag[A8]).dataType :: Nil).getOrElse(Nil) +======= val inputTypes = Try(ScalaReflection.schemaFor(typeTag[A1]).dataType :: ScalaReflection.schemaFor(typeTag[A2]).dataType :: ScalaReflection.schemaFor(typeTag[A3]).dataType :: ScalaReflection.schemaFor(typeTag[A4]).dataType :: ScalaReflection.schemaFor(typeTag[A5]).dataType :: ScalaReflection.schemaFor(typeTag[A6]).dataType :: ScalaReflection.schemaFor(typeTag[A7]).dataType :: ScalaReflection.schemaFor(typeTag[A8]).dataType :: Nil).toOption +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 UserDefinedFunction(f, ScalaReflection.schemaFor(typeTag[RT]).dataType, inputTypes) } @@ -2669,7 +2717,11 @@ object functions extends LegacyFunctions { * @since 1.3.0 */ def udf[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag](f: Function9[A1, A2, A3, A4, A5, A6, A7, A8, A9, RT]): UserDefinedFunction = { +<<<<<<< HEAD + val inputTypes = Try(ScalaReflection.schemaFor(typeTag[A1]).dataType :: ScalaReflection.schemaFor(typeTag[A2]).dataType :: ScalaReflection.schemaFor(typeTag[A3]).dataType :: ScalaReflection.schemaFor(typeTag[A4]).dataType :: ScalaReflection.schemaFor(typeTag[A5]).dataType :: ScalaReflection.schemaFor(typeTag[A6]).dataType :: ScalaReflection.schemaFor(typeTag[A7]).dataType :: ScalaReflection.schemaFor(typeTag[A8]).dataType :: ScalaReflection.schemaFor(typeTag[A9]).dataType :: Nil).getOrElse(Nil) +======= val inputTypes = Try(ScalaReflection.schemaFor(typeTag[A1]).dataType :: ScalaReflection.schemaFor(typeTag[A2]).dataType :: ScalaReflection.schemaFor(typeTag[A3]).dataType :: ScalaReflection.schemaFor(typeTag[A4]).dataType :: ScalaReflection.schemaFor(typeTag[A5]).dataType :: ScalaReflection.schemaFor(typeTag[A6]).dataType :: ScalaReflection.schemaFor(typeTag[A7]).dataType :: ScalaReflection.schemaFor(typeTag[A8]).dataType :: 
ScalaReflection.schemaFor(typeTag[A9]).dataType :: Nil).toOption +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 UserDefinedFunction(f, ScalaReflection.schemaFor(typeTag[RT]).dataType, inputTypes) } @@ -2681,7 +2733,11 @@ object functions extends LegacyFunctions { * @since 1.3.0 */ def udf[RT: TypeTag, A1: TypeTag, A2: TypeTag, A3: TypeTag, A4: TypeTag, A5: TypeTag, A6: TypeTag, A7: TypeTag, A8: TypeTag, A9: TypeTag, A10: TypeTag](f: Function10[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, RT]): UserDefinedFunction = { +<<<<<<< HEAD + val inputTypes = Try(ScalaReflection.schemaFor(typeTag[A1]).dataType :: ScalaReflection.schemaFor(typeTag[A2]).dataType :: ScalaReflection.schemaFor(typeTag[A3]).dataType :: ScalaReflection.schemaFor(typeTag[A4]).dataType :: ScalaReflection.schemaFor(typeTag[A5]).dataType :: ScalaReflection.schemaFor(typeTag[A6]).dataType :: ScalaReflection.schemaFor(typeTag[A7]).dataType :: ScalaReflection.schemaFor(typeTag[A8]).dataType :: ScalaReflection.schemaFor(typeTag[A9]).dataType :: ScalaReflection.schemaFor(typeTag[A10]).dataType :: Nil).getOrElse(Nil) +======= val inputTypes = Try(ScalaReflection.schemaFor(typeTag[A1]).dataType :: ScalaReflection.schemaFor(typeTag[A2]).dataType :: ScalaReflection.schemaFor(typeTag[A3]).dataType :: ScalaReflection.schemaFor(typeTag[A4]).dataType :: ScalaReflection.schemaFor(typeTag[A5]).dataType :: ScalaReflection.schemaFor(typeTag[A6]).dataType :: ScalaReflection.schemaFor(typeTag[A7]).dataType :: ScalaReflection.schemaFor(typeTag[A8]).dataType :: ScalaReflection.schemaFor(typeTag[A9]).dataType :: ScalaReflection.schemaFor(typeTag[A10]).dataType :: Nil).toOption +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 UserDefinedFunction(f, ScalaReflection.schemaFor(typeTag[RT]).dataType, inputTypes) } @@ -2840,8 +2896,12 @@ object functions extends LegacyFunctions { ScalaUDF(f, returnType, Seq(arg1.expr, arg2.expr, arg3.expr, arg4.expr, arg5.expr, arg6.expr, arg7.expr, arg8.expr, arg9.expr, arg10.expr)) } +<<<<<<< HEAD + // scalastyle:on +======= // scalastyle:on parameter.number // scalastyle:on line.size.limit +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * Call an user-defined function. 
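
[Editor's note: the repeated udf[...] hunks above all turn on one change: inputTypes moves from Try(...).getOrElse(Nil) (failures collapse to an empty Seq) to Try(...).toOption (failures become None). A minimal standalone sketch of the two shapes, using a toy schemaFor stand-in instead of Spark's ScalaReflection, only to make the behavioral difference concrete:]

import scala.util.Try

object InputTypesSketch {
  // Toy stand-in for ScalaReflection.schemaFor(...).dataType, which can throw
  // for unsupported types; "IntType" is just an illustrative placeholder.
  def schemaFor(supported: Boolean): String =
    if (supported) "IntType" else throw new UnsupportedOperationException("no schema")

  def main(args: Array[String]): Unit = {
    // HEAD-side shape: a failed lookup collapses to an empty list.
    val asSeq: Seq[String] = Try(schemaFor(supported = false) :: Nil).getOrElse(Nil)
    // Incoming-side shape: a failed lookup is preserved as None, so callers can
    // distinguish "input types unknown" from "zero-argument function".
    val asOption: Option[Seq[String]] = Try(schemaFor(supported = false) :: Nil).toOption

    println(asSeq)    // List()
    println(asOption) // None
  }
}
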
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala index ad9e31690b2d8..c7997cc619976 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/PostgresDialect.scala @@ -64,7 +64,10 @@ private object PostgresDialect extends JdbcDialect { getJDBCType(et).map(_.databaseTypeDefinition) .orElse(JdbcUtils.getCommonJDBCType(et).map(_.databaseTypeDefinition)) .map(typeName => JdbcType(s"$typeName[]", java.sql.Types.ARRAY)) +<<<<<<< HEAD +======= case ByteType => throw new IllegalArgumentException(s"Unsupported type in postgresql: $dt"); +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 case _ => None } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala index d6c5d1435702d..ca5bbe271cd11 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala @@ -462,7 +462,11 @@ abstract class HadoopFsRelation private[sql]( name.toLowerCase == "_temporary" || name.startsWith(".") } +<<<<<<< HEAD + val (dirs, files) = statuses.partition(_.isDir) +======= val (dirs, files) = statuses.partition(_.isDirectory) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 // It uses [[LinkedHashSet]] since the order of files can affect the results. (SPARK-11500) if (dirs.isEmpty) { @@ -858,10 +862,17 @@ private[sql] object HadoopFsRelation extends Logging { val jobConf = new JobConf(fs.getConf, this.getClass()) val pathFilter = FileInputFormat.getInputPathFilter(jobConf) if (pathFilter != null) { +<<<<<<< HEAD + val (dirs, files) = fs.listStatus(status.getPath, pathFilter).partition(_.isDir) + files ++ dirs.flatMap(dir => listLeafFiles(fs, dir)) + } else { + val (dirs, files) = fs.listStatus(status.getPath).partition(_.isDir) +======= val (dirs, files) = fs.listStatus(status.getPath, pathFilter).partition(_.isDirectory) files ++ dirs.flatMap(dir => listLeafFiles(fs, dir)) } else { val (dirs, files) = fs.listStatus(status.getPath).partition(_.isDirectory) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 files ++ dirs.flatMap(dir => listLeafFiles(fs, dir)) } } @@ -896,7 +907,11 @@ private[sql] object HadoopFsRelation extends Logging { FakeFileStatus( status.getPath.toString, status.getLen, +<<<<<<< HEAD + status.isDir, +======= status.isDirectory, +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 status.getReplication, status.getBlockSize, status.getModificationTime, diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowSuite.scala index 3917b9762ba63..d20d1762db9c5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowSuite.scala @@ -292,6 +292,8 @@ class DataFrameWindowSuite extends QueryTest with SharedSQLContext { Row("b", 3, 8, 32), Row("b", 2, 4, 8))) } +<<<<<<< HEAD +======= test("null inputs") { val df = Seq(("a", 1), ("a", 1), ("a", 2), ("a", 2), ("b", 4), ("b", 3), ("b", 2)) @@ -312,4 +314,5 @@ class DataFrameWindowSuite extends QueryTest with SharedSQLContext { Row("b", 3, null, null), Row("b", 2, null, null))) } +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala index c19b5a4d98a85..b0d479dc5da7c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala @@ -514,7 +514,11 @@ class DatasetSuite extends QueryTest with SharedSQLContext { val e = intercept[AnalysisException] { ds.as[ClassData2].collect() } +<<<<<<< HEAD + assert(e.getMessage.contains("cannot resolve 'c' given input columns a, b"), e.getMessage) +======= assert(e.getMessage.contains("cannot resolve 'c' given input columns: [a, b]"), e.getMessage) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 } test("runtime nullability check") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ExtraStrategiesSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ExtraStrategiesSuite.scala index 359a1e7f8424a..ab4b7ad6c8ea0 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ExtraStrategiesSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ExtraStrategiesSuite.scala @@ -15,6 +15,18 @@ * limitations under the License. */ +<<<<<<< HEAD +package test.org.apache.spark.sql + +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.{Literal, GenericInternalRow, Attribute} +import org.apache.spark.sql.catalyst.plans.logical.{Project, LogicalPlan} +import org.apache.spark.sql.execution.SparkPlan +import org.apache.spark.sql.{Row, Strategy, QueryTest} +import org.apache.spark.sql.test.SharedSQLContext +import org.apache.spark.unsafe.types.UTF8String +======= package org.apache.spark.sql import org.apache.spark.rdd.RDD @@ -23,6 +35,7 @@ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.plans.logical.{Project, LogicalPlan} import org.apache.spark.sql.execution.SparkPlan import org.apache.spark.sql.test.SharedSQLContext +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 case class FastOperator(output: Seq[Attribute]) extends SparkPlan { @@ -32,7 +45,10 @@ case class FastOperator(output: Seq[Attribute]) extends SparkPlan { sparkContext.parallelize(Seq(row)) } +<<<<<<< HEAD +======= override def producedAttributes: AttributeSet = outputSet +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 override def children: Seq[SparkPlan] = Nil } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala b/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala index 815372f19233b..fb06b42db2b01 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/QueryTest.scala @@ -130,8 +130,11 @@ abstract class QueryTest extends PlanTest { checkJsonFormat(analyzedDF) +<<<<<<< HEAD +======= assertEmptyMissingInput(df) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 QueryTest.checkAnswer(analyzedDF, expectedAnswer) match { case Some(errorMessage) => fail(errorMessage) case None => @@ -200,9 +203,12 @@ abstract class QueryTest extends PlanTest { case a: ImperativeAggregate => return } +<<<<<<< HEAD +======= // bypass hive tests before we fix all corner cases in hive module. if (this.getClass.getName.startsWith("org.apache.spark.sql.hive")) return +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 val jsonString = try { logicalPlan.toJSON } catch { @@ -214,6 +220,12 @@ abstract class QueryTest extends PlanTest { """.stripMargin, e) } +<<<<<<< HEAD + // bypass hive tests before we fix all corner cases in hive module. 
+ if (this.getClass.getName.startsWith("org.apache.spark.sql.hive")) return + +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 // scala function is not serializable to JSON, use null to replace them so that we can compare // the plans later. val normalized1 = logicalPlan.transformAllExpressions { @@ -277,6 +289,8 @@ abstract class QueryTest extends PlanTest { """.stripMargin) } } +<<<<<<< HEAD +======= /** * Asserts that a given [[Queryable]] does not have missing inputs in all the analyzed plans. @@ -289,6 +303,7 @@ abstract class QueryTest extends PlanTest { assert(query.queryExecution.executedPlan.missingInput.isEmpty, s"The physical plan has missing inputs: ${query.queryExecution.executedPlan}") } +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 } object QueryTest { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index bb82b562aaaa2..861502c7aed9f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -2027,5 +2027,22 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { """.stripMargin), Row(false) :: Row(true) :: Nil) } +<<<<<<< HEAD + + test("SPARK-12340: overstep the bounds of Int in SparkPlan.executeTake"){ + val rdd = sqlContext.sparkContext.parallelize(1 to 3 , 3 ) + + rdd.toDF("key").registerTempTable("spark12340") + checkAnswer( + sql("select key from spark12340 limit 2147483638"), + Row(1) :: Row(2) :: Row(3) :: Nil + ) + + assert(rdd.take(2147483638).size === 3) + + assert(rdd.takeAsync(2147483638).get.size === 3) + } +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/UnsafeRowSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/UnsafeRowSuite.scala index a32763db054f3..f96b412900cde 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/UnsafeRowSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/UnsafeRowSuite.scala @@ -34,8 +34,13 @@ class UnsafeRowSuite extends SparkFunSuite { test("UnsafeRow Java serialization") { // serializing an UnsafeRow pointing to a large buffer should only serialize the relevant data val data = new Array[Byte](1024) +<<<<<<< HEAD + val row = new UnsafeRow + row.pointTo(data, 1, 16) +======= val row = new UnsafeRow(1) row.pointTo(data, 16) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 row.setLong(0, 19285) val ser = new JavaSerializer(new SparkConf).newInstance() @@ -47,8 +52,13 @@ class UnsafeRowSuite extends SparkFunSuite { test("UnsafeRow Kryo serialization") { // serializing an UnsafeRow pointing to a large buffer should only serialize the relevant data val data = new Array[Byte](1024) +<<<<<<< HEAD + val row = new UnsafeRow + row.pointTo(data, 1, 16) +======= val row = new UnsafeRow(1) row.pointTo(data, 16) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 row.setLong(0, 19285) val ser = new KryoSerializer(new SparkConf).newInstance() @@ -86,10 +96,18 @@ class UnsafeRowSuite extends SparkFunSuite { offheapRowPage.getBaseOffset, arrayBackedUnsafeRow.getSizeInBytes ) +<<<<<<< HEAD + val offheapUnsafeRow: UnsafeRow = new UnsafeRow() + offheapUnsafeRow.pointTo( + offheapRowPage.getBaseObject, + offheapRowPage.getBaseOffset, + 3, // num fields +======= val offheapUnsafeRow: UnsafeRow = new UnsafeRow(3) offheapUnsafeRow.pointTo( offheapRowPage.getBaseObject, offheapRowPage.getBaseOffset, +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 
arrayBackedUnsafeRow.getSizeInBytes ) assert(offheapUnsafeRow.getBaseObject === null) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala index f42f173b2a863..e44544182f980 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala @@ -17,17 +17,27 @@ package org.apache.spark.sql.execution.datasources.parquet +<<<<<<< HEAD +import org.apache.parquet.filter2.predicate.Operators._ +import org.apache.parquet.filter2.predicate.{FilterPredicate, Operators} + +import org.apache.spark.sql.{Column, DataFrame, QueryTest, Row, SQLConf} +======= import org.apache.parquet.filter2.predicate.FilterApi._ import org.apache.parquet.filter2.predicate.Operators.{Column => _, _} import org.apache.parquet.filter2.predicate.{FilterPredicate, Operators} import org.apache.spark.sql._ +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.planning.PhysicalOperation import org.apache.spark.sql.execution.datasources.{DataSourceStrategy, LogicalRelation} import org.apache.spark.sql.test.SharedSQLContext +<<<<<<< HEAD +======= import org.apache.spark.sql.types._ +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * A test suite that tests Parquet filter2 API based filter pushdown optimization. @@ -325,6 +335,8 @@ class ParquetFilterSuite extends QueryTest with ParquetTest with SharedSQLContex } } +<<<<<<< HEAD +======= test("SPARK-12231: test the filter and empty project in partitioned DataSource scan") { import testImplicits._ @@ -366,6 +378,7 @@ class ParquetFilterSuite extends QueryTest with ParquetTest with SharedSQLContex } +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 test("SPARK-11103: Filter applied on merged Parquet schema with new column fails") { import testImplicits._ @@ -425,6 +438,8 @@ class ParquetFilterSuite extends QueryTest with ParquetTest with SharedSQLContex } } +<<<<<<< HEAD +======= test("SPARK-12218 Converting conjunctions into Parquet filter predicates") { val schema = StructType(Seq( StructField("a", IntegerType, nullable = false), @@ -461,6 +476,7 @@ class ParquetFilterSuite extends QueryTest with ParquetTest with SharedSQLContex } } +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 test("SPARK-11164: test the parquet filter in") { import testImplicits._ withSQLConf(SQLConf.PARQUET_FILTER_PUSHDOWN_ENABLED.key -> "true") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala index dae72e8acb5a7..1aa72e66766dc 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCSuite.scala @@ -26,7 +26,10 @@ import org.scalatest.BeforeAndAfter import org.scalatest.PrivateMethodTester import org.apache.spark.SparkFunSuite +<<<<<<< HEAD +======= import org.apache.spark.sql.Row +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.sql.execution.datasources.jdbc.JDBCRDD import org.apache.spark.sql.test.SharedSQLContext import org.apache.spark.sql.types._ @@ -185,6 +188,12 @@ class JDBCSuite extends SparkFunSuite assert(stripSparkFilter(sql("SELECT * FROM 
foobar WHERE THEID != 2")).collect().size == 2) assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE THEID = 1")).collect().size == 1) assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE NAME = 'fred'")).collect().size == 1) +<<<<<<< HEAD + assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE NAME > 'fred'")).collect().size == 2) + assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE NAME != 'fred'")).collect().size == 2) + assert(stripSparkFilter(sql("SELECT * FROM nulltypes WHERE A IS NULL")).collect().size == 1) + assert(stripSparkFilter(sql("SELECT * FROM nulltypes WHERE A IS NOT NULL")).collect().size == 0) +======= assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE NAME <=> 'fred'")).collect().size == 1) assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE NAME > 'fred'")).collect().size == 2) assert(stripSparkFilter(sql("SELECT * FROM foobar WHERE NAME != 'fred'")).collect().size == 2) @@ -208,6 +217,7 @@ class JDBCSuite extends SparkFunSuite val df2 = sql("SELECT * FROM foobar WHERE NOT (THEID != 2) OR NOT (NAME != 'mary')") assert(df1.collect.toSet === Set(Row("mary", 2))) assert(df2.collect.toSet === Set(Row("mary", 2))) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 } test("SELECT * WHERE (quoted strings)") { @@ -454,6 +464,12 @@ class JDBCSuite extends SparkFunSuite } test("compile filters") { +<<<<<<< HEAD + val compileFilter = PrivateMethod[String]('compileFilter) + def doCompileFilter(f: Filter): String = JDBCRDD invokePrivate compileFilter(f) + assert(doCompileFilter(EqualTo("col0", 3)) === "col0 = 3") + assert(doCompileFilter(Not(EqualTo("col1", "abc"))) === "col1 != 'abc'") +======= val compileFilter = PrivateMethod[Option[String]]('compileFilter) def doCompileFilter(f: Filter): String = JDBCRDD invokePrivate compileFilter(f) getOrElse("") assert(doCompileFilter(EqualTo("col0", 3)) === "col0 = 3") @@ -462,6 +478,7 @@ class JDBCSuite extends SparkFunSuite === "(col0 = 0) AND (col1 = 'def')") assert(doCompileFilter(Or(EqualTo("col0", 2), EqualTo("col1", "ghi"))) === "(col0 = 2) OR (col1 = 'ghi')") +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 assert(doCompileFilter(LessThan("col0", 5)) === "col0 < 5") assert(doCompileFilter(LessThan("col3", Timestamp.valueOf("1995-11-21 00:00:00.0"))) === "col3 < '1995-11-21 00:00:00.0'") @@ -469,6 +486,10 @@ class JDBCSuite extends SparkFunSuite assert(doCompileFilter(LessThanOrEqual("col0", 5)) === "col0 <= 5") assert(doCompileFilter(GreaterThan("col0", 3)) === "col0 > 3") assert(doCompileFilter(GreaterThanOrEqual("col0", 3)) === "col0 >= 3") +<<<<<<< HEAD + assert(doCompileFilter(IsNull("col1")) === "col1 IS NULL") + assert(doCompileFilter(IsNotNull("col1")) === "col1 IS NOT NULL") +======= assert(doCompileFilter(In("col1", Array("jkl"))) === "col1 IN ('jkl')") assert(doCompileFilter(Not(In("col1", Array("mno", "pqr")))) === "(NOT (col1 IN ('mno', 'pqr')))") @@ -477,6 +498,7 @@ class JDBCSuite extends SparkFunSuite assert(doCompileFilter(And(EqualNullSafe("col0", "abc"), EqualTo("col1", "def"))) === "((NOT (col0 != 'abc' OR col0 IS NULL OR 'abc' IS NULL) " + "OR (col0 IS NULL AND 'abc' IS NULL))) AND (col1 = 'def')") +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 } test("Dialect unregister") { @@ -512,10 +534,13 @@ class JDBCSuite extends SparkFunSuite val Postgres = JdbcDialects.get("jdbc:postgresql://127.0.0.1/db") assert(Postgres.getCatalystType(java.sql.Types.OTHER, "json", 1, null) === Some(StringType)) assert(Postgres.getCatalystType(java.sql.Types.OTHER, "jsonb", 1, null) === Some(StringType)) 
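
[Editor's note: the "compile filters" hunks in this suite, together with the filterWhereClause hunk in JDBCRDD.scala further up, contrast the two filter-compilation shapes: the HEAD side returns null for unhandled filters and assembles the WHERE clause with a StringBuilder, while the incoming side returns Option[String] and flattens. A simplified, self-contained sketch of the Option-based shape; the Filter case classes here are toy stand-ins, not Spark's org.apache.spark.sql.sources types, and value quoting is deliberately omitted:]

object FilterCompilationSketch {
  // Toy stand-ins for a few data source filters.
  sealed trait Filter
  case class EqualTo(attr: String, value: Any) extends Filter
  case class IsNull(attr: String) extends Filter
  case class Unsupported() extends Filter

  // Option-based compilation: unhandled filters become None instead of null.
  def compileFilter(f: Filter): Option[String] = f match {
    case EqualTo(attr, value) => Some(s"$attr = $value")
    case IsNull(attr)         => Some(s"$attr IS NULL")
    case _                    => None
  }

  // Unhandled filters simply drop out of the assembled WHERE clause.
  def whereClause(filters: Seq[Filter]): String = {
    val compiled = filters.flatMap(compileFilter)
    if (compiled.isEmpty) "" else compiled.mkString("WHERE ", " AND ", "")
  }

  def main(args: Array[String]): Unit = {
    println(whereClause(Seq(EqualTo("col0", 3), Unsupported(), IsNull("col1"))))
    // WHERE col0 = 3 AND col1 IS NULL
  }
}
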
+<<<<<<< HEAD +======= val errMsg = intercept[IllegalArgumentException] { Postgres.getJDBCType(ByteType) } assert(errMsg.getMessage contains "Unsupported type in postgresql: ByteType") +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 } test("DerbyDialect jdbc type mapping") { diff --git a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala index 2b0e48dbfcf28..e982146ae9b08 100644 --- a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala +++ b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala @@ -308,12 +308,16 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { // The difference between the double numbers generated by Hive and Spark // can be ignored (e.g., 0.6633880657639323 and 0.6633880657639322) +<<<<<<< HEAD + "udaf_corr" +======= "udaf_corr", // Feature removed in HIVE-11145 "alter_partition_protect_mode", "drop_partitions_ignore_protection", "protectmode" +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 ) /** @@ -333,6 +337,10 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { "alter_index", "alter_merge_2", "alter_partition_format_loc", +<<<<<<< HEAD + "alter_partition_protect_mode", +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 "alter_partition_with_whitelist", "alter_rename_partition", "alter_table_serde", @@ -464,6 +472,10 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { "drop_partitions_filter", "drop_partitions_filter2", "drop_partitions_filter3", +<<<<<<< HEAD + "drop_partitions_ignore_protection", +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 "drop_table", "drop_table2", "drop_table_removes_partition_dirs", @@ -781,6 +793,10 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter { "ppr_pushdown2", "ppr_pushdown3", "progress_1", +<<<<<<< HEAD + "protectmode", +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 "push_or", "query_with_semi", "quote1", diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index ffabb92179a18..905f244001b4e 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -232,7 +232,10 @@ v${hive.version.short}/src/main/scala +<<<<<<< HEAD +======= ${project.build.directory/generated-sources/antlr +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 @@ -261,6 +264,8 @@ +<<<<<<< HEAD +======= @@ -282,6 +287,7 @@ +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala index 5d00e7367026f..c0ea5698323f1 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala @@ -380,7 +380,11 @@ class HiveContext private[hive]( def calculateTableSize(fs: FileSystem, path: Path): Long = { val fileStatus = fs.getFileStatus(path) +<<<<<<< HEAD + val size = if (fileStatus.isDir) { +======= val size = if (fileStatus.isDirectory) { +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 fs.listStatus(path) .map { status => if (!status.getPath().getName().startsWith(stagingDir)) { @@ -692,6 +696,13 @@ private[hive] object HiveContext { val CONVERT_METASTORE_PARQUET_WITH_SCHEMA_MERGING = booleanConf( "spark.sql.hive.convertMetastoreParquet.mergeSchema", 
defaultValue = Some(false), +<<<<<<< HEAD + doc = "TODO") + + val CONVERT_CTAS = booleanConf("spark.sql.hive.convertCTAS", + defaultValue = Some(false), + doc = "TODO") +======= doc = "When true, also tries to merge possibly different but compatible Parquet schemas in " + "different Parquet data files. This configuration is only effective " + "when \"spark.sql.hive.convertMetastoreParquet\" is true.") @@ -700,6 +711,7 @@ private[hive] object HiveContext { defaultValue = Some(false), doc = "When true, a table created by a Hive CTAS statement (no USING clause) will be " + "converted to a data source table, using the data source set by spark.sql.sources.default.") +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 val HIVE_METASTORE_SHARED_PREFIXES = stringSeqConf("spark.sql.hive.metastore.sharedPrefixes", defaultValue = Some(jdbcPrefixes), @@ -720,7 +732,11 @@ private[hive] object HiveContext { val HIVE_THRIFT_SERVER_ASYNC = booleanConf("spark.sql.hive.thriftServer.async", defaultValue = Some(true), +<<<<<<< HEAD + doc = "TODO") +======= doc = "When set to true, Hive Thrift server executes SQL queries in an asynchronous way.") +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** Constructs a configuration for hive, where the metastore is located in a temp directory. */ def newTemporaryConfiguration(useInMemoryDerby: Boolean): Map[String, String] = { diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala index b1d841d1b5543..54afa2b5724e8 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala @@ -27,12 +27,28 @@ import org.apache.hadoop.hive.conf.HiveConf import org.apache.hadoop.hive.conf.HiveConf.ConfVars import org.apache.hadoop.hive.ql.exec.{FunctionInfo, FunctionRegistry} import org.apache.hadoop.hive.ql.lib.Node +<<<<<<< HEAD +import org.apache.hadoop.hive.ql.parse._ +======= import org.apache.hadoop.hive.ql.parse.SemanticException +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.hadoop.hive.ql.plan.PlanUtils import org.apache.hadoop.hive.ql.session.SessionState import org.apache.hadoop.hive.ql.{Context, ErrorMsg} import org.apache.hadoop.hive.serde.serdeConstants import org.apache.hadoop.hive.serde2.`lazy`.LazySimpleSerDe +<<<<<<< HEAD + +import org.apache.spark.Logging +import org.apache.spark.sql.{AnalysisException, catalyst} +import org.apache.spark.sql.catalyst.analysis._ +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.expressions.aggregate._ +import org.apache.spark.sql.catalyst.plans.{logical, _} +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.catalyst.trees.CurrentOrigin +import org.apache.spark.sql.catalyst.TableIdentifier +======= import org.apache.spark.Logging import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis._ @@ -41,14 +57,19 @@ import org.apache.spark.sql.catalyst.expressions.aggregate._ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.plans.{logical, _} import org.apache.spark.sql.catalyst.trees.CurrentOrigin +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.sql.execution.ExplainCommand import org.apache.spark.sql.execution.datasources.DescribeCommand import org.apache.spark.sql.hive.HiveShim._ import org.apache.spark.sql.hive.client._ import org.apache.spark.sql.hive.execution.{AnalyzeTable, 
DropTable, HiveNativeCommand, HiveScriptIOSchema} +<<<<<<< HEAD +import org.apache.spark.sql.types._ +======= import org.apache.spark.sql.parser._ import org.apache.spark.sql.types._ import org.apache.spark.sql.{AnalysisException, catalyst} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.unsafe.types.CalendarInterval import org.apache.spark.util.random.RandomSampler @@ -227,7 +248,11 @@ private[hive] object HiveQl extends Logging { */ def withChildren(newChildren: Seq[ASTNode]): ASTNode = { (1 to n.getChildCount).foreach(_ => n.deleteChild(0)) +<<<<<<< HEAD + n.addChildren(newChildren.asJava) +======= newChildren.foreach(n.addChild(_)) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 n } @@ -273,8 +298,12 @@ private[hive] object HiveQl extends Logging { private def createContext(): Context = new Context(hiveConf) private def getAst(sql: String, context: Context) = +<<<<<<< HEAD + ParseUtils.findRootNonNullToken((new ParseDriver).parse(sql, context)) +======= ParseUtils.findRootNonNullToken( (new ParseDriver).parse(sql, context)) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * Returns the HiveConf @@ -313,7 +342,11 @@ private[hive] object HiveQl extends Logging { context.clear() plan } catch { +<<<<<<< HEAD + case pe: org.apache.hadoop.hive.ql.parse.ParseException => +======= case pe: ParseException => +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 pe.getMessage match { case errorRegEx(line, start, message) => throw new AnalysisException(message, Some(line.toInt), Some(start.toInt)) @@ -338,8 +371,12 @@ private[hive] object HiveQl extends Logging { val tree = try { ParseUtils.findRootNonNullToken( +<<<<<<< HEAD + (new ParseDriver).parse(ddl, null /* no context required for parsing alone */)) +======= (new ParseDriver) .parse(ddl, null /* no context required for parsing alone */)) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 } catch { case pe: org.apache.hadoop.hive.ql.parse.ParseException => throw new RuntimeException(s"Failed to parse ddl: '$ddl'", pe) @@ -600,12 +637,21 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C NativePlaceholder } else { tableType match { +<<<<<<< HEAD + case Token("TOK_TABTYPE", nameParts) if nameParts.size == 1 => { + nameParts.head match { + case Token(".", dbName :: tableName :: Nil) => + // It is describing a table with the format like "describe db.table". + // TODO: Actually, a user may mean tableName.columnName. Need to resolve this issue. + val tableIdent = extractTableIdent(nameParts.head) +======= case Token("TOK_TABTYPE", Token("TOK_TABNAME", nameParts :: Nil) :: Nil) => { nameParts match { case Token(".", dbName :: tableName :: Nil) => // It is describing a table with the format like "describe db.table". // TODO: Actually, a user may mean tableName.columnName. Need to resolve this issue. 
val tableIdent = extractTableIdent(nameParts) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 DescribeCommand( UnresolvedRelation(tableIdent, None), isExtended = extended.isDefined) case Token(".", dbName :: tableName :: colName :: Nil) => @@ -664,7 +710,11 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C NativePlaceholder } else { val schema = maybeColumns.map { cols => +<<<<<<< HEAD + BaseSemanticAnalyzer.getColumns(cols, true).asScala.map { field => +======= SemanticAnalyzer.getColumns(cols, true).asScala.map { field => +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 // We can't specify column types when create view, so fill it with null first, and // update it after the schema has been resolved later. HiveColumn(field.getName, null, field.getComment) @@ -680,7 +730,11 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C maybeComment.foreach { case Token("TOK_TABLECOMMENT", child :: Nil) => +<<<<<<< HEAD + val comment = BaseSemanticAnalyzer.unescapeSQLString(child.getText) +======= val comment = SemanticAnalyzer.unescapeSQLString(child.getText) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 if (comment ne null) { properties += ("comment" -> comment) } @@ -752,7 +806,11 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C children.collect { case list @ Token("TOK_TABCOLLIST", _) => +<<<<<<< HEAD + val cols = BaseSemanticAnalyzer.getColumns(list, true) +======= val cols = SemanticAnalyzer.getColumns(list, true) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 if (cols != null) { tableDesc = tableDesc.copy( schema = cols.asScala.map { field => @@ -760,11 +818,19 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C }) } case Token("TOK_TABLECOMMENT", child :: Nil) => +<<<<<<< HEAD + val comment = BaseSemanticAnalyzer.unescapeSQLString(child.getText) + // TODO support the sql text + tableDesc = tableDesc.copy(viewText = Option(comment)) + case Token("TOK_TABLEPARTCOLS", list @ Token("TOK_TABCOLLIST", _) :: Nil) => + val cols = BaseSemanticAnalyzer.getColumns(list(0), false) +======= val comment = SemanticAnalyzer.unescapeSQLString(child.getText) // TODO support the sql text tableDesc = tableDesc.copy(viewText = Option(comment)) case Token("TOK_TABLEPARTCOLS", list @ Token("TOK_TABCOLLIST", _) :: Nil) => val cols = SemanticAnalyzer.getColumns(list(0), false) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 if (cols != null) { tableDesc = tableDesc.copy( partitionColumns = cols.asScala.map { field => @@ -775,6 +841,23 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C val serdeParams = new java.util.HashMap[String, String]() child match { case Token("TOK_TABLEROWFORMATFIELD", rowChild1 :: rowChild2) => +<<<<<<< HEAD + val fieldDelim = BaseSemanticAnalyzer.unescapeSQLString (rowChild1.getText()) + serdeParams.put(serdeConstants.FIELD_DELIM, fieldDelim) + serdeParams.put(serdeConstants.SERIALIZATION_FORMAT, fieldDelim) + if (rowChild2.length > 1) { + val fieldEscape = BaseSemanticAnalyzer.unescapeSQLString (rowChild2(0).getText) + serdeParams.put(serdeConstants.ESCAPE_CHAR, fieldEscape) + } + case Token("TOK_TABLEROWFORMATCOLLITEMS", rowChild :: Nil) => + val collItemDelim = BaseSemanticAnalyzer.unescapeSQLString(rowChild.getText) + serdeParams.put(serdeConstants.COLLECTION_DELIM, collItemDelim) + case Token("TOK_TABLEROWFORMATMAPKEYS", rowChild :: Nil) => + val mapKeyDelim = 
BaseSemanticAnalyzer.unescapeSQLString(rowChild.getText) + serdeParams.put(serdeConstants.MAPKEY_DELIM, mapKeyDelim) + case Token("TOK_TABLEROWFORMATLINES", rowChild :: Nil) => + val lineDelim = BaseSemanticAnalyzer.unescapeSQLString(rowChild.getText) +======= val fieldDelim = SemanticAnalyzer.unescapeSQLString (rowChild1.getText()) serdeParams.put(serdeConstants.FIELD_DELIM, fieldDelim) serdeParams.put(serdeConstants.SERIALIZATION_FORMAT, fieldDelim) @@ -790,6 +873,7 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C serdeParams.put(serdeConstants.MAPKEY_DELIM, mapKeyDelim) case Token("TOK_TABLEROWFORMATLINES", rowChild :: Nil) => val lineDelim = SemanticAnalyzer.unescapeSQLString(rowChild.getText) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 if (!(lineDelim == "\n") && !(lineDelim == "10")) { throw new AnalysisException( SemanticAnalyzer.generateErrorMessage( @@ -798,13 +882,28 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C } serdeParams.put(serdeConstants.LINE_DELIM, lineDelim) case Token("TOK_TABLEROWFORMATNULL", rowChild :: Nil) => +<<<<<<< HEAD + val nullFormat = BaseSemanticAnalyzer.unescapeSQLString(rowChild.getText) +======= val nullFormat = SemanticAnalyzer.unescapeSQLString(rowChild.getText) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 // TODO support the nullFormat case _ => assert(false) } tableDesc = tableDesc.copy( serdeProperties = tableDesc.serdeProperties ++ serdeParams.asScala) case Token("TOK_TABLELOCATION", child :: Nil) => +<<<<<<< HEAD + var location = BaseSemanticAnalyzer.unescapeSQLString(child.getText) + location = EximUtil.relativeToAbsolutePath(hiveConf, location) + tableDesc = tableDesc.copy(location = Option(location)) + case Token("TOK_TABLESERIALIZER", child :: Nil) => + tableDesc = tableDesc.copy( + serde = Option(BaseSemanticAnalyzer.unescapeSQLString(child.getChild(0).getText))) + if (child.getChildCount == 2) { + val serdeParams = new java.util.HashMap[String, String]() + BaseSemanticAnalyzer.readProps( +======= var location = SemanticAnalyzer.unescapeSQLString(child.getText) location = SemanticAnalyzer.relativeToAbsolutePath(hiveConf, location) tableDesc = tableDesc.copy(location = Option(location)) @@ -814,6 +913,7 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C if (child.getChildCount == 2) { val serdeParams = new java.util.HashMap[String, String]() SemanticAnalyzer.readProps( +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 (child.getChild(1).getChild(0)).asInstanceOf[ASTNode], serdeParams) tableDesc = tableDesc.copy( serdeProperties = tableDesc.serdeProperties ++ serdeParams.asScala) @@ -893,9 +993,15 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C case list @ Token("TOK_TABLEFILEFORMAT", children) => tableDesc = tableDesc.copy( inputFormat = +<<<<<<< HEAD + Option(BaseSemanticAnalyzer.unescapeSQLString(list.getChild(0).getText)), + outputFormat = + Option(BaseSemanticAnalyzer.unescapeSQLString(list.getChild(1).getText))) +======= Option(SemanticAnalyzer.unescapeSQLString(list.getChild(0).getText)), outputFormat = Option(SemanticAnalyzer.unescapeSQLString(list.getChild(1).getText))) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 case Token("TOK_STORAGEHANDLER", _) => throw new AnalysisException(ErrorMsg.CREATE_NON_NATIVE_AS.getMsg()) case _ => // Unsupport features @@ -911,6 +1017,26 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C Token("TOK_TABLE_PARTITION", 
table) :: Nil) => NativePlaceholder case Token("TOK_QUERY", queryArgs) +<<<<<<< HEAD + if Seq("TOK_FROM", "TOK_INSERT").contains(queryArgs.head.getText) => + + val (fromClause: Option[ASTNode], insertClauses, cteRelations) = + queryArgs match { + case Token("TOK_FROM", args: Seq[ASTNode]) :: insertClauses => + // check if has CTE + insertClauses.last match { + case Token("TOK_CTE", cteClauses) => + val cteRelations = cteClauses.map(node => { + val relation = nodeToRelation(node, context).asInstanceOf[Subquery] + (relation.alias, relation) + }).toMap + (Some(args.head), insertClauses.init, Some(cteRelations)) + + case _ => (Some(args.head), insertClauses, None) + } + + case Token("TOK_INSERT", _) :: Nil => (None, queryArgs, None) +======= if Seq("TOK_CTE", "TOK_FROM", "TOK_INSERT").contains(queryArgs.head.getText) => val (fromClause: Option[ASTNode], insertClauses, cteRelations) = @@ -925,6 +1051,7 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C (Some(from.head), inserts, None) case Token("TOK_INSERT", _) :: Nil => (None, queryArgs, None) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 } // Return one query for each insert clause. @@ -1023,20 +1150,33 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C (rowFormat, None, Nil, false) case Token("TOK_SERDENAME", Token(serdeClass, Nil) :: Nil) :: Nil => +<<<<<<< HEAD + (Nil, Some(BaseSemanticAnalyzer.unescapeSQLString(serdeClass)), Nil, false) +======= (Nil, Some(SemanticAnalyzer.unescapeSQLString(serdeClass)), Nil, false) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 case Token("TOK_SERDENAME", Token(serdeClass, Nil) :: Token("TOK_TABLEPROPERTIES", Token("TOK_TABLEPROPLIST", propsClause) :: Nil) :: Nil) :: Nil => val serdeProps = propsClause.map { case Token("TOK_TABLEPROPERTY", Token(name, Nil) :: Token(value, Nil) :: Nil) => +<<<<<<< HEAD + (BaseSemanticAnalyzer.unescapeSQLString(name), + BaseSemanticAnalyzer.unescapeSQLString(value)) +======= (SemanticAnalyzer.unescapeSQLString(name), SemanticAnalyzer.unescapeSQLString(value)) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 } // SPARK-10310: Special cases LazySimpleSerDe // TODO Fully supports user-defined record reader/writer classes +<<<<<<< HEAD + val unescapedSerDeClass = BaseSemanticAnalyzer.unescapeSQLString(serdeClass) +======= val unescapedSerDeClass = SemanticAnalyzer.unescapeSQLString(serdeClass) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 val useDefaultRecordReaderWriter = unescapedSerDeClass == classOf[LazySimpleSerDe].getCanonicalName (Nil, Some(unescapedSerDeClass), serdeProps, useDefaultRecordReaderWriter) @@ -1053,7 +1193,11 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C val (outRowFormat, outSerdeClass, outSerdeProps, useDefaultRecordWriter) = matchSerDe(outputSerdeClause) +<<<<<<< HEAD + val unescapedScript = BaseSemanticAnalyzer.unescapeSQLString(script) +======= val unescapedScript = SemanticAnalyzer.unescapeSQLString(script) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 // TODO Adds support for user-defined record reader/writer classes val recordReaderClass = if (useDefaultRecordReader) { @@ -1359,7 +1503,10 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C case "TOK_LEFTOUTERJOIN" => LeftOuter case "TOK_FULLOUTERJOIN" => FullOuter case "TOK_LEFTSEMIJOIN" => LeftSemi +<<<<<<< HEAD +======= case "TOK_ANTIJOIN" => throw new NotImplementedError("Anti join not supported") +>>>>>>> 
15bd73627e04591fd13667b4838c9098342db965 } Join(nodeToRelation(relation1, context), nodeToRelation(relation2, context), @@ -1474,11 +1621,19 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C } val numericAstTypes = Seq( +<<<<<<< HEAD + HiveParser.Number, + HiveParser.TinyintLiteral, + HiveParser.SmallintLiteral, + HiveParser.BigintLiteral, + HiveParser.DecimalLiteral) +======= SparkSqlParser.Number, SparkSqlParser.TinyintLiteral, SparkSqlParser.SmallintLiteral, SparkSqlParser.BigintLiteral, SparkSqlParser.DecimalLiteral) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /* Case insensitive matches */ val COUNT = "(?i)COUNT".r @@ -1648,7 +1803,11 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C case Token(TRUE(), Nil) => Literal.create(true, BooleanType) case Token(FALSE(), Nil) => Literal.create(false, BooleanType) case Token("TOK_STRINGLITERALSEQUENCE", strings) => +<<<<<<< HEAD + Literal(strings.map(s => BaseSemanticAnalyzer.unescapeSQLString(s.getText)).mkString) +======= Literal(strings.map(s => SemanticAnalyzer.unescapeSQLString(s.getText)).mkString) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 // This code is adapted from // /ql/src/java/org/apache/hadoop/hive/ql/parse/TypeCheckProcFactory.java#L223 @@ -1683,6 +1842,39 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C v } +<<<<<<< HEAD + case ast: ASTNode if ast.getType == HiveParser.StringLiteral => + Literal(BaseSemanticAnalyzer.unescapeSQLString(ast.getText)) + + case ast: ASTNode if ast.getType == HiveParser.TOK_DATELITERAL => + Literal(Date.valueOf(ast.getText.substring(1, ast.getText.length - 1))) + + case ast: ASTNode if ast.getType == HiveParser.TOK_CHARSETLITERAL => + Literal(BaseSemanticAnalyzer.charSetString(ast.getChild(0).getText, ast.getChild(1).getText)) + + case ast: ASTNode if ast.getType == HiveParser.TOK_INTERVAL_YEAR_MONTH_LITERAL => + Literal(CalendarInterval.fromYearMonthString(ast.getText)) + + case ast: ASTNode if ast.getType == HiveParser.TOK_INTERVAL_DAY_TIME_LITERAL => + Literal(CalendarInterval.fromDayTimeString(ast.getText)) + + case ast: ASTNode if ast.getType == HiveParser.TOK_INTERVAL_YEAR_LITERAL => + Literal(CalendarInterval.fromSingleUnitString("year", ast.getText)) + + case ast: ASTNode if ast.getType == HiveParser.TOK_INTERVAL_MONTH_LITERAL => + Literal(CalendarInterval.fromSingleUnitString("month", ast.getText)) + + case ast: ASTNode if ast.getType == HiveParser.TOK_INTERVAL_DAY_LITERAL => + Literal(CalendarInterval.fromSingleUnitString("day", ast.getText)) + + case ast: ASTNode if ast.getType == HiveParser.TOK_INTERVAL_HOUR_LITERAL => + Literal(CalendarInterval.fromSingleUnitString("hour", ast.getText)) + + case ast: ASTNode if ast.getType == HiveParser.TOK_INTERVAL_MINUTE_LITERAL => + Literal(CalendarInterval.fromSingleUnitString("minute", ast.getText)) + + case ast: ASTNode if ast.getType == HiveParser.TOK_INTERVAL_SECOND_LITERAL => +======= case ast: ASTNode if ast.getType == SparkSqlParser.StringLiteral => Literal(SemanticAnalyzer.unescapeSQLString(ast.getText)) @@ -1714,6 +1906,7 @@ https://cwiki.apache.org/confluence/display/Hive/Enhanced+Aggregation%2C+Cube%2C Literal(CalendarInterval.fromSingleUnitString("minute", ast.getText)) case ast: ASTNode if ast.getType == SparkSqlParser.TOK_INTERVAL_SECOND_LITERAL => +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 Literal(CalendarInterval.fromSingleUnitString("second", ast.getText)) case a: ASTNode => diff --git 
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/ClientWrapper.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/ClientWrapper.scala index d3da22aa0ae5c..224a64d1ee772 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/ClientWrapper.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/client/ClientWrapper.scala @@ -31,7 +31,13 @@ import org.apache.hadoop.hive.ql.metadata.Hive import org.apache.hadoop.hive.ql.processors._ import org.apache.hadoop.hive.ql.session.SessionState import org.apache.hadoop.hive.ql.{Driver, metadata} +<<<<<<< HEAD +import org.apache.hadoop.hive.shims.{HadoopShims, ShimLoader} import org.apache.hadoop.security.UserGroupInformation +import org.apache.hadoop.util.VersionInfo +======= +import org.apache.hadoop.security.UserGroupInformation +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.{SparkConf, SparkException, Logging} import org.apache.spark.sql.catalyst.expressions.Expression @@ -63,6 +69,77 @@ private[hive] class ClientWrapper( extends ClientInterface with Logging { +<<<<<<< HEAD + overrideHadoopShims() + + // !! HACK ALERT !! + // + // Internally, Hive `ShimLoader` tries to load different versions of Hadoop shims by checking + // major version number gathered from Hadoop jar files: + // + // - For major version number 1, load `Hadoop20SShims`, where "20S" stands for Hadoop 0.20 with + // security. + // - For major version number 2, load `Hadoop23Shims`, where "23" stands for Hadoop 0.23. + // + // However, APIs in Hadoop 2.0.x and 2.1.x versions were in flux due to historical reasons. It + // turns out that Hadoop 2.0.x versions should also be used together with `Hadoop20SShims`, but + // `Hadoop23Shims` is chosen because the major version number here is 2. + // + // To fix this issue, we try to inspect Hadoop version via `org.apache.hadoop.utils.VersionInfo` + // and load `Hadoop20SShims` for Hadoop 1.x and 2.0.x versions. If Hadoop version information is + // not available, we decide whether to override the shims or not by checking for existence of a + // probe method which doesn't exist in Hadoop 1.x or 2.0.x versions. + private def overrideHadoopShims(): Unit = { + val hadoopVersion = VersionInfo.getVersion + val VersionPattern = """(\d+)\.(\d+).*""".r + + hadoopVersion match { + case null => + logError("Failed to inspect Hadoop version") + + // Using "Path.getPathWithoutSchemeAndAuthority" as the probe method. 
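As an aside on the shim override that the following lines implement: the decisive piece is the version check that sends Hadoop 1.x and 2.0.x to `Hadoop20SShims`, with the `Path` method probe used only as a fallback when `VersionInfo.getVersion` returns null. A standalone sketch of that check (the helper name is made up; the real code then swaps the shims instance into `ShimLoader`'s private field via reflection):

// Mirrors the version rule described above: only Hadoop 1.x and 2.0.x need the "20S" shims.
// `needsHadoop20SShims` is a hypothetical helper, not part of ClientWrapper.
def needsHadoop20SShims(hadoopVersion: String): Boolean = {
  val VersionPattern = """(\d+)\.(\d+).*""".r
  hadoopVersion match {
    case VersionPattern(major, minor) => major.toInt < 2 || (major.toInt == 2 && minor.toInt == 0)
    case _                            => false
  }
}

assert(needsHadoop20SShims("1.2.1"))
assert(needsHadoop20SShims("2.0.5-alpha"))
assert(!needsHadoop20SShims("2.6.0"))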
+ val probeMethod = "getPathWithoutSchemeAndAuthority" + if (!classOf[Path].getDeclaredMethods.exists(_.getName == probeMethod)) { + logInfo( + s"Method ${classOf[Path].getCanonicalName}.$probeMethod not found, " + + s"we are probably using Hadoop 1.x or 2.0.x") + loadHadoop20SShims() + } + + case VersionPattern(majorVersion, minorVersion) => + logInfo(s"Inspected Hadoop version: $hadoopVersion") + + // Loads Hadoop20SShims for 1.x and 2.0.x versions + val (major, minor) = (majorVersion.toInt, minorVersion.toInt) + if (major < 2 || (major == 2 && minor == 0)) { + loadHadoop20SShims() + } + } + + // Logs the actual loaded Hadoop shims class + val loadedShimsClassName = ShimLoader.getHadoopShims.getClass.getCanonicalName + logInfo(s"Loaded $loadedShimsClassName for Hadoop version $hadoopVersion") + } + + private def loadHadoop20SShims(): Unit = { + val hadoop20SShimsClassName = "org.apache.hadoop.hive.shims.Hadoop20SShims" + logInfo(s"Loading Hadoop shims $hadoop20SShimsClassName") + + try { + val shimsField = classOf[ShimLoader].getDeclaredField("hadoopShims") + // scalastyle:off classforname + val shimsClass = Class.forName(hadoop20SShimsClassName) + // scalastyle:on classforname + val shims = classOf[HadoopShims].cast(shimsClass.newInstance()) + shimsField.setAccessible(true) + shimsField.set(null, shims) + } catch { case cause: Throwable => + throw new RuntimeException(s"Failed to load $hadoop20SShimsClassName", cause) + } + } + +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 // Circular buffer to hold what hive prints to STDOUT and ERR. Only printed when failures occur. private val outputBuffer = new CircularBuffer() diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala index 8141136de5311..456c94ac0783f 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/HiveTableScan.scala @@ -51,9 +51,12 @@ case class HiveTableScan( require(partitionPruningPred.isEmpty || relation.hiveQlTable.isPartitioned, "Partition pruning predicates only supported for partitioned tables.") +<<<<<<< HEAD +======= override def producedAttributes: AttributeSet = outputSet ++ AttributeSet(partitionPruningPred.flatMap(_.references)) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 // Retrieve the original attributes based on expression ID so that capitalization matches. 
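On the `producedAttributes` override added in the HiveTableScan hunk just above: the intent is that a Hive table scan counts both its output columns and the columns referenced only by its partition-pruning predicates as "produced", so those references are not reported as missing inputs. A toy model of that bookkeeping, with invented names (`SimpleScan` and `missingInput` here only approximate the real check):

// Toy model: a scan "produces" its output plus whatever its pruning predicates reference.
final case class SimpleScan(output: Set[String], pruningPredicateRefs: Set[String]) {
  def producedAttributes: Set[String] = output ++ pruningPredicateRefs
  def missingInput(references: Set[String]): Set[String] = references -- producedAttributes
}

val scan = SimpleScan(output = Set("key", "value"), pruningPredicateRefs = Set("ds"))
// "ds" appears only in the partition-pruning predicate, yet it is not flagged as missing.
assert(scan.missingInput(Set("key", "ds")).isEmpty)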
val attributes = requestedAttributes.map(relation.attributeMap) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/ScriptTransformation.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/ScriptTransformation.scala index a61e162f48f1b..b4704023acb9d 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/ScriptTransformation.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/ScriptTransformation.scala @@ -60,8 +60,11 @@ case class ScriptTransformation( override protected def otherCopyArgs: Seq[HiveContext] = sc :: Nil +<<<<<<< HEAD +======= override def producedAttributes: AttributeSet = outputSet -- inputSet +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 private val serializedHiveConf = new SerializableConfiguration(sc.hiveconf) protected override def doExecute(): RDD[InternalRow] = { diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveWriterContainers.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveWriterContainers.scala index 777e7857d2db2..1e9d96136eadb 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveWriterContainers.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveWriterContainers.scala @@ -27,10 +27,16 @@ import org.apache.hadoop.hive.conf.HiveConf.ConfVars import org.apache.hadoop.hive.ql.exec.{FileSinkOperator, Utilities} import org.apache.hadoop.hive.ql.io.{HiveFileFormatUtils, HiveOutputFormat} import org.apache.hadoop.hive.ql.plan.TableDesc +<<<<<<< HEAD +import org.apache.hadoop.io.Writable +import org.apache.hadoop.mapred._ +import org.apache.hadoop.hive.common.FileUtils +======= import org.apache.hadoop.hive.common.FileUtils import org.apache.hadoop.io.Writable import org.apache.hadoop.mapred._ import org.apache.hadoop.mapreduce.TaskType +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.mapred.SparkHadoopMapRedUtil import org.apache.spark.{Logging, SerializableWritable, SparkHadoopWriter} @@ -47,7 +53,13 @@ import org.apache.spark.util.SerializableJobConf private[hive] class SparkHiveWriterContainer( jobConf: JobConf, fileSinkConf: FileSinkDesc) +<<<<<<< HEAD + extends Logging + with SparkHadoopMapRedUtil + with Serializable { +======= extends Logging with Serializable { +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 private val now = new Date() private val tableDesc: TableDesc = fileSinkConf.getTableInfo @@ -67,8 +79,13 @@ private[hive] class SparkHiveWriterContainer( @transient private var writer: FileSinkOperator.RecordWriter = null @transient protected lazy val committer = conf.value.getOutputCommitter +<<<<<<< HEAD + @transient protected lazy val jobContext = newJobContext(conf.value, jID.value) + @transient private lazy val taskContext = newTaskAttemptContext(conf.value, taID.value) +======= @transient protected lazy val jobContext = new JobContextImpl(conf.value, jID.value) @transient private lazy val taskContext = new TaskAttemptContextImpl(conf.value, taID.value) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 @transient private lazy val outputFormat = conf.value.getOutputFormat.asInstanceOf[HiveOutputFormat[AnyRef, Writable]] @@ -130,7 +147,11 @@ private[hive] class SparkHiveWriterContainer( jID = new SerializableWritable[JobID](SparkHadoopWriter.createJobID(now, jobId)) taID = new SerializableWritable[TaskAttemptID]( +<<<<<<< HEAD + new TaskAttemptID(new TaskID(jID.value, true, splitID), attemptID)) +======= new TaskAttemptID(new TaskID(jID.value, TaskType.MAP, splitID), attemptID)) +>>>>>>> 
15bd73627e04591fd13667b4838c9098342db965 } private def setConfParams() { diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileOperator.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileOperator.scala index b91a14bdbcc48..bc5dbccbc6bf4 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileOperator.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileOperator.scala @@ -95,7 +95,11 @@ private[orc] object OrcFileOperator extends Logging { val fs = origPath.getFileSystem(conf) val path = origPath.makeQualified(fs.getUri, fs.getWorkingDirectory) val paths = SparkHadoopUtil.get.listLeafStatuses(fs, origPath) +<<<<<<< HEAD + .filterNot(_.isDir) +======= .filterNot(_.isDirectory) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 .map(_.getPath) .filterNot(_.getName.startsWith("_")) .filterNot(_.getName.startsWith(".")) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFilters.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFilters.scala index 165210f9ff301..e29f98beddf7a 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFilters.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFilters.scala @@ -26,6 +26,17 @@ import org.apache.spark.Logging import org.apache.spark.sql.sources._ /** +<<<<<<< HEAD + * It may be optimized by push down partial filters. But we are conservative here. + * Because if some filters fail to be parsed, the tree may be corrupted, + * and cannot be used anymore. + */ +private[orc] object OrcFilters extends Logging { + def createFilter(filters: Array[Filter]): Option[SearchArgument] = { + for { + // Combines all filters with `And`s to produce a single conjunction predicate + conjunction <- filters.reduceOption(And) +======= * Helper object for building ORC `SearchArgument`s, which are used for ORC predicate push-down. * * Due to limitation of ORC `SearchArgument` builder, we had to end up with a pretty weird double- @@ -67,6 +78,7 @@ private[orc] object OrcFilters extends Logging { for { // Combines all convertible filters using `And` to produce a single conjunction conjunction <- convertibleFilters.reduceOption(And) +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 // Then tries to build a single ORC `SearchArgument` for the conjunction predicate builder <- buildSearchArgument(conjunction, SearchArgumentFactory.newBuilder()) } yield builder.build() @@ -82,6 +94,31 @@ private[orc] object OrcFilters extends Logging { case _ => false } +<<<<<<< HEAD + // lian: I probably missed something here, and had to end up with a pretty weird double-checking + // pattern when converting `And`/`Or`/`Not` filters. + // + // The annoying part is that, `SearchArgument` builder methods like `startAnd()` `startOr()`, + // and `startNot()` mutate internal state of the builder instance. This forces us to translate + // all convertible filters with a single builder instance. However, before actually converting a + // filter, we've no idea whether it can be recognized by ORC or not. Thus, when an inconvertible + // filter is found, we may already end up with a builder whose internal state is inconsistent. + // + // For example, to convert an `And` filter with builder `b`, we call `b.startAnd()` first, and + // then try to convert its children. Say we convert `left` child successfully, but find that + // `right` child is inconvertible. Alas, `b.startAnd()` call can't be rolled back, and `b` is + // inconsistent now. 
+ // + // The workaround employed here is that, for `And`/`Or`/`Not`, we first try to convert their + // children with brand new builders, and only do the actual conversion with the right builder + // instance when the children are proven to be convertible. + // + // P.S.: Hive seems to use `SearchArgument` together with `ExprNodeGenericFuncDesc` only. + // Usage of builder methods mentioned above can only be found in test code, where all tested + // filters are known to be convertible. + +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 expression match { case And(left, right) => // At here, it is not safe to just convert one side if we do not understand the @@ -112,10 +149,13 @@ private[orc] object OrcFilters extends Logging { negate <- buildSearchArgument(child, builder.startNot()) } yield negate.end() +<<<<<<< HEAD +======= // NOTE: For all case branches dealing with leaf predicates below, the additional `startAnd()` // call is mandatory. ORC `SearchArgument` builder requires that all leaf predicates must be // wrapped by a "parent" predicate (`And`, `Or`, or `Not`). +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 case EqualTo(attribute, value) if isSearchableLiteral(value) => Some(builder.startAnd().equals(attribute, value).end()) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcRelation.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcRelation.scala index 84ef12a68e1ba..3f55c5d153088 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcRelation.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcRelation.scala @@ -33,6 +33,11 @@ import org.apache.hadoop.mapreduce.{Job, TaskAttemptContext} import org.apache.spark.Logging import org.apache.spark.broadcast.Broadcast +<<<<<<< HEAD +import org.apache.spark.deploy.SparkHadoopUtil +import org.apache.spark.mapred.SparkHadoopMapRedUtil +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.rdd.{HadoopRDD, RDD} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.expressions._ @@ -65,7 +70,11 @@ private[orc] class OrcOutputWriter( path: String, dataSchema: StructType, context: TaskAttemptContext) +<<<<<<< HEAD + extends OutputWriter with SparkHadoopMapRedUtil with HiveInspectors { +======= extends OutputWriter with HiveInspectors { +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 private val serializer = { val table = new Properties() @@ -75,7 +84,11 @@ private[orc] class OrcOutputWriter( }.mkString(":")) val serde = new OrcSerde +<<<<<<< HEAD + val configuration = SparkHadoopUtil.get.getConfigurationFromJobContext(context) +======= val configuration = context.getConfiguration +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 serde.initialize(configuration, table) serde } @@ -97,9 +110,15 @@ private[orc] class OrcOutputWriter( private lazy val recordWriter: RecordWriter[NullWritable, Writable] = { recordWriterInstantiated = true +<<<<<<< HEAD + val conf = SparkHadoopUtil.get.getConfigurationFromJobContext(context) + val uniqueWriteJobId = conf.get("spark.sql.sources.writeJobUUID") + val taskAttemptId = SparkHadoopUtil.get.getTaskAttemptIDFromTaskAttemptContext(context) +======= val conf = context.getConfiguration val uniqueWriteJobId = conf.get("spark.sql.sources.writeJobUUID") val taskAttemptId = context.getTaskAttemptID +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 val partition = taskAttemptId.getTaskID.getId val filename = f"part-r-$partition%05d-$uniqueWriteJobId.orc" @@ -206,7 +225,11 @@ 
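The double-checking pattern described in the OrcFilters comment above can be shown in isolation: because builder calls mutate shared state and cannot be rolled back, each child is first converted against a throwaway builder, and the shared builder is touched only once the whole subtree is known to be convertible. The sketch below uses a made-up `MutableBuilder` and predicate ADT, not the ORC `SearchArgument` API:

final class MutableBuilder {
  private val ops = scala.collection.mutable.Buffer[String]()
  def add(op: String): MutableBuilder = { ops += op; this }
  def result: String = ops.mkString(" AND ")
}

sealed trait Pred
final case class Leaf(column: String, convertible: Boolean) extends Pred
final case class Conj(left: Pred, right: Pred) extends Pred

def convert(pred: Pred, builder: MutableBuilder): Option[MutableBuilder] = pred match {
  case Leaf(col, ok) => if (ok) Some(builder.add(s"$col IS NOT NULL")) else None
  case Conj(l, r) =>
    for {
      _  <- convert(l, new MutableBuilder)   // trial run with a fresh builder, discarded
      _  <- convert(r, new MutableBuilder)   // trial run with a fresh builder, discarded
      b1 <- convert(l, builder)              // real conversion, now known to be safe
      b2 <- convert(r, b1)
    } yield b2
}

// The shared builder stays untouched when any child is inconvertible.
assert(convert(Conj(Leaf("a", true), Leaf("b", false)), new MutableBuilder).isEmpty)
assert(convert(Conj(Leaf("a", true), Leaf("b", true)), new MutableBuilder).isDefined)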
private[sql] class OrcRelation( } override def prepareJobForWrite(job: Job): OutputWriterFactory = { +<<<<<<< HEAD + SparkHadoopUtil.get.getConfigurationFromJobContext(job) match { +======= job.getConfiguration match { +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 case conf: JobConf => conf.setOutputFormat(classOf[OrcOutputFormat]) case conf => @@ -287,8 +310,13 @@ private[orc] case class OrcTableScan( } def execute(): RDD[InternalRow] = { +<<<<<<< HEAD + val job = new Job(sqlContext.sparkContext.hadoopConfiguration) + val conf = SparkHadoopUtil.get.getConfigurationFromJobContext(job) +======= val job = Job.getInstance(sqlContext.sparkContext.hadoopConfiguration) val conf = job.getConfiguration +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 // Tries to push down filters if ORC filter push-down is enabled if (sqlContext.conf.orcFilterPushDown) { diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala index 013fbab0a812b..504df998c53bc 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala @@ -410,10 +410,14 @@ class TestHiveContext(sc: SparkContext) extends HiveContext(sc) { try { // HACK: Hive is too noisy by default. org.apache.log4j.LogManager.getCurrentLoggers.asScala.foreach { log => +<<<<<<< HEAD + log.asInstanceOf[org.apache.log4j.Logger].setLevel(org.apache.log4j.Level.WARN) +======= val logger = log.asInstanceOf[org.apache.log4j.Logger] if (!logger.getName.contains("org.apache.spark")) { logger.setLevel(org.apache.log4j.Level.WARN) } +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 } cacheManager.clearCache() diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala index 71f05f3b00291..9b517da04f49c 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala @@ -103,7 +103,11 @@ class HiveSparkSubmitSuite runSparkSubmit(args) } +<<<<<<< HEAD + ignore("SPARK-9757 Persist Parquet relation with decimal column") { +======= test("SPARK-9757 Persist Parquet relation with decimal column") { +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 val unusedJar = TestUtils.createJarWithClasses(Seq.empty) val args = Seq( "--class", SPARK_9757.getClass.getName.stripSuffix("$"), diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/WindowQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/WindowQuerySuite.scala index ea82b8c459695..df4385c76fbf7 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/WindowQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/WindowQuerySuite.scala @@ -227,6 +227,8 @@ class WindowQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleto Row("Manufacturer#5", "almond azure blanched chiffon midnight", 23, 315.9225931564038, 315.9225931564038, 46, 99807.08486666666, -0.9978877469246935, -5664.856666666666))) // scalastyle:on } +<<<<<<< HEAD +======= test("null arguments") { checkAnswer(sql(""" @@ -242,4 +244,5 @@ class WindowQuerySuite extends QueryTest with SQLTestUtils with TestHiveSingleto |from part """.stripMargin)) } +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 } diff --git 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcSourceSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcSourceSuite.scala index 47e73b4006fa5..c0192d8d9d87d 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcSourceSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/OrcSourceSuite.scala @@ -21,9 +21,14 @@ import java.io.File import org.scalatest.BeforeAndAfterAll +<<<<<<< HEAD +import org.apache.spark.sql.{QueryTest, Row} +import org.apache.spark.sql.hive.test.TestHiveSingleton +======= import org.apache.spark.sql.hive.test.TestHiveSingleton import org.apache.spark.sql.sources._ import org.apache.spark.sql.{QueryTest, Row} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 case class OrcData(intField: Int, stringField: String) @@ -175,6 +180,8 @@ class OrcSourceSuite extends OrcSuite { |) """.stripMargin) } +<<<<<<< HEAD +======= test("SPARK-12218 Converting conjunctions into ORC SearchArguments") { // The `LessThan` should be converted while the `StringContains` shouldn't @@ -204,4 +211,5 @@ class OrcSourceSuite extends OrcSuite { )).get.toString } } +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/sources/SimpleTextRelation.scala b/sql/hive/src/test/scala/org/apache/spark/sql/sources/SimpleTextRelation.scala index e10d21d5e3682..ea32d7536e216 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/sources/SimpleTextRelation.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/sources/SimpleTextRelation.scala @@ -25,6 +25,10 @@ import org.apache.hadoop.io.{NullWritable, Text} import org.apache.hadoop.mapreduce.lib.output.{FileOutputFormat, TextOutputFormat} import org.apache.hadoop.mapreduce.{Job, RecordWriter, TaskAttemptContext} +<<<<<<< HEAD +import org.apache.spark.deploy.SparkHadoopUtil +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.rdd.RDD import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.{CatalystTypeConverters, expressions} @@ -52,9 +56,15 @@ class AppendingTextOutputFormat(outputFile: Path) extends TextOutputFormat[NullW numberFormat.setGroupingUsed(false) override def getDefaultWorkFile(context: TaskAttemptContext, extension: String): Path = { +<<<<<<< HEAD + val configuration = SparkHadoopUtil.get.getConfigurationFromJobContext(context) + val uniqueWriteJobId = configuration.get("spark.sql.sources.writeJobUUID") + val taskAttemptId = SparkHadoopUtil.get.getTaskAttemptIDFromTaskAttemptContext(context) +======= val configuration = context.getConfiguration val uniqueWriteJobId = configuration.get("spark.sql.sources.writeJobUUID") val taskAttemptId = context.getTaskAttemptID +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 val split = taskAttemptId.getTaskID.getId val name = FileOutputFormat.getOutputName(context) new Path(outputFile, s"$name-${numberFormat.format(split)}-$uniqueWriteJobId") diff --git a/streaming/src/main/scala/org/apache/spark/streaming/Checkpoint.scala b/streaming/src/main/scala/org/apache/spark/streaming/Checkpoint.scala index 61b230ab6f98a..e1438c4148c9a 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/Checkpoint.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/Checkpoint.scala @@ -21,6 +21,17 @@ import java.io._ import java.util.concurrent.Executors import java.util.concurrent.RejectedExecutionException +<<<<<<< HEAD +import org.apache.hadoop.fs.{FileSystem, Path} +import org.apache.hadoop.conf.Configuration + +import 
org.apache.spark.{SparkException, SparkConf, Logging} +import org.apache.spark.deploy.SparkHadoopUtil +import org.apache.spark.io.CompressionCodec +import org.apache.spark.util.{MetadataCleaner, Utils} +import org.apache.spark.streaming.scheduler.JobGenerator + +======= import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, Path} @@ -29,6 +40,7 @@ import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.io.CompressionCodec import org.apache.spark.streaming.scheduler.JobGenerator import org.apache.spark.util.{MetadataCleaner, Utils} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 private[streaming] class Checkpoint(ssc: StreamingContext, val checkpointTime: Time) diff --git a/streaming/src/main/scala/org/apache/spark/streaming/DStreamGraph.scala b/streaming/src/main/scala/org/apache/spark/streaming/DStreamGraph.scala index eedb42c0611c2..f7571d99d16ed 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/DStreamGraph.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/DStreamGraph.scala @@ -17,6 +17,13 @@ package org.apache.spark.streaming +<<<<<<< HEAD +import scala.collection.mutable.ArrayBuffer +import java.io.{ObjectInputStream, IOException, ObjectOutputStream} +import org.apache.spark.Logging +import org.apache.spark.streaming.scheduler.Job +import org.apache.spark.streaming.dstream.{DStream, ReceiverInputDStream, InputDStream} +======= import java.io.{IOException, ObjectInputStream, ObjectOutputStream} import scala.collection.mutable.ArrayBuffer @@ -24,6 +31,7 @@ import scala.collection.mutable.ArrayBuffer import org.apache.spark.Logging import org.apache.spark.streaming.dstream.{DStream, InputDStream, ReceiverInputDStream} import org.apache.spark.streaming.scheduler.Job +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.util.Utils final private[streaming] class DStreamGraph extends Serializable with Logging { diff --git a/streaming/src/main/scala/org/apache/spark/streaming/StateSpec.scala b/streaming/src/main/scala/org/apache/spark/streaming/StateSpec.scala index 0b094558dfd59..e064bf4831e18 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/StateSpec.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/StateSpec.scala @@ -18,13 +18,20 @@ package org.apache.spark.streaming import com.google.common.base.Optional +<<<<<<< HEAD +======= import org.apache.spark.{HashPartitioner, Partitioner} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.annotation.Experimental import org.apache.spark.api.java.{JavaPairRDD, JavaUtils} import org.apache.spark.api.java.function.{Function3 => JFunction3, Function4 => JFunction4} import org.apache.spark.rdd.RDD import org.apache.spark.util.ClosureCleaner +<<<<<<< HEAD +import org.apache.spark.{HashPartitioner, Partitioner} +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * :: Experimental :: diff --git a/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala index c4a10aa2dd3b9..b78d8756af712 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/StreamingContext.scala @@ -29,8 +29,13 @@ import akka.actor.{Props, SupervisorStrategy} import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path import org.apache.hadoop.io.{BytesWritable, LongWritable, Text} +<<<<<<< HEAD +import 
org.apache.hadoop.mapreduce.lib.input.TextInputFormat +import org.apache.hadoop.mapreduce.{InputFormat => NewInputFormat} +======= import org.apache.hadoop.mapreduce.{InputFormat => NewInputFormat} import org.apache.hadoop.mapreduce.lib.input.TextInputFormat +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark._ import org.apache.spark.annotation.{DeveloperApi, Experimental} diff --git a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaDStream.scala index a59f4efccb575..9e8117a8c75fc 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaDStream.scala @@ -17,6 +17,16 @@ package org.apache.spark.streaming.api.java +<<<<<<< HEAD +import org.apache.spark.streaming.{Duration, Time} +import org.apache.spark.api.java.function.{Function => JFunction} +import org.apache.spark.api.java.JavaRDD +import org.apache.spark.storage.StorageLevel +import org.apache.spark.rdd.RDD + +import scala.language.implicitConversions +import scala.reflect.ClassTag +======= import scala.language.implicitConversions import scala.reflect.ClassTag @@ -25,6 +35,7 @@ import org.apache.spark.api.java.function.{Function => JFunction} import org.apache.spark.rdd.RDD import org.apache.spark.storage.StorageLevel import org.apache.spark.streaming.{Duration, Time} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.streaming.dstream.DStream /** diff --git a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala index 2bf3ccec6bc55..c136845e19c59 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairDStream.scala @@ -17,7 +17,11 @@ package org.apache.spark.streaming.api.java +<<<<<<< HEAD +import java.lang.{Long => JLong, Iterable => JIterable} +======= import java.lang.{Iterable => JIterable, Long => JLong} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import java.util.{List => JList} import scala.collection.JavaConverters._ diff --git a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairInputDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairInputDStream.scala index da0db02236a1f..4ddc663d31b76 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairInputDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaPairInputDStream.scala @@ -17,11 +17,19 @@ package org.apache.spark.streaming.api.java +<<<<<<< HEAD +import org.apache.spark.streaming.dstream.InputDStream + +import scala.language.implicitConversions +import scala.reflect.ClassTag + +======= import scala.language.implicitConversions import scala.reflect.ClassTag import org.apache.spark.streaming.dstream.InputDStream +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * A Java-friendly interface to [[org.apache.spark.streaming.dstream.InputDStream]] of * key-value pairs. 
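Most of the streaming-side conflicts from here on are pure import reorderings: one branch keeps the older ad-hoc ordering, the other uses the grouping applied on the incoming side of these hunks, i.e. java.* first, then scala.*, then other third-party packages such as org.apache.hadoop.*, then org.apache.spark.*, each group alphabetized and separated by a blank line. A representative header in that style (assembled here to illustrate the convention, not copied verbatim from any one file below):

import java.io.{IOException, ObjectInputStream, ObjectOutputStream}

import scala.collection.mutable.ArrayBuffer
import scala.reflect.ClassTag

import org.apache.hadoop.fs.{FileSystem, Path}

import org.apache.spark.Logging
import org.apache.spark.streaming.Time
import org.apache.spark.util.Utils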
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala index 00f9d8a9e8817..5d7444805cd9c 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/api/java/JavaStreamingContext.scala @@ -17,15 +17,23 @@ package org.apache.spark.streaming.api.java +<<<<<<< HEAD +import java.lang.{Boolean => JBoolean} +import java.io.{Closeable, InputStream} +======= import java.io.{Closeable, InputStream} import java.lang.{Boolean => JBoolean} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import java.util.{List => JList, Map => JMap} import scala.collection.JavaConverters._ import scala.reflect.ClassTag import akka.actor.{Props, SupervisorStrategy} +<<<<<<< HEAD +======= import org.apache.hadoop.conf.Configuration +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.hadoop.fs.Path import org.apache.hadoop.mapreduce.{InputFormat => NewInputFormat} @@ -38,9 +46,16 @@ import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.rdd.RDD import org.apache.spark.storage.StorageLevel import org.apache.spark.streaming._ +<<<<<<< HEAD +import org.apache.spark.streaming.scheduler.StreamingListener +import org.apache.spark.streaming.dstream.DStream +import org.apache.spark.streaming.receiver.Receiver +import org.apache.hadoop.conf.Configuration +======= import org.apache.spark.streaming.dstream.DStream import org.apache.spark.streaming.receiver.Receiver import org.apache.spark.streaming.scheduler.StreamingListener +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * A Java-friendly version of [[org.apache.spark.streaming.StreamingContext]] which is the main diff --git a/streaming/src/main/scala/org/apache/spark/streaming/api/python/PythonDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/api/python/PythonDStream.scala index 953fe95177f02..c224aef23380e 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/api/python/PythonDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/api/python/PythonDStream.scala @@ -30,11 +30,20 @@ import org.apache.spark.SparkException import org.apache.spark.api.java._ import org.apache.spark.rdd.RDD import org.apache.spark.storage.StorageLevel +<<<<<<< HEAD +import org.apache.spark.streaming.{Interval, Duration, Time} +import org.apache.spark.streaming.dstream._ +import org.apache.spark.streaming.api.java._ +import org.apache.spark.util.Utils + + +======= import org.apache.spark.streaming.{Duration, Interval, Time} import org.apache.spark.streaming.api.java._ import org.apache.spark.streaming.dstream._ import org.apache.spark.util.Utils +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * Interface for Python callback function which is used to transform RDDs */ diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/ConstantInputDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/ConstantInputDStream.scala index 695384deb32d7..418b6c098acb7 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/ConstantInputDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/ConstantInputDStream.scala @@ -20,7 +20,11 @@ package org.apache.spark.streaming.dstream import scala.reflect.ClassTag import org.apache.spark.rdd.RDD +<<<<<<< HEAD +import org.apache.spark.streaming.{Time, StreamingContext} +======= 
import org.apache.spark.streaming.{StreamingContext, Time} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * An input stream that always returns the same RDD on each timestep. Useful for testing. diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStreamCheckpointData.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStreamCheckpointData.scala index 3eff174c2b66c..3fbed0e13c17c 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStreamCheckpointData.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/DStreamCheckpointData.scala @@ -17,6 +17,13 @@ package org.apache.spark.streaming.dstream +<<<<<<< HEAD +import scala.collection.mutable.HashMap +import scala.reflect.ClassTag +import java.io.{ObjectOutputStream, ObjectInputStream, IOException} +import org.apache.hadoop.fs.Path +import org.apache.hadoop.fs.FileSystem +======= import java.io.{IOException, ObjectInputStream, ObjectOutputStream} import scala.collection.mutable.HashMap @@ -24,6 +31,7 @@ import scala.reflect.ClassTag import org.apache.hadoop.fs.{FileSystem, Path} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.Logging import org.apache.spark.streaming.Time import org.apache.spark.util.Utils diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/FilteredDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/FilteredDStream.scala index 43079880b2352..6de8fed0f082f 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/FilteredDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/FilteredDStream.scala @@ -17,11 +17,18 @@ package org.apache.spark.streaming.dstream +<<<<<<< HEAD +import org.apache.spark.streaming.{Duration, Time} +import org.apache.spark.rdd.RDD +import scala.reflect.ClassTag + +======= import scala.reflect.ClassTag import org.apache.spark.rdd.RDD import org.apache.spark.streaming.{Duration, Time} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 private[streaming] class FilteredDStream[T: ClassTag]( parent: DStream[T], diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/FlatMapValuedDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/FlatMapValuedDStream.scala index 778d556d2efb9..a6ae7850992f4 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/FlatMapValuedDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/FlatMapValuedDStream.scala @@ -17,12 +17,20 @@ package org.apache.spark.streaming.dstream +<<<<<<< HEAD +import org.apache.spark.streaming.{Duration, Time} +import org.apache.spark.rdd.RDD +import org.apache.spark.SparkContext._ +import scala.reflect.ClassTag + +======= import scala.reflect.ClassTag import org.apache.spark.SparkContext._ import org.apache.spark.rdd.RDD import org.apache.spark.streaming.{Duration, Time} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 private[streaming] class FlatMapValuedDStream[K: ClassTag, V: ClassTag, U: ClassTag]( parent: DStream[(K, V)], diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/FlatMappedDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/FlatMappedDStream.scala index 96a444a7baa5e..14826be43a571 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/FlatMappedDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/FlatMappedDStream.scala @@ -17,11 +17,18 @@ package 
org.apache.spark.streaming.dstream +<<<<<<< HEAD +import org.apache.spark.streaming.{Duration, Time} +import org.apache.spark.rdd.RDD +import scala.reflect.ClassTag + +======= import scala.reflect.ClassTag import org.apache.spark.rdd.RDD import org.apache.spark.streaming.{Duration, Time} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 private[streaming] class FlatMappedDStream[T: ClassTag, U: ClassTag]( parent: DStream[T], diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/ForEachDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/ForEachDStream.scala index a0fadee8a9844..9e072a2f79336 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/ForEachDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/ForEachDStream.scala @@ -17,11 +17,18 @@ package org.apache.spark.streaming.dstream +<<<<<<< HEAD +import org.apache.spark.rdd.RDD +import org.apache.spark.streaming.{Duration, Time} +import org.apache.spark.streaming.scheduler.Job +import scala.reflect.ClassTag +======= import scala.reflect.ClassTag import org.apache.spark.rdd.RDD import org.apache.spark.streaming.{Duration, Time} import org.apache.spark.streaming.scheduler.Job +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * An internal DStream used to represent output operations like DStream.foreachRDD. diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/GlommedDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/GlommedDStream.scala index 9f1252f091a63..05264b0a527ef 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/GlommedDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/GlommedDStream.scala @@ -17,11 +17,18 @@ package org.apache.spark.streaming.dstream +<<<<<<< HEAD +import org.apache.spark.streaming.{Duration, Time} +import org.apache.spark.rdd.RDD +import scala.reflect.ClassTag + +======= import scala.reflect.ClassTag import org.apache.spark.rdd.RDD import org.apache.spark.streaming.{Duration, Time} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 private[streaming] class GlommedDStream[T: ClassTag](parent: DStream[T]) extends DStream[Array[T]](parent.ssc) { diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/MapPartitionedDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/MapPartitionedDStream.scala index bcdf1752e61e7..dfedb43b53c4f 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/MapPartitionedDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/MapPartitionedDStream.scala @@ -17,11 +17,18 @@ package org.apache.spark.streaming.dstream +<<<<<<< HEAD +import org.apache.spark.streaming.{Duration, Time} +import org.apache.spark.rdd.RDD +import scala.reflect.ClassTag + +======= import scala.reflect.ClassTag import org.apache.spark.rdd.RDD import org.apache.spark.streaming.{Duration, Time} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 private[streaming] class MapPartitionedDStream[T: ClassTag, U: ClassTag]( parent: DStream[T], diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/MapValuedDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/MapValuedDStream.scala index 855c3dd096f4b..2ab0181ea39ce 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/MapValuedDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/MapValuedDStream.scala @@ -17,12 +17,20 @@ package 
org.apache.spark.streaming.dstream +<<<<<<< HEAD +import org.apache.spark.streaming.{Duration, Time} +import org.apache.spark.rdd.RDD +import org.apache.spark.SparkContext._ +import scala.reflect.ClassTag + +======= import scala.reflect.ClassTag import org.apache.spark.SparkContext._ import org.apache.spark.rdd.RDD import org.apache.spark.streaming.{Duration, Time} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 private[streaming] class MapValuedDStream[K: ClassTag, V: ClassTag, U: ClassTag]( parent: DStream[(K, V)], diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/MapWithStateDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/MapWithStateDStream.scala index 36ff9c7e6182f..ffb95765b4e21 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/MapWithStateDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/MapWithStateDStream.scala @@ -24,8 +24,13 @@ import org.apache.spark.annotation.Experimental import org.apache.spark.rdd.{EmptyRDD, RDD} import org.apache.spark.storage.StorageLevel import org.apache.spark.streaming._ +<<<<<<< HEAD +import org.apache.spark.streaming.rdd.{MapWithStateRDD, MapWithStateRDDRecord} +import org.apache.spark.streaming.dstream.InternalMapWithStateDStream._ +======= import org.apache.spark.streaming.dstream.InternalMapWithStateDStream._ import org.apache.spark.streaming.rdd.{MapWithStateRDD, MapWithStateRDDRecord} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * :: Experimental :: diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/MappedDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/MappedDStream.scala index e11d82697af89..c2e5ce49d870b 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/MappedDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/MappedDStream.scala @@ -17,11 +17,18 @@ package org.apache.spark.streaming.dstream +<<<<<<< HEAD +import org.apache.spark.streaming.{Duration, Time} +import org.apache.spark.rdd.RDD +import scala.reflect.ClassTag + +======= import scala.reflect.ClassTag import org.apache.spark.rdd.RDD import org.apache.spark.streaming.{Duration, Time} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 private[streaming] class MappedDStream[T: ClassTag, U: ClassTag] ( parent: DStream[T], diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/PairDStreamFunctions.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/PairDStreamFunctions.scala index babc722709325..b5d1b0290d1e3 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/PairDStreamFunctions.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/PairDStreamFunctions.scala @@ -24,12 +24,21 @@ import org.apache.hadoop.conf.Configuration import org.apache.hadoop.mapred.{JobConf, OutputFormat} import org.apache.hadoop.mapreduce.{OutputFormat => NewOutputFormat} +<<<<<<< HEAD +import org.apache.spark.annotation.Experimental +import org.apache.spark.rdd.RDD +import org.apache.spark.streaming.StreamingContext.rddToFileName +import org.apache.spark.streaming._ +import org.apache.spark.util.{SerializableConfiguration, SerializableJobConf} +import org.apache.spark.{HashPartitioner, Partitioner} +======= import org.apache.spark.{HashPartitioner, Partitioner} import org.apache.spark.annotation.Experimental import org.apache.spark.rdd.RDD import org.apache.spark.streaming._ import 
org.apache.spark.streaming.StreamingContext.rddToFileName import org.apache.spark.util.{SerializableConfiguration, SerializableJobConf} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * Extra functions available on DStream of (key, value) pairs through an implicit conversion. diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/PluggableInputDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/PluggableInputDStream.scala index 2442e4c01a0c0..9aeaad73beca0 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/PluggableInputDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/PluggableInputDStream.scala @@ -17,9 +17,14 @@ package org.apache.spark.streaming.dstream +<<<<<<< HEAD +import org.apache.spark.streaming.StreamingContext +import scala.reflect.ClassTag +======= import scala.reflect.ClassTag import org.apache.spark.streaming.StreamingContext +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.streaming.receiver.Receiver private[streaming] diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/QueueInputDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/QueueInputDStream.scala index a8d108de6c3e1..c586db12f10ec 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/QueueInputDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/QueueInputDStream.scala @@ -23,7 +23,11 @@ import scala.collection.mutable.{ArrayBuffer, Queue} import scala.reflect.ClassTag import org.apache.spark.rdd.{RDD, UnionRDD} +<<<<<<< HEAD +import org.apache.spark.streaming.{Time, StreamingContext} +======= import org.apache.spark.streaming.{StreamingContext, Time} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 private[streaming] class QueueInputDStream[T: ClassTag]( diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/RawInputDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/RawInputDStream.scala index ac73dca05a674..3df1a532657eb 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/RawInputDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/RawInputDStream.scala @@ -17,6 +17,21 @@ package org.apache.spark.streaming.dstream +<<<<<<< HEAD +import org.apache.spark.{Logging, SparkEnv} +import org.apache.spark.storage.{StorageLevel, StreamBlockId} +import org.apache.spark.streaming.StreamingContext + +import scala.reflect.ClassTag + +import java.net.InetSocketAddress +import java.nio.ByteBuffer +import java.nio.channels.{ReadableByteChannel, SocketChannel} +import java.io.EOFException +import java.util.concurrent.ArrayBlockingQueue +import org.apache.spark.streaming.receiver.Receiver + +======= import java.io.EOFException import java.net.InetSocketAddress import java.nio.ByteBuffer @@ -29,6 +44,7 @@ import org.apache.spark.{Logging, SparkEnv} import org.apache.spark.storage.{StorageLevel, StreamBlockId} import org.apache.spark.streaming.StreamingContext import org.apache.spark.streaming.receiver.Receiver +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * An input stream that reads blocks of serialized objects from a given network address. 
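Every conflict up to this point is the same cosmetic disagreement: the HEAD side keeps the old, loosely ordered import block, while the incoming side (15bd736) regroups the identical imports into the ordering used consistently on that branch: java, then scala, then third-party, then org.apache.spark, with each group and each brace list alphabetized. If the ordered side is the one kept, the resolved header of, for example, ForEachDStream.scala is simply the incoming hunk; the sketch below reproduces those imports, with only the comment and the group spacing added.

```scala
package org.apache.spark.streaming.dstream

// scala.* imports first, then org.apache.spark.* sorted by full path,
// with a blank line separating the groups.
import scala.reflect.ClassTag

import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.{Duration, Time}
import org.apache.spark.streaming.scheduler.Job
```

If that side is kept, the same resolution (drop the markers, keep the lines after the `=======` separator) applies to the FlatMappedDStream, GlommedDStream, MapPartitionedDStream, MapValuedDStream, MappedDStream, PairDStreamFunctions, PluggableInputDStream, QueueInputDStream and RawInputDStream hunks above as well.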
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/ReceiverInputDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/ReceiverInputDStream.scala index a18551fac719a..c64bba17b9105 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/ReceiverInputDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/ReceiverInputDStream.scala @@ -21,12 +21,21 @@ import scala.reflect.ClassTag import org.apache.spark.rdd.{BlockRDD, RDD} import org.apache.spark.storage.BlockId +<<<<<<< HEAD +import org.apache.spark.streaming.rdd.WriteAheadLogBackedBlockRDD +import org.apache.spark.streaming.receiver.Receiver +import org.apache.spark.streaming.scheduler.rate.RateEstimator +import org.apache.spark.streaming.scheduler.{ReceivedBlockInfo, RateController, StreamInputInfo} +import org.apache.spark.streaming.util.WriteAheadLogUtils +import org.apache.spark.streaming.{StreamingContext, Time} +======= import org.apache.spark.streaming.{StreamingContext, Time} import org.apache.spark.streaming.rdd.WriteAheadLogBackedBlockRDD import org.apache.spark.streaming.receiver.Receiver import org.apache.spark.streaming.scheduler.{RateController, ReceivedBlockInfo, StreamInputInfo} import org.apache.spark.streaming.scheduler.rate.RateEstimator import org.apache.spark.streaming.util.WriteAheadLogUtils +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * Abstract class for defining any [[org.apache.spark.streaming.dstream.InputDStream]] diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/ReducedWindowedDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/ReducedWindowedDStream.scala index 535954908539e..e7c3088b8dfe3 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/ReducedWindowedDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/ReducedWindowedDStream.scala @@ -17,6 +17,20 @@ package org.apache.spark.streaming.dstream +<<<<<<< HEAD +import org.apache.spark.rdd.RDD +import org.apache.spark.rdd.{CoGroupedRDD, MapPartitionsRDD} +import org.apache.spark.Partitioner +import org.apache.spark.SparkContext._ +import org.apache.spark.storage.StorageLevel + +import scala.collection.mutable.ArrayBuffer +import org.apache.spark.streaming.{Duration, Interval, Time} + +import scala.collection.mutable.ArrayBuffer +import scala.reflect.ClassTag + +======= import scala.collection.mutable.ArrayBuffer import scala.reflect.ClassTag @@ -26,6 +40,7 @@ import org.apache.spark.rdd.{CoGroupedRDD, MapPartitionsRDD, RDD} import org.apache.spark.storage.StorageLevel import org.apache.spark.streaming.{Duration, Interval, Time} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 private[streaming] class ReducedWindowedDStream[K: ClassTag, V: ClassTag]( parent: DStream[(K, V)], diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/ShuffledDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/ShuffledDStream.scala index 0fe15440dd44d..8cda8e47cefa9 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/ShuffledDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/ShuffledDStream.scala @@ -17,12 +17,20 @@ package org.apache.spark.streaming.dstream +<<<<<<< HEAD +import org.apache.spark.Partitioner +import org.apache.spark.rdd.RDD +import org.apache.spark.SparkContext._ +import org.apache.spark.streaming.{Duration, Time} +import scala.reflect.ClassTag +======= import scala.reflect.ClassTag import 
org.apache.spark.Partitioner import org.apache.spark.SparkContext._ import org.apache.spark.rdd.RDD import org.apache.spark.streaming.{Duration, Time} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 private[streaming] class ShuffledDStream[K: ClassTag, V: ClassTag, C: ClassTag]( diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/SocketInputDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/SocketInputDStream.scala index 10644b9201918..2b7beae384203 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/SocketInputDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/SocketInputDStream.scala @@ -17,6 +17,20 @@ package org.apache.spark.streaming.dstream +<<<<<<< HEAD +import scala.util.control.NonFatal + +import org.apache.spark.streaming.StreamingContext +import org.apache.spark.storage.StorageLevel +import org.apache.spark.util.NextIterator + +import scala.reflect.ClassTag + +import java.io._ +import java.net.{UnknownHostException, Socket} +import org.apache.spark.Logging +import org.apache.spark.streaming.receiver.Receiver +======= import java.io._ import java.net.{Socket, UnknownHostException} @@ -28,6 +42,7 @@ import org.apache.spark.storage.StorageLevel import org.apache.spark.streaming.StreamingContext import org.apache.spark.streaming.receiver.Receiver import org.apache.spark.util.NextIterator +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 private[streaming] class SocketInputDStream[T: ClassTag]( diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/StateDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/StateDStream.scala index ebbe139a2cdf8..de728fd21436f 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/StateDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/StateDStream.scala @@ -17,6 +17,16 @@ package org.apache.spark.streaming.dstream +<<<<<<< HEAD +import org.apache.spark.rdd.RDD +import org.apache.spark.Partitioner +import org.apache.spark.SparkContext._ +import org.apache.spark.storage.StorageLevel +import org.apache.spark.streaming.{Duration, Time} + +import scala.reflect.ClassTag + +======= import scala.reflect.ClassTag import org.apache.spark.Partitioner @@ -25,6 +35,7 @@ import org.apache.spark.rdd.RDD import org.apache.spark.storage.StorageLevel import org.apache.spark.streaming.{Duration, Time} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 private[streaming] class StateDStream[K: ClassTag, V: ClassTag, S: ClassTag]( parent: DStream[(K, V)], diff --git a/streaming/src/main/scala/org/apache/spark/streaming/dstream/UnionDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/UnionDStream.scala index 2b07dd6185861..31d41d65d884e 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/UnionDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/UnionDStream.scala @@ -21,8 +21,14 @@ import scala.collection.mutable.ArrayBuffer import scala.reflect.ClassTag import org.apache.spark.SparkException +<<<<<<< HEAD +import org.apache.spark.streaming.{Duration, Time} +import org.apache.spark.rdd.RDD +import org.apache.spark.rdd.UnionRDD +======= import org.apache.spark.rdd.{RDD, UnionRDD} import org.apache.spark.streaming.{Duration, Time} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 private[streaming] class UnionDStream[T: ClassTag](parents: Array[DStream[T]]) diff --git 
a/streaming/src/main/scala/org/apache/spark/streaming/dstream/WindowedDStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/dstream/WindowedDStream.scala index ee50a8d024e12..148162e53a89f 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/dstream/WindowedDStream.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/dstream/WindowedDStream.scala @@ -17,13 +17,21 @@ package org.apache.spark.streaming.dstream +<<<<<<< HEAD +======= import scala.reflect.ClassTag +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.rdd.{PartitionerAwareUnionRDD, RDD, UnionRDD} import org.apache.spark.storage.StorageLevel import org.apache.spark.streaming._ import org.apache.spark.streaming.Duration +<<<<<<< HEAD +import scala.reflect.ClassTag + +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 private[streaming] class WindowedDStream[T: ClassTag]( parent: DStream[T], diff --git a/streaming/src/main/scala/org/apache/spark/streaming/rdd/MapWithStateRDD.scala b/streaming/src/main/scala/org/apache/spark/streaming/rdd/MapWithStateRDD.scala index 1d2244eaf22b3..9cb6feaf47627 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/rdd/MapWithStateRDD.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/rdd/MapWithStateRDD.scala @@ -22,11 +22,19 @@ import java.io.{IOException, ObjectInputStream, ObjectOutputStream} import scala.collection.mutable.ArrayBuffer import scala.reflect.ClassTag +<<<<<<< HEAD +import org.apache.spark.rdd.{MapPartitionsRDD, RDD} +import org.apache.spark.streaming.{Time, StateImpl, State} +import org.apache.spark.streaming.util.{EmptyStateMap, StateMap} +import org.apache.spark.util.Utils +import org.apache.spark._ +======= import org.apache.spark._ import org.apache.spark.rdd.{MapPartitionsRDD, RDD} import org.apache.spark.streaming.{State, StateImpl, Time} import org.apache.spark.streaming.util.{EmptyStateMap, StateMap} import org.apache.spark.util.Utils +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * Record storing the keyed-state [[MapWithStateRDD]]. 
Each record contains a [[StateMap]] and a diff --git a/streaming/src/main/scala/org/apache/spark/streaming/receiver/BlockGenerator.scala b/streaming/src/main/scala/org/apache/spark/streaming/receiver/BlockGenerator.scala index 109af32cf4bbd..85480a67be291 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/receiver/BlockGenerator.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/receiver/BlockGenerator.scala @@ -21,7 +21,11 @@ import java.util.concurrent.{ArrayBlockingQueue, TimeUnit} import scala.collection.mutable.ArrayBuffer +<<<<<<< HEAD +import org.apache.spark.{SparkException, Logging, SparkConf} +======= import org.apache.spark.{Logging, SparkConf, SparkException} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.storage.StreamBlockId import org.apache.spark.streaming.util.RecurringTimer import org.apache.spark.util.{Clock, SystemClock} diff --git a/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceivedBlockHandler.scala b/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceivedBlockHandler.scala index 43c605af73716..29f89f44de44d 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceivedBlockHandler.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceivedBlockHandler.scala @@ -17,18 +17,30 @@ package org.apache.spark.streaming.receiver +<<<<<<< HEAD +import scala.concurrent.duration._ +import scala.concurrent.{Await, ExecutionContext, Future} +======= import scala.concurrent.{Await, ExecutionContext, Future} import scala.concurrent.duration._ +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import scala.language.{existentials, postfixOps} import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path +<<<<<<< HEAD +======= import org.apache.spark.{Logging, SparkConf, SparkException} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.storage._ import org.apache.spark.streaming.receiver.WriteAheadLogBasedBlockHandler._ import org.apache.spark.streaming.util.{WriteAheadLogRecordHandle, WriteAheadLogUtils} import org.apache.spark.util.{Clock, SystemClock, ThreadUtils} +<<<<<<< HEAD +import org.apache.spark.{Logging, SparkConf, SparkException} +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** Trait that represents the metadata related to storage of blocks */ private[streaming] trait ReceivedBlockStoreResult { diff --git a/streaming/src/main/scala/org/apache/spark/streaming/receiver/Receiver.scala b/streaming/src/main/scala/org/apache/spark/streaming/receiver/Receiver.scala index b08152485ab5b..66356c3748901 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/receiver/Receiver.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/receiver/Receiver.scala @@ -22,8 +22,13 @@ import java.nio.ByteBuffer import scala.collection.mutable.ArrayBuffer import scala.collection.JavaConverters._ +<<<<<<< HEAD +import org.apache.spark.storage.StorageLevel +import org.apache.spark.annotation.DeveloperApi +======= import org.apache.spark.annotation.DeveloperApi import org.apache.spark.storage.StorageLevel +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * :: DeveloperApi :: diff --git a/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceiverSupervisor.scala b/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceiverSupervisor.scala index c42a9ac233f87..bd8dea76da661 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceiverSupervisor.scala +++ 
b/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceiverSupervisor.scala @@ -24,9 +24,15 @@ import scala.collection.mutable.ArrayBuffer import scala.concurrent._ import scala.util.control.NonFatal +<<<<<<< HEAD +import org.apache.spark.{SparkEnv, Logging, SparkConf} +import org.apache.spark.storage.StreamBlockId +import org.apache.spark.util.{Utils, ThreadUtils} +======= import org.apache.spark.{Logging, SparkConf, SparkEnv} import org.apache.spark.storage.StreamBlockId import org.apache.spark.util.{ThreadUtils, Utils} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * Abstract class that is responsible for supervising a Receiver in the worker. diff --git a/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceiverSupervisorImpl.scala b/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceiverSupervisorImpl.scala index b774b6b9a55d1..2bb3851dfa59c 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceiverSupervisorImpl.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/receiver/ReceiverSupervisorImpl.scala @@ -26,13 +26,20 @@ import scala.collection.mutable.ArrayBuffer import com.google.common.base.Throwables import org.apache.hadoop.conf.Configuration +<<<<<<< HEAD +======= import org.apache.spark.{Logging, SparkEnv, SparkException} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.rpc.{RpcEnv, ThreadSafeRpcEndpoint} import org.apache.spark.storage.StreamBlockId import org.apache.spark.streaming.Time import org.apache.spark.streaming.scheduler._ import org.apache.spark.streaming.util.WriteAheadLogUtils import org.apache.spark.util.RpcUtils +<<<<<<< HEAD +import org.apache.spark.{Logging, SparkEnv, SparkException} +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * Concrete implementation of [[org.apache.spark.streaming.receiver.ReceiverSupervisor]] diff --git a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/InputInfoTracker.scala b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/InputInfoTracker.scala index 92da0ced28fbc..61f557822dac2 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/InputInfoTracker.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/InputInfoTracker.scala @@ -21,7 +21,11 @@ import scala.collection.mutable import org.apache.spark.Logging import org.apache.spark.annotation.DeveloperApi +<<<<<<< HEAD +import org.apache.spark.streaming.{Time, StreamingContext} +======= import org.apache.spark.streaming.{StreamingContext, Time} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * :: DeveloperApi :: diff --git a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/Job.scala b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/Job.scala index 7050d7ef45240..2648be7cedd2a 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/Job.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/Job.scala @@ -20,7 +20,11 @@ package org.apache.spark.streaming.scheduler import scala.util.{Failure, Try} import org.apache.spark.streaming.Time +<<<<<<< HEAD +import org.apache.spark.util.{Utils, CallSite} +======= import org.apache.spark.util.{CallSite, Utils} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * Class representing a Spark computation. It may contain multiple Spark jobs. 
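A few of the hunks above (WindowedDStream, ReceivedBlockHandler, ReceiverSupervisorImpl) split a single moved import across two conflict blocks: the line is removed from one position on one side and re-added at another position on the other, so the net change after resolution is only a relocation. The underlying problem this whole diff records, though, is that the conflict markers themselves ended up committed. Purely as an illustration (this helper is hypothetical and not part of Spark or its build), a scan like the following would flag every affected file; the `ConflictMarkerCheck` name and behaviour are assumptions of this sketch, not anything the patch introduces.

```scala
import java.io.File

import scala.io.Source

// Hypothetical standalone checker: walk a source tree and report any line that
// still starts with a merge-conflict marker. Not part of Spark; shown only to
// make the failure mode documented by this diff concrete.
object ConflictMarkerCheck {
  private val markers = Seq("<<<<<<< ", "=======", ">>>>>>> ")

  private def scalaFiles(dir: File): Seq[File] = {
    val children = Option(dir.listFiles()).map(_.toSeq).getOrElse(Seq.empty)
    children.filter(f => f.isFile && f.getName.endsWith(".scala")) ++
      children.filter(_.isDirectory).flatMap(scalaFiles)
  }

  def main(args: Array[String]): Unit = {
    val root = new File(args.headOption.getOrElse("."))
    scalaFiles(root).foreach { file =>
      val source = Source.fromFile(file)
      try {
        source.getLines().zipWithIndex.foreach { case (line, idx) =>
          if (markers.exists(m => line.startsWith(m))) {
            println(s"${file.getPath}:${idx + 1}: $line")
          }
        }
      } finally {
        source.close()
      }
    }
  }
}
```

Matching on `=======` alone can produce false positives (decorative comment rules, for instance), so a real check would anchor on the `<<<<<<<` and `>>>>>>>` lines.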
diff --git a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/JobGenerator.scala b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/JobGenerator.scala index a5a01e77639c4..89d91692d0750 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/JobGenerator.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/JobGenerator.scala @@ -19,10 +19,17 @@ package org.apache.spark.streaming.scheduler import scala.util.{Failure, Success, Try} +<<<<<<< HEAD +import org.apache.spark.{SparkEnv, Logging} +import org.apache.spark.streaming.{Checkpoint, CheckpointWriter, Time} +import org.apache.spark.streaming.util.RecurringTimer +import org.apache.spark.util.{Utils, Clock, EventLoop, ManualClock} +======= import org.apache.spark.{Logging, SparkEnv} import org.apache.spark.streaming.{Checkpoint, CheckpointWriter, Time} import org.apache.spark.streaming.util.RecurringTimer import org.apache.spark.util.{Clock, EventLoop, ManualClock, Utils} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** Event classes for JobGenerator */ private[scheduler] sealed trait JobGeneratorEvent diff --git a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceivedBlockTracker.scala b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceivedBlockTracker.scala index 60b5c838e9734..78b215959e12d 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceivedBlockTracker.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceivedBlockTracker.scala @@ -27,11 +27,18 @@ import scala.util.control.NonFatal import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path +<<<<<<< HEAD +======= import org.apache.spark.{Logging, SparkConf} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.network.util.JavaUtils import org.apache.spark.streaming.Time import org.apache.spark.streaming.util.{BatchedWriteAheadLog, WriteAheadLog, WriteAheadLogUtils} import org.apache.spark.util.{Clock, Utils} +<<<<<<< HEAD +import org.apache.spark.{Logging, SparkConf} +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** Trait representing any event in the ReceivedBlockTracker that updates its state. 
*/ private[streaming] sealed trait ReceivedBlockTrackerLogEvent diff --git a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceiverTracker.scala b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceiverTracker.scala index 9ddf176aee84c..557e3d21e31c3 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceiverTracker.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceiverTracker.scala @@ -20,14 +20,22 @@ package org.apache.spark.streaming.scheduler import java.util.concurrent.{CountDownLatch, TimeUnit} import scala.collection.mutable.HashMap +<<<<<<< HEAD +import scala.concurrent.{Future, ExecutionContext} +======= import scala.concurrent.{ExecutionContext, Future} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import scala.language.existentials import scala.util.{Failure, Success} import org.apache.spark._ import org.apache.spark.rdd.RDD import org.apache.spark.rpc._ +<<<<<<< HEAD +import org.apache.spark.scheduler.{TaskLocation, ExecutorCacheTaskLocation} +======= import org.apache.spark.scheduler.{ExecutorCacheTaskLocation, TaskLocation} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.streaming.{StreamingContext, Time} import org.apache.spark.streaming.receiver._ import org.apache.spark.streaming.util.WriteAheadLogUtils diff --git a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/StreamingListener.scala b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/StreamingListener.scala index 58fc78d552106..046461d3f5bc6 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/StreamingListener.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/StreamingListener.scala @@ -19,8 +19,13 @@ package org.apache.spark.streaming.scheduler import scala.collection.mutable.Queue +<<<<<<< HEAD +import org.apache.spark.util.Distribution +import org.apache.spark.annotation.DeveloperApi +======= import org.apache.spark.annotation.DeveloperApi import org.apache.spark.util.Distribution +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * :: DeveloperApi :: diff --git a/streaming/src/main/scala/org/apache/spark/streaming/ui/BatchPage.scala b/streaming/src/main/scala/org/apache/spark/streaming/ui/BatchPage.scala index 7635f79a3d2d1..368d1d15bffb7 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/ui/BatchPage.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/ui/BatchPage.scala @@ -25,8 +25,13 @@ import org.apache.commons.lang3.StringEscapeUtils import org.apache.spark.streaming.Time import org.apache.spark.streaming.ui.StreamingJobProgressListener.{OutputOpId, SparkJobId} +<<<<<<< HEAD +import org.apache.spark.ui.jobs.UIData.JobUIData +import org.apache.spark.ui.{UIUtils => SparkUIUtils, WebUIPage} +======= import org.apache.spark.ui.{UIUtils => SparkUIUtils, WebUIPage} import org.apache.spark.ui.jobs.UIData.JobUIData +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 private[ui] case class SparkJobIdWithUIData(sparkJobId: SparkJobId, jobUIData: Option[JobUIData]) diff --git a/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingJobProgressListener.scala b/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingJobProgressListener.scala index 4908be0536353..b6fb80d6265bb 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingJobProgressListener.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingJobProgressListener.scala @@ -17,6 +17,21 @@ package 
org.apache.spark.streaming.ui +<<<<<<< HEAD +import java.util.LinkedHashMap +import java.util.{Map => JMap} +import java.util.Properties + +import scala.collection.mutable.{ArrayBuffer, Queue, HashMap, SynchronizedBuffer} + +import org.apache.spark.scheduler._ +import org.apache.spark.streaming.{Time, StreamingContext} +import org.apache.spark.streaming.scheduler._ +import org.apache.spark.streaming.scheduler.StreamingListenerReceiverStarted +import org.apache.spark.streaming.scheduler.StreamingListenerBatchStarted +import org.apache.spark.streaming.scheduler.StreamingListenerBatchSubmitted + +======= import java.util.{LinkedHashMap, Map => JMap, Properties} import scala.collection.mutable.{ArrayBuffer, HashMap, Queue, SynchronizedBuffer} @@ -24,6 +39,7 @@ import scala.collection.mutable.{ArrayBuffer, HashMap, Queue, SynchronizedBuffer import org.apache.spark.scheduler._ import org.apache.spark.streaming.{StreamingContext, Time} import org.apache.spark.streaming.scheduler._ +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 private[streaming] class StreamingJobProgressListener(ssc: StreamingContext) extends StreamingListener with SparkListener { diff --git a/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingTab.scala b/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingTab.scala index 0662c64a0ce9b..5d3b910c35189 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingTab.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/ui/StreamingTab.scala @@ -21,14 +21,23 @@ import org.apache.spark.{Logging, SparkException} import org.apache.spark.streaming.StreamingContext import org.apache.spark.ui.{SparkUI, SparkUITab} +<<<<<<< HEAD +import StreamingTab._ + +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * Spark Web UI tab that shows statistics of a streaming job. * This assumes the given SparkContext has enabled its SparkUI. 
*/ private[spark] class StreamingTab(val ssc: StreamingContext) +<<<<<<< HEAD + extends SparkUITab(getSparkUI(ssc), "streaming") with Logging { +======= extends SparkUITab(StreamingTab.getSparkUI(ssc), "streaming") with Logging { import StreamingTab._ +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 private val STATIC_RESOURCE_DIR = "org/apache/spark/streaming/ui/static" diff --git a/streaming/src/main/scala/org/apache/spark/streaming/ui/UIUtils.scala b/streaming/src/main/scala/org/apache/spark/streaming/ui/UIUtils.scala index a485a46937f31..6443656423960 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/ui/UIUtils.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/ui/UIUtils.scala @@ -17,14 +17,24 @@ package org.apache.spark.streaming.ui +<<<<<<< HEAD +import scala.xml.Node + +import org.apache.commons.lang3.StringEscapeUtils + +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import java.text.SimpleDateFormat import java.util.TimeZone import java.util.concurrent.TimeUnit +<<<<<<< HEAD +======= import scala.xml.Node import org.apache.commons.lang3.StringEscapeUtils +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 private[streaming] object UIUtils { /** diff --git a/streaming/src/main/scala/org/apache/spark/streaming/util/BatchedWriteAheadLog.scala b/streaming/src/main/scala/org/apache/spark/streaming/util/BatchedWriteAheadLog.scala index 8cb45cdffa5d7..fa90a5b51d815 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/util/BatchedWriteAheadLog.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/util/BatchedWriteAheadLog.scala @@ -18,8 +18,13 @@ package org.apache.spark.streaming.util import java.nio.ByteBuffer +<<<<<<< HEAD +import java.util.concurrent.LinkedBlockingQueue +import java.util.{Iterator => JIterator} +======= import java.util.{Iterator => JIterator} import java.util.concurrent.LinkedBlockingQueue +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import scala.collection.JavaConverters._ import scala.collection.mutable.ArrayBuffer diff --git a/streaming/src/main/scala/org/apache/spark/streaming/util/FileBasedWriteAheadLog.scala b/streaming/src/main/scala/org/apache/spark/streaming/util/FileBasedWriteAheadLog.scala index 15ad2e27d372f..6b0ac6ccd0d46 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/util/FileBasedWriteAheadLog.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/util/FileBasedWriteAheadLog.scala @@ -17,8 +17,13 @@ package org.apache.spark.streaming.util import java.nio.ByteBuffer +<<<<<<< HEAD +import java.util.concurrent.{RejectedExecutionException, ThreadPoolExecutor} +import java.util.{Iterator => JIterator} +======= import java.util.{Iterator => JIterator} import java.util.concurrent.{RejectedExecutionException, ThreadPoolExecutor} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import scala.collection.JavaConverters._ import scala.collection.mutable.ArrayBuffer @@ -29,8 +34,13 @@ import scala.language.postfixOps import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path +<<<<<<< HEAD +import org.apache.spark.util.{CompletionIterator, ThreadUtils} +import org.apache.spark.{Logging, SparkConf} +======= import org.apache.spark.{Logging, SparkConf} import org.apache.spark.util.{CompletionIterator, ThreadUtils} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** * This class manages write ahead log files. 
@@ -224,8 +234,12 @@ private[streaming] class FileBasedWriteAheadLog( val logDirectoryPath = new Path(logDirectory) val fileSystem = HdfsUtils.getFileSystemForPath(logDirectoryPath, hadoopConf) +<<<<<<< HEAD + if (fileSystem.exists(logDirectoryPath) && fileSystem.getFileStatus(logDirectoryPath).isDir) { +======= if (fileSystem.exists(logDirectoryPath) && fileSystem.getFileStatus(logDirectoryPath).isDirectory) { +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 val logFileInfo = logFilesTologInfo(fileSystem.listStatus(logDirectoryPath).map { _.getPath }) pastLogs.clear() pastLogs ++= logFileInfo diff --git a/streaming/src/main/scala/org/apache/spark/streaming/util/FileBasedWriteAheadLogReader.scala b/streaming/src/main/scala/org/apache/spark/streaming/util/FileBasedWriteAheadLogReader.scala index e79b139bdd037..92e84b8de45ad 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/util/FileBasedWriteAheadLogReader.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/util/FileBasedWriteAheadLogReader.scala @@ -16,11 +16,18 @@ */ package org.apache.spark.streaming.util +<<<<<<< HEAD +import java.io.{IOException, Closeable, EOFException} +import java.nio.ByteBuffer + +import org.apache.hadoop.conf.Configuration +======= import java.io.{Closeable, EOFException, IOException} import java.nio.ByteBuffer import org.apache.hadoop.conf.Configuration +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.Logging /** diff --git a/streaming/src/main/scala/org/apache/spark/streaming/util/FileBasedWriteAheadLogWriter.scala b/streaming/src/main/scala/org/apache/spark/streaming/util/FileBasedWriteAheadLogWriter.scala index 1f5c1d4369b53..401f9d6925181 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/util/FileBasedWriteAheadLogWriter.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/util/FileBasedWriteAheadLogWriter.scala @@ -19,7 +19,14 @@ package org.apache.spark.streaming.util import java.io._ import java.nio.ByteBuffer +<<<<<<< HEAD +import scala.util.Try + +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.FSDataOutputStream +======= import org.apache.hadoop.conf.Configuration +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.util.Utils @@ -31,6 +38,14 @@ private[streaming] class FileBasedWriteAheadLogWriter(path: String, hadoopConf: private lazy val stream = HdfsUtils.getOutputStream(path, hadoopConf) +<<<<<<< HEAD + private lazy val hadoopFlushMethod = { + // Use reflection to get the right flush operation + val cls = classOf[FSDataOutputStream] + Try(cls.getMethod("hflush")).orElse(Try(cls.getMethod("sync"))).toOption + } +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 private var nextOffset = stream.getPos() private var closed = false @@ -54,7 +69,11 @@ private[streaming] class FileBasedWriteAheadLogWriter(path: String, hadoopConf: } private def flush() { +<<<<<<< HEAD + hadoopFlushMethod.foreach { _.invoke(stream) } +======= stream.hflush() +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 // Useful for local file system where hflush/sync does not work (HADOOP-7844) stream.getWrappedStream.flush() } diff --git a/streaming/src/main/scala/org/apache/spark/streaming/util/RateLimitedOutputStream.scala b/streaming/src/main/scala/org/apache/spark/streaming/util/RateLimitedOutputStream.scala index 5c3c7a6bf1b39..9ed420e914fe5 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/util/RateLimitedOutputStream.scala +++ 
b/streaming/src/main/scala/org/apache/spark/streaming/util/RateLimitedOutputStream.scala @@ -17,12 +17,22 @@ package org.apache.spark.streaming.util +<<<<<<< HEAD +import scala.annotation.tailrec + +import java.io.OutputStream +import java.util.concurrent.TimeUnit._ + +import org.apache.spark.Logging + +======= import java.io.OutputStream import java.util.concurrent.TimeUnit._ import scala.annotation.tailrec import org.apache.spark.Logging +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 private[streaming] class RateLimitedOutputStream(out: OutputStream, desiredBytesPerSec: Int) diff --git a/streaming/src/main/scala/org/apache/spark/streaming/util/RawTextSender.scala b/streaming/src/main/scala/org/apache/spark/streaming/util/RawTextSender.scala index e48eaf7913b12..5e932ed766233 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/util/RawTextSender.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/util/RawTextSender.scala @@ -23,7 +23,11 @@ import java.nio.ByteBuffer import scala.io.Source +<<<<<<< HEAD +import org.apache.spark.{SparkConf, Logging} +======= import org.apache.spark.{Logging, SparkConf} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.serializer.KryoSerializer import org.apache.spark.util.IntParam diff --git a/streaming/src/main/scala/org/apache/spark/streaming/util/WriteAheadLogUtils.scala b/streaming/src/main/scala/org/apache/spark/streaming/util/WriteAheadLogUtils.scala index ed616d8e810bd..ada6c1c02d124 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/util/WriteAheadLogUtils.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/util/WriteAheadLogUtils.scala @@ -21,8 +21,13 @@ import scala.util.control.NonFatal import org.apache.hadoop.conf.Configuration +<<<<<<< HEAD +import org.apache.spark.util.Utils +import org.apache.spark.{Logging, SparkConf, SparkException} +======= import org.apache.spark.{Logging, SparkConf, SparkException} import org.apache.spark.util.Utils +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 /** A helper class with utility functions related to the WriteAheadLog interface */ private[streaming] object WriteAheadLogUtils extends Logging { diff --git a/streaming/src/test/scala/org/apache/spark/streaming/util/WriteAheadLogSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/util/WriteAheadLogSuite.scala index a670c7d638192..edd6647948faa 100644 --- a/streaming/src/test/scala/org/apache/spark/streaming/util/WriteAheadLogSuite.scala +++ b/streaming/src/test/scala/org/apache/spark/streaming/util/WriteAheadLogSuite.scala @@ -705,8 +705,12 @@ object WriteAheadLogSuite { val logDirectoryPath = new Path(directory) val fileSystem = HdfsUtils.getFileSystemForPath(logDirectoryPath, hadoopConf) +<<<<<<< HEAD + if (fileSystem.exists(logDirectoryPath) && fileSystem.getFileStatus(logDirectoryPath).isDir) { +======= if (fileSystem.exists(logDirectoryPath) && fileSystem.getFileStatus(logDirectoryPath).isDirectory) { +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 fileSystem.listStatus(logDirectoryPath).map { _.getPath() }.sortBy { _.getName().split("-")(1).toLong }.map { diff --git a/tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala b/tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala index a947fac1d751d..a64319590e0f4 100644 --- a/tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala +++ b/tools/src/main/scala/org/apache/spark/tools/GenerateMIMAIgnore.scala @@ -23,8 +23,13 @@ import java.util.jar.JarFile import 
scala.collection.mutable import scala.collection.JavaConverters._ +<<<<<<< HEAD +import scala.reflect.runtime.universe.runtimeMirror +import scala.reflect.runtime.{universe => unv} +======= import scala.reflect.runtime.{universe => unv} import scala.reflect.runtime.universe.runtimeMirror +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import scala.util.Try /** diff --git a/tools/src/main/scala/org/apache/spark/tools/JavaAPICompletenessChecker.scala b/tools/src/main/scala/org/apache/spark/tools/JavaAPICompletenessChecker.scala index 6fb7184e877ee..9f6441de2aa95 100644 --- a/tools/src/main/scala/org/apache/spark/tools/JavaAPICompletenessChecker.scala +++ b/tools/src/main/scala/org/apache/spark/tools/JavaAPICompletenessChecker.scala @@ -17,16 +17,26 @@ package org.apache.spark.tools +<<<<<<< HEAD +import java.lang.reflect.{Type, Method} +======= import java.lang.reflect.{Method, Type} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import scala.collection.mutable.ArrayBuffer import scala.language.existentials import org.apache.spark._ import org.apache.spark.api.java._ +<<<<<<< HEAD +import org.apache.spark.rdd.{RDD, DoubleRDDFunctions, PairRDDFunctions, OrderedRDDFunctions} +import org.apache.spark.streaming.StreamingContext +import org.apache.spark.streaming.api.java.{JavaPairDStream, JavaDStream, JavaStreamingContext} +======= import org.apache.spark.rdd.{DoubleRDDFunctions, OrderedRDDFunctions, PairRDDFunctions, RDD} import org.apache.spark.streaming.StreamingContext import org.apache.spark.streaming.api.java.{JavaDStream, JavaPairDStream, JavaStreamingContext} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.streaming.dstream.{DStream, PairDStreamFunctions} diff --git a/tools/src/main/scala/org/apache/spark/tools/StoragePerfTester.scala b/tools/src/main/scala/org/apache/spark/tools/StoragePerfTester.scala index 8a5c7c0e730e6..204c3c7122845 100644 --- a/tools/src/main/scala/org/apache/spark/tools/StoragePerfTester.scala +++ b/tools/src/main/scala/org/apache/spark/tools/StoragePerfTester.scala @@ -20,8 +20,13 @@ package org.apache.spark.tools import java.util.concurrent.{CountDownLatch, Executors} import java.util.concurrent.atomic.AtomicLong +<<<<<<< HEAD +import org.apache.spark.executor.ShuffleWriteMetrics +import org.apache.spark.{SparkConf, SparkContext} +======= import org.apache.spark.{SparkConf, SparkContext} import org.apache.spark.executor.ShuffleWriteMetrics +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.serializer.KryoSerializer import org.apache.spark.shuffle.hash.HashShuffleManager import org.apache.spark.util.Utils diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/AMDelegationTokenRenewer.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/AMDelegationTokenRenewer.scala index b8daa501af7f0..1211fe1e75117 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/AMDelegationTokenRenewer.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/AMDelegationTokenRenewer.scala @@ -24,9 +24,15 @@ import scala.language.postfixOps import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.hadoop.security.UserGroupInformation +<<<<<<< HEAD +import org.apache.spark.deploy.SparkHadoopUtil + +import org.apache.spark.{Logging, SparkConf} +======= import org.apache.spark.{Logging, SparkConf} import org.apache.spark.deploy.SparkHadoopUtil +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.util.ThreadUtils /* diff --git 
a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala index cccc061647a7f..49bf5df03277d 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMaster.scala @@ -17,22 +17,38 @@ package org.apache.spark.deploy.yarn +<<<<<<< HEAD +import scala.util.control.NonFatal + +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import java.io.{File, IOException} import java.lang.reflect.InvocationTargetException import java.net.{Socket, URL} import java.util.concurrent.atomic.AtomicReference +<<<<<<< HEAD +======= import scala.util.control.NonFatal +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.hadoop.yarn.api._ import org.apache.hadoop.yarn.api.records._ import org.apache.hadoop.yarn.conf.YarnConfiguration +<<<<<<< HEAD +import org.apache.spark.rpc._ +import org.apache.spark.{Logging, SecurityManager, SparkConf, SparkContext, SparkEnv, + SparkException, SparkUserAppException} +import org.apache.spark.deploy.SparkHadoopUtil +import org.apache.spark.deploy.history.HistoryServer +======= import org.apache.spark._ import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.deploy.history.HistoryServer import org.apache.spark.rpc._ +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.scheduler.cluster.{CoarseGrainedSchedulerBackend, YarnSchedulerBackend} import org.apache.spark.scheduler.cluster.CoarseGrainedClusterMessages._ import org.apache.spark.util._ @@ -280,10 +296,17 @@ private[spark] class ApplicationMaster( .getOrElse("") val _sparkConf = if (sc != null) sc.getConf else sparkConf +<<<<<<< HEAD + val driverUrl = _rpcEnv.uriOf( + SparkEnv.driverActorSystemName, + RpcAddress(_sparkConf.get("spark.driver.host"), _sparkConf.get("spark.driver.port").toInt), + CoarseGrainedSchedulerBackend.ENDPOINT_NAME) +======= val driverUrl = RpcEndpointAddress( _sparkConf.get("spark.driver.host"), _sparkConf.get("spark.driver.port").toInt, CoarseGrainedSchedulerBackend.ENDPOINT_NAME).toString +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 allocator = client.register(driverUrl, driverRef, yarnConf, @@ -309,6 +332,10 @@ private[spark] class ApplicationMaster( port: String, isClusterMode: Boolean): RpcEndpointRef = { val driverEndpoint = rpcEnv.setupEndpointRef( +<<<<<<< HEAD + SparkEnv.driverActorSystemName, +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 RpcAddress(host, port.toInt), YarnSchedulerBackend.ENDPOINT_NAME) amEndpoint = diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMasterArguments.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMasterArguments.scala index 5af3941c6023e..936dd9ce602c0 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMasterArguments.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ApplicationMasterArguments.scala @@ -17,10 +17,16 @@ package org.apache.spark.deploy.yarn +<<<<<<< HEAD +import org.apache.spark.util.{MemoryParam, IntParam} +import org.apache.spark.deploy.yarn.YarnSparkHadoopUtil._ +import collection.mutable.ArrayBuffer +======= import scala.collection.mutable.ArrayBuffer import org.apache.spark.deploy.yarn.YarnSparkHadoopUtil._ import org.apache.spark.util.{IntParam, MemoryParam} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 class ApplicationMasterArguments(val args: Array[String]) { 
var userJar: String = null diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala index 8cf438be587dc..886b2fbde2ee7 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala @@ -28,12 +28,28 @@ import java.util.zip.{ZipEntry, ZipOutputStream} import scala.collection.JavaConverters._ import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet, ListBuffer, Map} import scala.reflect.runtime.universe +<<<<<<< HEAD +import scala.util.{Try, Success, Failure} +======= import scala.util.{Failure, Success, Try} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import scala.util.control.NonFatal import com.google.common.base.Charsets.UTF_8 import com.google.common.base.Objects import com.google.common.io.Files +<<<<<<< HEAD + +import org.apache.hadoop.io.DataOutputBuffer +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier +import org.apache.hadoop.fs._ +import org.apache.hadoop.fs.permission.FsPermission +import org.apache.hadoop.io.Text +import org.apache.hadoop.mapreduce.MRJobConfig +import org.apache.hadoop.security.{Credentials, UserGroupInformation} +import org.apache.hadoop.security.token.{TokenIdentifier, Token} +======= import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs._ import org.apache.hadoop.fs.permission.FsPermission @@ -42,6 +58,7 @@ import org.apache.hadoop.io.{DataOutputBuffer, Text} import org.apache.hadoop.mapreduce.MRJobConfig import org.apache.hadoop.security.{Credentials, UserGroupInformation} import org.apache.hadoop.security.token.{Token, TokenIdentifier} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.hadoop.util.StringUtils import org.apache.hadoop.yarn.api._ import org.apache.hadoop.yarn.api.ApplicationConstants.Environment @@ -53,8 +70,13 @@ import org.apache.hadoop.yarn.exceptions.ApplicationNotFoundException import org.apache.hadoop.yarn.util.Records import org.apache.spark.{Logging, SecurityManager, SparkConf, SparkContext, SparkException} +<<<<<<< HEAD +import org.apache.spark.launcher.{LauncherBackend, SparkAppHandle, YarnCommandBuilderUtils} +import org.apache.spark.deploy.SparkHadoopUtil +======= import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.launcher.{LauncherBackend, SparkAppHandle, YarnCommandBuilderUtils} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.util.Utils private[spark] class Client( diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ClientDistributedCacheManager.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ClientDistributedCacheManager.scala index 4ef05c5a846d5..a0dcb01ad5a0d 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ClientDistributedCacheManager.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ClientDistributedCacheManager.scala @@ -25,7 +25,11 @@ import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileStatus, FileSystem, Path} import org.apache.hadoop.fs.permission.FsAction import org.apache.hadoop.yarn.api.records._ +<<<<<<< HEAD +import org.apache.hadoop.yarn.util.{Records, ConverterUtils} +======= import org.apache.hadoop.yarn.util.{ConverterUtils, Records} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.Logging diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ExecutorDelegationTokenUpdater.scala 
b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ExecutorDelegationTokenUpdater.scala index 9d99c0d93fd1e..e3b5e0675f392 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ExecutorDelegationTokenUpdater.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ExecutorDelegationTokenUpdater.scala @@ -18,16 +18,28 @@ package org.apache.spark.deploy.yarn import java.util.concurrent.{Executors, TimeUnit} +<<<<<<< HEAD +======= import scala.util.control.NonFatal +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.{FileSystem, Path} import org.apache.hadoop.security.{Credentials, UserGroupInformation} +<<<<<<< HEAD +import org.apache.spark.deploy.SparkHadoopUtil +import org.apache.spark.{Logging, SparkConf} +import org.apache.spark.util.{ThreadUtils, Utils} + +import scala.util.control.NonFatal + +======= import org.apache.spark.{Logging, SparkConf} import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.util.{ThreadUtils, Utils} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 private[spark] class ExecutorDelegationTokenUpdater( sparkConf: SparkConf, hadoopConf: Configuration) extends Logging { diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala index 31fa53e24b507..89c7487d85ab5 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/ExecutorRunnable.scala @@ -25,12 +25,21 @@ import java.util.Collections import scala.collection.JavaConverters._ import scala.collection.mutable.{HashMap, ListBuffer} +<<<<<<< HEAD +import org.apache.hadoop.fs.Path +import org.apache.hadoop.yarn.api.ApplicationConstants.Environment +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.io.DataOutputBuffer +import org.apache.hadoop.security.UserGroupInformation +import org.apache.hadoop.yarn.api._ +======= import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path import org.apache.hadoop.io.DataOutputBuffer import org.apache.hadoop.security.UserGroupInformation import org.apache.hadoop.yarn.api._ import org.apache.hadoop.yarn.api.ApplicationConstants.Environment +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.hadoop.yarn.api.records._ import org.apache.hadoop.yarn.client.api.NMClient import org.apache.hadoop.yarn.conf.YarnConfiguration diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala index 11426eb07c7ed..7f6a151957d46 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnAllocator.scala @@ -30,6 +30,10 @@ import org.apache.hadoop.yarn.api.records._ import org.apache.hadoop.yarn.client.api.AMRMClient import org.apache.hadoop.yarn.client.api.AMRMClient.ContainerRequest import org.apache.hadoop.yarn.util.RackResolver +<<<<<<< HEAD + +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.log4j.{Level, Logger} import org.apache.spark.{Logging, SecurityManager, SparkConf, SparkException} diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnRMClient.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnRMClient.scala index af83cf6a77d17..b92a13d5cbeea 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnRMClient.scala +++ 
b/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnRMClient.scala @@ -19,8 +19,13 @@ package org.apache.spark.deploy.yarn import java.util.{List => JList} +<<<<<<< HEAD +import scala.collection.JavaConverters._ +import scala.collection.{Map, Set} +======= import scala.collection.{Map, Set} import scala.collection.JavaConverters._ +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import scala.util.Try import org.apache.hadoop.conf.Configuration diff --git a/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala b/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala index e286aed9f9781..e519e045456ff 100644 --- a/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala +++ b/yarn/src/main/scala/org/apache/spark/deploy/yarn/YarnSparkHadoopUtil.scala @@ -30,6 +30,21 @@ import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.Path import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier import org.apache.hadoop.io.Text +<<<<<<< HEAD +import org.apache.hadoop.mapred.{Master, JobConf} +import org.apache.hadoop.security.Credentials +import org.apache.hadoop.security.UserGroupInformation +import org.apache.hadoop.security.token.{Token, TokenIdentifier} +import org.apache.hadoop.yarn.conf.YarnConfiguration +import org.apache.hadoop.yarn.api.ApplicationConstants +import org.apache.hadoop.yarn.api.ApplicationConstants.Environment +import org.apache.hadoop.yarn.api.records.{ApplicationAccessType, ContainerId, Priority} +import org.apache.hadoop.yarn.util.ConverterUtils + +import org.apache.spark.deploy.SparkHadoopUtil +import org.apache.spark.launcher.YarnCommandBuilderUtils +import org.apache.spark.{SecurityManager, SparkConf, SparkException} +======= import org.apache.hadoop.mapred.{JobConf, Master} import org.apache.hadoop.security.Credentials import org.apache.hadoop.security.UserGroupInformation @@ -43,6 +58,7 @@ import org.apache.hadoop.yarn.util.ConverterUtils import org.apache.spark.{SecurityManager, SparkConf, SparkException} import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.launcher.YarnCommandBuilderUtils +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.util.Utils /** diff --git a/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala b/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala index 20e2030fce086..3fbd9e03bf7df 100644 --- a/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala +++ b/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnClientSchedulerBackend.scala @@ -21,7 +21,11 @@ import scala.collection.mutable.ArrayBuffer import org.apache.hadoop.yarn.api.records.YarnApplicationState +<<<<<<< HEAD +import org.apache.spark.{SparkException, Logging, SparkContext} +======= import org.apache.spark.{Logging, SparkContext, SparkException} +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.spark.deploy.yarn.{Client, ClientArguments, YarnSparkHadoopUtil} import org.apache.spark.launcher.SparkAppHandle import org.apache.spark.scheduler.TaskSchedulerImpl diff --git a/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnScheduler.scala b/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnScheduler.scala index 029382133ddf2..80a49a81cb275 100644 --- a/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnScheduler.scala +++ b/yarn/src/main/scala/org/apache/spark/scheduler/cluster/YarnScheduler.scala 
@@ -18,6 +18,10 @@ package org.apache.spark.scheduler.cluster import org.apache.hadoop.yarn.util.RackResolver +<<<<<<< HEAD + +======= +>>>>>>> 15bd73627e04591fd13667b4838c9098342db965 import org.apache.log4j.{Level, Logger} import org.apache.spark._
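Most of this diff is import shuffling, but a handful of the hunks above differ in behaviour, not just ordering. FileBasedWriteAheadLog and WriteAheadLogSuite disagree on `FileStatus.isDir` (HEAD) versus the newer `isDirectory` (incoming); ApplicationMaster builds the driver URL through `_rpcEnv.uriOf` with `SparkEnv.driverActorSystemName` on the HEAD side but through `RpcEndpointAddress(...).toString` on the incoming side, and likewise drops the actor-system name from `setupEndpointRef`, which appears to reflect the move away from Akka-style endpoint URIs. The clearest case is FileBasedWriteAheadLogWriter: HEAD looks up `hflush` by reflection and falls back to the older `sync`, while the incoming side calls `stream.hflush()` directly. The sketch below lifts the HEAD-side logic, essentially verbatim from the hunk, into a small standalone wrapper; the `FlushCompat` name is an assumption of this sketch, not a class in the patch.

```scala
import scala.util.Try

import org.apache.hadoop.fs.FSDataOutputStream

// HEAD side of FileBasedWriteAheadLogWriter.flush(): resolve hflush() by
// reflection and fall back to the pre-hflush sync() when it is absent.
// The incoming side calls stream.hflush() directly, assuming a Hadoop
// version that always provides it.
class FlushCompat(stream: FSDataOutputStream) {

  private lazy val hadoopFlushMethod = {
    // Use reflection to get the right flush operation
    val cls = classOf[FSDataOutputStream]
    Try(cls.getMethod("hflush")).orElse(Try(cls.getMethod("sync"))).toOption
  }

  def flush(): Unit = {
    hadoopFlushMethod.foreach { _.invoke(stream) }
    // Useful for local file system where hflush/sync does not work (HADOOP-7844)
    stream.getWrappedStream.flush()
  }
}
```

Whichever side is chosen, the FileBasedWriteAheadLog and WriteAheadLogSuite hunks should be resolved the same way, since they duplicate the same directory-existence check.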