Merged
Changes from all commits
26 commits
3494b12
[SPARK-25566][SPARK-25567][WEBUI][SQL] Support pagination for SQL tab…
shahidki31 Oct 12, 2018
d47a25f
[SPARK-25670][TEST] Reduce number of tested timezones in JsonExpressi…
MaxGekk Oct 12, 2018
541d7e1
[SPARK-25685][BUILD] Allow running tests in Jenkins in enterprise Git…
LantaoJin Oct 12, 2018
52f9f66
[SPARK-25712][CORE][MINOR] Improve usage message of start-master.sh a…
gengliangwang Oct 12, 2018
8e039a7
[SPARK-25697][CORE] When zstd compression enabled, InProgress applica…
shahidki31 Oct 12, 2018
c7eadb5
[SPARK-25660][SQL] Fix for the backward slash as CSV fields delimiter
MaxGekk Oct 12, 2018
4e141a4
[STREAMING][DOC] Fix typo & formatting for JavaDoc
mastloui-msft Oct 12, 2018
e965fb5
[SPARK-25664][SQL][TEST] Refactor JoinBenchmark to use main method
wangyum Oct 12, 2018
1ddfab8
[SPARK-19287][CORE][STREAMING] JavaPairRDD flatMapValues requires fun…
srowen Oct 12, 2018
3946de7
[SPARK-20327][CORE][YARN] Add CLI support for YARN custom resources, …
Oct 13, 2018
c9ba59d
[SPARK-25714] Fix Null Handling in the Optimizer rule BooleanSimplifi…
gatorsmile Oct 13, 2018
34f229b
[SPARK-25710][SQL] range should report metrics correctly
cloud-fan Oct 13, 2018
8812746
[MINOR] Fix code comment in BooleanSimplification.
gatorsmile Oct 13, 2018
2eaf058
[SPARK-25718][SQL] Detect recursive reference in Avro schema and thro…
gengliangwang Oct 13, 2018
26c1b95
[SPARK-25711][CORE] Improve start-history-server.sh: show usage User-…
gengliangwang Oct 13, 2018
b73f76b
[SPARK-25714][SQL][FOLLOWUP] improve the comment inside BooleanSimpli…
cloud-fan Oct 13, 2018
6bbceb9
[SPARK-25726][SQL][TEST] Fix flaky test in SaveIntoDataSourceCommandS…
dongjoon-hyun Oct 14, 2018
6c3f2c6
[SPARK-25727][SQL] Add outputOrdering to otherCopyArgs in InMemoryRel…
gatorsmile Oct 14, 2018
9426fd0
[SPARK-25372][YARN][K8S][FOLLOW-UP] Deprecate and generalize keytab /…
gatorsmile Oct 14, 2018
56247c1
[SPARK-25727][FOLLOWUP] Move outputOrdering to case class field for I…
mgaido91 Oct 15, 2018
0820484
[SPARK-25716][SQL][MINOR] remove unnecessary collection operation in …
SongYadong Oct 15, 2018
6c9c84f
[SPARK-23257][K8S] Kerberos Support for Spark on K8S
ifilonenko Oct 15, 2018
4cee191
[SPARK-25674][FOLLOW-UP] Update the stats for each ColumnarBatch
gatorsmile Oct 16, 2018
fdaa998
[SPARK-25738][SQL] Fix LOAD DATA INPATH for hdfs port
squito Oct 16, 2018
5c7f6b6
[SPARK-25629][TEST] Reduce ParquetFilterSuite: filter pushdown test t…
wangyum Oct 16, 2018
e028fd3
[SPARK-25736][SQL][TEST] add tests to verify the behavior of multi-co…
cloud-fan Oct 16, 2018
4 changes: 2 additions & 2 deletions core/src/main/scala/org/apache/spark/SparkConf.scala
@@ -729,9 +729,9 @@ private[spark] object SparkConf extends Logging {
EXECUTOR_MEMORY_OVERHEAD.key -> Seq(
AlternateConfig("spark.yarn.executor.memoryOverhead", "2.3")),
KEYTAB.key -> Seq(
AlternateConfig("spark.yarn.keytab", "2.5")),
AlternateConfig("spark.yarn.keytab", "3.0")),
PRINCIPAL.key -> Seq(
AlternateConfig("spark.yarn.principal", "2.5"))
AlternateConfig("spark.yarn.principal", "3.0"))
)

/**
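This hunk only updates the version recorded for the deprecated YARN-specific keys (they are superseded in 3.0, not 2.5); the alternate-key translation behavior is unchanged. A minimal sketch of that translation, assuming the generalized keys introduced by SPARK-25372 are `spark.kerberos.keytab` / `spark.kerberos.principal`:

```scala
import org.apache.spark.SparkConf

object DeprecatedKeytabExample {
  def main(args: Array[String]): Unit = {
    // Setting the old YARN-specific key still works; SparkConf logs a
    // deprecation warning and the value stays readable under both names.
    val conf = new SparkConf()
      .set("spark.yarn.keytab", "/tmp/user.keytab")

    // Assumption: KEYTAB resolves to the generalized key "spark.kerberos.keytab";
    // the lookup falls back to the registered alternate ("spark.yarn.keytab").
    println(conf.get("spark.kerberos.keytab", "<unset>"))
    println(conf.get("spark.yarn.keytab", "<unset>"))
  }
}
```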
core/src/main/scala/org/apache/spark/api/java/JavaPairRDD.scala
@@ -19,7 +19,7 @@ package org.apache.spark.api.java

import java.{lang => jl}
import java.lang.{Iterable => JIterable}
import java.util.{Comparator, List => JList}
import java.util.{Comparator, Iterator => JIterator, List => JList}

import scala.collection.JavaConverters._
import scala.language.implicitConversions
@@ -34,7 +34,8 @@ import org.apache.spark.{HashPartitioner, Partitioner}
import org.apache.spark.Partitioner._
import org.apache.spark.api.java.JavaSparkContext.fakeClassTag
import org.apache.spark.api.java.JavaUtils.mapAsSerializableJavaMap
import org.apache.spark.api.java.function.{Function => JFunction, Function2 => JFunction2, PairFunction}
import org.apache.spark.api.java.function.{FlatMapFunction, Function => JFunction,
Function2 => JFunction2, PairFunction}
import org.apache.spark.partial.{BoundedDouble, PartialResult}
import org.apache.spark.rdd.{OrderedRDDFunctions, RDD}
import org.apache.spark.rdd.RDD.rddToPairRDDFunctions
@@ -674,8 +675,8 @@ class JavaPairRDD[K, V](val rdd: RDD[(K, V)])
* Pass each value in the key-value pair RDD through a flatMap function without changing the
* keys; this also retains the original RDD's partitioning.
*/
def flatMapValues[U](f: JFunction[V, java.lang.Iterable[U]]): JavaPairRDD[K, U] = {
def fn: (V) => Iterable[U] = (x: V) => f.call(x).asScala
def flatMapValues[U](f: FlatMapFunction[V, U]): JavaPairRDD[K, U] = {
def fn: (V) => Iterator[U] = (x: V) => f.call(x).asScala
implicit val ctag: ClassTag[U] = fakeClassTag
fromRDD(rdd.flatMapValues(fn))
}
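With SPARK-19287, the Java-friendly `flatMapValues` takes a `FlatMapFunction[V, U]` whose `call` returns a `java.util.Iterator` rather than an `Iterable`, matching the other flatMap-style Java APIs. A minimal sketch of the new signature, called from Scala against the Java API:

```scala
import java.util.{Arrays => JArrays, Iterator => JIterator}

import scala.collection.JavaConverters._

import org.apache.spark.SparkConf
import org.apache.spark.api.java.JavaSparkContext
import org.apache.spark.api.java.function.FlatMapFunction

object FlatMapValuesExample {
  def main(args: Array[String]): Unit = {
    val jsc = new JavaSparkContext(
      new SparkConf().setMaster("local[2]").setAppName("flatMapValuesExample"))

    val pairs = jsc.parallelizePairs(JArrays.asList(
      ("fruit", "apple,banana"), ("veg", "carrot")))

    // call() now returns a java.util.Iterator[U] instead of an Iterable[U].
    val exploded = pairs.flatMapValues(new FlatMapFunction[String, String] {
      override def call(v: String): JIterator[String] =
        JArrays.asList(v.split(","): _*).iterator()
    })

    // Prints (fruit,apple), (fruit,banana), (veg,carrot).
    exploded.collect().asScala.foreach(println)
    jsc.stop()
  }
}
```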
9 changes: 5 additions & 4 deletions core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
@@ -335,7 +335,7 @@ private[spark] class SparkSubmit extends Logging {
val targetDir = Utils.createTempDir()

// assure a keytab is available from any place in a JVM
if (clusterManager == YARN || clusterManager == LOCAL || isMesosClient) {
if (clusterManager == YARN || clusterManager == LOCAL || isMesosClient || isKubernetesCluster) {
if (args.principal != null) {
if (args.keytab != null) {
require(new File(args.keytab).exists(), s"Keytab file: ${args.keytab} does not exist")
@@ -646,7 +646,8 @@
}
}

if (clusterManager == MESOS && UserGroupInformation.isSecurityEnabled) {
if ((clusterManager == MESOS || clusterManager == KUBERNETES)
&& UserGroupInformation.isSecurityEnabled) {
setRMPrincipal(sparkConf)
}

@@ -762,8 +763,8 @@
}

// [SPARK-20328]. HadoopRDD calls into a Hadoop library that fetches delegation tokens with
// renewer set to the YARN ResourceManager. Since YARN isn't configured in Mesos mode, we
// must trick it into thinking we're YARN.
// renewer set to the YARN ResourceManager. Since YARN isn't configured in Mesos or Kubernetes
// mode, we must trick it into thinking we're YARN.
private def setRMPrincipal(sparkConf: SparkConf): Unit = {
val shortUserName = UserGroupInformation.getCurrentUser.getShortUserName
val key = s"spark.hadoop.${YarnConfiguration.RM_PRINCIPAL}"
core/src/main/scala/org/apache/spark/deploy/history/HistoryServerArguments.scala
@@ -34,35 +34,21 @@ private[history] class HistoryServerArguments(conf: SparkConf, args: Array[String])

@tailrec
private def parse(args: List[String]): Unit = {
if (args.length == 1) {
setLogDirectory(args.head)
} else {
args match {
case ("--dir" | "-d") :: value :: tail =>
setLogDirectory(value)
parse(tail)
args match {
case ("--help" | "-h") :: tail =>
printUsageAndExit(0)

case ("--help" | "-h") :: tail =>
printUsageAndExit(0)
case ("--properties-file") :: value :: tail =>
propertiesFile = value
parse(tail)

case ("--properties-file") :: value :: tail =>
propertiesFile = value
parse(tail)
case Nil =>

case Nil =>

case _ =>
printUsageAndExit(1)
}
case _ =>
printUsageAndExit(1)
}
}

private def setLogDirectory(value: String): Unit = {
logWarning("Setting log directory through the command line is deprecated as of " +
"Spark 1.1.0. Please set this through spark.history.fs.logDirectory instead.")
conf.set("spark.history.fs.logDirectory", value)
}

// This mutates the SparkConf, so all accesses to it must be made after this line
Utils.loadDefaultSparkProperties(conf, propertiesFile)

@@ -73,8 +59,6 @@ private[history] class HistoryServerArguments(conf: SparkConf, args: Array[String])
|Usage: HistoryServer [options]
|
|Options:
| DIR Deprecated; set spark.history.fs.logDirectory directly
| --dir DIR (-d DIR) Deprecated; set spark.history.fs.logDirectory directly
| --properties-file FILE Path to a custom Spark properties file.
| Default is conf/spark-defaults.conf.
|
core/src/main/scala/org/apache/spark/scheduler/ReplayListenerBus.scala
@@ -118,6 +118,8 @@ private[spark] class ReplayListenerBus extends SparkListenerBus with Logging {
case e: HaltReplayException =>
// Just stop replay.
case _: EOFException if maybeTruncated =>
case _: IOException if maybeTruncated =>
logWarning(s"Failed to read Spark event log: $sourceName")
case ioe: IOException =>
throw ioe
case e: Exception =>
4 changes: 2 additions & 2 deletions core/src/main/scala/org/apache/spark/ui/PagedTable.scala
@@ -31,7 +31,7 @@ import org.apache.spark.util.Utils
*
* @param pageSize the number of rows in a page
*/
private[ui] abstract class PagedDataSource[T](val pageSize: Int) {
private[spark] abstract class PagedDataSource[T](val pageSize: Int) {

if (pageSize <= 0) {
throw new IllegalArgumentException("Page size must be positive")
@@ -72,7 +72,7 @@ private[ui] case class PageData[T](totalPage: Int, data: Seq[T])
/**
* A paged table that will generate a HTML table for a specified page and also the page navigation.
*/
private[ui] trait PagedTable[T] {
private[spark] trait PagedTable[T] {

def tableId: String

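Relaxing these from `private[ui]` to `private[spark]` lets the SQL tab (SPARK-25566) reuse the Web UI's paging support. As a standalone illustration of the paging contract (the member names below are hypothetical, not Spark's actual `PagedDataSource` API), a minimal sketch:

```scala
// Illustrative re-implementation only; member names are hypothetical.
abstract class SimplePagedDataSource[T](val pageSize: Int) {
  require(pageSize > 0, "Page size must be positive")

  /** Total number of rows backing the table. */
  protected def totalSize: Int

  /** Rows in the half-open range [from, to). */
  protected def slice(from: Int, to: Int): Seq[T]

  /** Rows for a 1-based page number, failing fast if the page is out of range. */
  def page(pageNum: Int): Seq[T] = {
    val totalPages = math.max((totalSize + pageSize - 1) / pageSize, 1)
    require(pageNum >= 1 && pageNum <= totalPages,
      s"Page $pageNum is out of range [1, $totalPages]")
    val from = (pageNum - 1) * pageSize
    slice(from, math.min(from + pageSize, totalSize))
  }
}

// Usage: a paged view over an in-memory sequence, e.g. pre-sorted table rows.
class SeqPagedDataSource[T](data: Seq[T], pageSize: Int)
  extends SimplePagedDataSource[T](pageSize) {
  protected def totalSize: Int = data.length
  protected def slice(from: Int, to: Int): Seq[T] = data.slice(from, to)
}
```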
@@ -200,11 +200,12 @@ private[spark] object Benchmark {
def getProcessorName(): String = {
val cpu = if (SystemUtils.IS_OS_MAC_OSX) {
Utils.executeAndGetOutput(Seq("/usr/sbin/sysctl", "-n", "machdep.cpu.brand_string"))
.stripLineEnd
} else if (SystemUtils.IS_OS_LINUX) {
Try {
val grepPath = Utils.executeAndGetOutput(Seq("which", "grep")).stripLineEnd
Utils.executeAndGetOutput(Seq(grepPath, "-m", "1", "model name", "/proc/cpuinfo"))
.stripLineEnd.replaceFirst("model name[\\s*]:[\\s*]", "")
.stripLineEnd.replaceFirst("model name[\\s*]:[\\s*]", "")
}.getOrElse("Unknown processor")
} else {
System.getenv("PROCESSOR_IDENTIFIER")
core/src/test/scala/org/apache/spark/deploy/history/HistoryServerArgumentsSuite.scala
@@ -40,18 +40,6 @@ class HistoryServerArgumentsSuite extends SparkFunSuite {
assert(conf.get("spark.testing") === "true")
}

test("Directory Arguments Parsing --dir or -d") {
val argStrings = Array("--dir", "src/test/resources/spark-events1")
val hsa = new HistoryServerArguments(conf, argStrings)
assert(conf.get("spark.history.fs.logDirectory") === "src/test/resources/spark-events1")
}

test("Directory Param can also be set directly") {
val argStrings = Array("src/test/resources/spark-events2")
val hsa = new HistoryServerArguments(conf, argStrings)
assert(conf.get("spark.history.fs.logDirectory") === "src/test/resources/spark-events2")
}

test("Properties File Arguments Parsing --properties-file") {
val tmpDir = Utils.createTempDir()
val outFile = File.createTempFile("test-load-spark-properties", "test", tmpDir)
6 changes: 4 additions & 2 deletions dev/run-tests-jenkins.py
@@ -39,7 +39,8 @@ def print_err(msg):
def post_message_to_github(msg, ghprb_pull_id):
print("Attempting to post to Github...")

url = "https://api.github.com/repos/apache/spark/issues/" + ghprb_pull_id + "/comments"
api_url = os.getenv("GITHUB_API_BASE", "https://api.github.com/repos/apache/spark")
url = api_url + "/issues/" + ghprb_pull_id + "/comments"
github_oauth_key = os.environ["GITHUB_OAUTH_KEY"]

posted_message = json.dumps({"body": msg})
@@ -176,7 +177,8 @@ def main():
build_display_name = os.environ["BUILD_DISPLAY_NAME"]
build_url = os.environ["BUILD_URL"]

commit_url = "https://github.com/apache/spark/commit/" + ghprb_actual_commit
project_url = os.getenv("SPARK_PROJECT_URL", "https://github.com/apache/spark")
commit_url = project_url + "/commit/" + ghprb_actual_commit

# GitHub doesn't auto-link short hashes when submitted via the API, unfortunately. :(
short_commit_hash = ghprb_actual_commit[0:7]
28 changes: 28 additions & 0 deletions docs/building-spark.md
@@ -260,3 +260,31 @@ For SBT, specify a complete scala version using (e.g. 2.12.6):
./build/sbt -Dscala.version=2.12.6

Otherwise, the sbt-pom-reader plugin will use the `scala.version` specified in the spark-parent pom.

## Running Jenkins tests with GitHub Enterprise

To run tests with Jenkins:

./dev/run-tests-jenkins

If you use an individual repository or a repository on GitHub Enterprise, export the environment variables below before running the above command.

### Related environment variables

<table class="table">
<tr><th>Variable Name</th><th>Default</th><th>Meaning</th></tr>
<tr>
<td><code>SPARK_PROJECT_URL</code></td>
<td>https://github.com/apache/spark</td>
<td>
The URL of the Spark project on GitHub Enterprise.
</td>
</tr>
<tr>
<td><code>GITHUB_API_BASE</code></td>
<td>https://api.github.com/repos/apache/spark</td>
<td>
The base API URL of the Spark project on GitHub Enterprise.
</td>
</tr>
</table>
41 changes: 41 additions & 0 deletions docs/running-on-kubernetes.md
@@ -821,4 +821,45 @@ specific to Spark on Kubernetes.
This sets the major Python version of the docker image used to run the driver and executor containers. Can either be 2 or 3.
</td>
</tr>
<tr>
<td><code>spark.kubernetes.kerberos.krb5.path</code></td>
<td><code>(none)</code></td>
<td>
Specify the local location of the krb5.conf file to be mounted on the driver and executors for Kerberos interaction.
Note that the KDC defined in the file must be reachable from inside the containers.
</td>
</tr>
<tr>
<td><code>spark.kubernetes.kerberos.krb5.configMapName</code></td>
<td><code>(none)</code></td>
<td>
Specify the name of the ConfigMap, containing the krb5.conf file, to be mounted on the driver and executors
for Kerberos interaction. The KDC defined in the file must be reachable from inside the containers. The ConfigMap must also
be in the same namespace as the driver and executor pods.
</td>
</tr>
<tr>
<td><code>spark.kubernetes.hadoop.configMapName</code></td>
<td><code>(none)</code></td>
<td>
Specify the name of the ConfigMap, containing the HADOOP_CONF_DIR files, to be mounted on the driver
and executors for custom Hadoop configuration.
</td>
</tr>
<tr>
<td><code>spark.kubernetes.kerberos.tokenSecret.name</code></td>
<td><code>(none)</code></td>
<td>
Specify the name of the secret where your existing delegation tokens are stored. This removes the need for the job user
to provide any Kerberos credentials when launching the job.
</td>
</tr>
<tr>
<td><code>spark.kubernetes.kerberos.tokenSecret.itemKey</code></td>
<td><code>(none)</code></td>
<td>
Specify the item key of the data where your existing delegation tokens are stored. This removes the need for the job user
to provide any Kerberos credentials when launching the job.
</td>
</tr>
</table>
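A sketch of how these options might be combined when submitting a kerberized job on Kubernetes; the krb5.conf path, ConfigMap name, secret name, and item key below are hypothetical placeholders:

```scala
import org.apache.spark.SparkConf

// Hypothetical values; substitute your own krb5.conf path, ConfigMap and secret names.
object KerberosOnK8sConf {
  val conf = new SparkConf()
    .set("spark.kubernetes.kerberos.krb5.path", "/etc/krb5.conf")
    .set("spark.kubernetes.hadoop.configMapName", "hadoop-conf-dir")
    // Reuse pre-created delegation tokens instead of passing a keytab/principal.
    .set("spark.kubernetes.kerberos.tokenSecret.name", "spark-delegation-tokens")
    .set("spark.kubernetes.kerberos.tokenSecret.itemKey", "hadoop-tokens")
}
```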
37 changes: 37 additions & 0 deletions docs/running-on-yarn.md
@@ -121,6 +121,43 @@ To use a custom metrics.properties for the application master and executors, upd…
Use lower-case suffixes, e.g. <code>k</code>, <code>m</code>, <code>g</code>, <code>t</code>, and <code>p</code>, for kibi-, mebi-, gibi-, tebi-, and pebibytes, respectively.
</td>
</tr>
<tr>
<td><code>spark.yarn.am.resource.{resource-type}</code></td>
<td><code>(none)</code></td>
<td>
Amount of resource to use for the YARN Application Master in client mode.
In cluster mode, use <code>spark.yarn.driver.resource.&lt;resource-type&gt;</code> instead.
Please note that this feature can be used only with YARN 3.0+.
For reference, see the YARN Resource Model documentation: https://hadoop.apache.org/docs/r3.0.1/hadoop-yarn/hadoop-yarn-site/ResourceModel.html
<p/>
Example:
To request GPU resources from YARN, use: <code>spark.yarn.am.resource.yarn.io/gpu</code>
</td>
</tr>
<tr>
<td><code>spark.yarn.driver.resource.{resource-type}</code></td>
<td><code>(none)</code></td>
<td>
Amount of resource to use for the YARN Application Master in cluster mode.
Please note that this feature can be used only with YARN 3.0+.
For reference, see the YARN Resource Model documentation: https://hadoop.apache.org/docs/r3.0.1/hadoop-yarn/hadoop-yarn-site/ResourceModel.html
<p/>
Example:
To request GPU resources from YARN, use: <code>spark.yarn.driver.resource.yarn.io/gpu</code>
</td>
</tr>
<tr>
<td><code>spark.yarn.executor.resource.{resource-type}</code></td>
<td><code>(none)</code></td>
<td>
Amount of resource to use per executor process.
Please note that this feature can be used only with YARN 3.0+.
For reference, see the YARN Resource Model documentation: https://hadoop.apache.org/docs/r3.0.1/hadoop-yarn/hadoop-yarn-site/ResourceModel.html
<p/>
Example:
To request GPU resources from YARN, use: <code>spark.yarn.executor.resource.yarn.io/gpu</code>
</td>
</tr>
<tr>
<td><code>spark.yarn.am.cores</code></td>
<td><code>1</code></td>
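A sketch of requesting a custom resource through the new properties; `yarn.io/gpu` follows the example given in the table above, the counts are illustrative, and this requires YARN 3.0+ with the resource type configured on the cluster:

```scala
import org.apache.spark.SparkConf

// Illustrative values only; requires YARN 3.0+ with the "yarn.io/gpu" resource type defined.
object YarnGpuResourceConf {
  val conf = new SparkConf()
    .set("spark.yarn.am.resource.yarn.io/gpu", "1")        // Application Master (client mode)
    .set("spark.yarn.driver.resource.yarn.io/gpu", "1")    // driver (cluster mode)
    .set("spark.yarn.executor.resource.yarn.io/gpu", "2")  // each executor
}
```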