Changes from all commits (20 commits)
cbfcc68 (Dec 8, 2014) Changed the default sbt/sbt to build/sbt and added a build/mvn which …
4a1609c (Dec 9, 2014) finalizing working linux install for maven to local ./build/apache-ma…
69c4e44 (Dec 9, 2014) working linux and osx installers for purely local mvn build
f914dea (Dec 11, 2014) Beginning final portions of localized scala home
07bf018 (Dec 15, 2014) Updated to specifically handle pulling down the correct scala version
ef017e6 (Dec 15, 2014) removed OS complexities, setup generic install_app call, removed extr…
bb8cc9d (Dec 15, 2014) removed package files
a680d12 (Dec 15, 2014) Added comments to functions and tested various mvn calls
3e8b9b3 (Dec 15, 2014) updated to properly only restart zinc if it was freshly installed
1af4a94 (Dec 15, 2014) fixed bug with uppercase vs lowercase variable
14a5da0 (Dec 15, 2014) removed unnecessary zinc output on startup
7e785a6 (Dec 16, 2014) added silent and quiet flags to curl and wget respectively, added sin…
28d0a99 (Dec 22, 2014) updated to remove the python dependency, uses grep instead
be11317 (Dec 22, 2014) added switch to silence download progress only if AMPLAB_JENKINS is set
b8437ba (Dec 23, 2014) set progress bars for curl and wget when not run on jenkins, no progr…
c5634de (Dec 23, 2014) updated documentation to overview build/mvn, updated all points where…
b979c58 (Dec 23, 2014) updated the merge conflict
d2d41b6 (Dec 23, 2014) added blurb about leverging zinc with build/mvn
9b79e38 (Dec 27, 2014) fixed merge conflicts with dev/run-tests, properly quoted args in sbt…
0e5a0e4 (Dec 27, 2014) minor incorrect doc verbage (with -> this)
7 changes: 5 additions & 2 deletions .gitignore
@@ -8,16 +8,19 @@
*.pyc
.idea/
.idea_modules/
-sbt/*.jar
+build/*.jar
.settings
.cache
+cache
.generated-mima*
-/build/
work/
out/
.DS_Store
third_party/libmesos.so
third_party/libmesos.dylib
+build/apache-maven*
+build/zinc*
+build/scala*
conf/java-opts
conf/*.sh
conf/*.cmd
132 changes: 132 additions & 0 deletions build/mvn
@@ -0,0 +1,132 @@
#!/usr/bin/env bash

# Determine the current working directory
_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
# Preserve the calling directory
_CALLING_DIR="$(pwd)"

# Installs any application tarball given a URL, the expected tarball name,
# and, optionally, a checkable binary path to determine if the binary has
# already been installed
## Arg1 - URL
## Arg2 - Tarball Name
## Arg3 - Checkable Binary
install_app() {
local remote_tarball="$1/$2"
local local_tarball="${_DIR}/$2"
local binary="${_DIR}/$3"

# setup `curl` and `wget` silent options if we're running on Jenkins
local curl_opts=""

Review comment (Contributor): I think you might need to pass --progress-bar to curl and --progress=bar to wget. At least this is what our old code did.

local wget_opts=""
if [ -n "$AMPLAB_JENKINS" ]; then
curl_opts="-s"
wget_opts="--quiet"
else
curl_opts="--progress-bar"
wget_opts="--progress=bar:force"
fi

if [ -z "$3" -o ! -f "$binary" ]; then
# check if we already have the tarball
# check if we have curl installed
# download application
[ ! -f "${local_tarball}" ] && [ -n "`which curl 2>/dev/null`" ] && \
echo "exec: curl ${curl_opts} ${remote_tarball}" && \
curl ${curl_opts} "${remote_tarball}" > "${local_tarball}"
  # if the file still doesn't exist, let's try `wget` and cross our fingers
[ ! -f "${local_tarball}" ] && [ -n "`which wget 2>/dev/null`" ] && \
echo "exec: wget ${wget_opts} ${remote_tarball}" && \
wget ${wget_opts} -O "${local_tarball}" "${remote_tarball}"
# if both were unsuccessful, exit
[ ! -f "${local_tarball}" ] && \
echo -n "ERROR: Cannot download $2 with cURL or wget; " && \
echo "please install manually and try again." && \
exit 2
cd "${_DIR}" && tar -xzf "$2"
rm -rf "$local_tarball"
fi
}
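
For reference, a hypothetical call pattern (not part of this PR) showing how the optional third argument behaves; the URL and names below are illustrative only:

    # hypothetical example: fetch and unpack a tool under build/, skipping
    # the download when build/foo-1.0/bin/foo already exists
    install_app \
      "http://example.org/dist/foo/1.0" \
      "foo-1.0.tgz" \
      "foo-1.0/bin/foo"
    # with the third argument omitted, there is no binary to probe, so the
    # tarball is re-downloaded and re-extracted on every invocation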

# Install maven under the build/ folder
install_mvn() {
install_app \
"http://apache.claz.org/maven/maven-3/3.2.3/binaries" \
"apache-maven-3.2.3-bin.tar.gz" \
"apache-maven-3.2.3/bin/mvn"
MVN_BIN="${_DIR}/apache-maven-3.2.3/bin/mvn"
}

# Install zinc under the build/ folder
install_zinc() {
local zinc_path="zinc-0.3.5.3/bin/zinc"
[ ! -f "${zinc_path}" ] && ZINC_INSTALL_FLAG=1
install_app \
"http://downloads.typesafe.com/zinc/0.3.5.3" \
"zinc-0.3.5.3.tgz" \
"${zinc_path}"
ZINC_BIN="${_DIR}/${zinc_path}"
}

# Determine the Scala version from the root pom.xml file, set the Scala URL,
# and, with that, download the specific version of Scala necessary under
# the build/ folder
install_scala() {
# determine the Scala version used in Spark
local scala_version=`grep "scala.version" "${_DIR}/../pom.xml" | \
    head -1 | cut -f2 -d'>' | cut -f1 -d'<'`

Review comment (Contributor): Would it be overkill to use a command-line utility like xmlstarlet to select this field? I'm worried that seemingly-innocuous changes to the POM might break the Maven build.

Reply (Contributor): That would probably be less brittle, but I guess it would introduce another dependency which we'd have to install in this script (since we want it to be a one-click installer). Since xmlstarlet binaries aren't portable, the installation logic might be complex, since we'd need some platform-specific logic to download the right binaries.

Reply (Author): Actually, I had originally used python to grab this in a more concrete way, but @pwendell recommended we remove as many dependencies as possible, so I switched over to the grep | cut scenario. Let me think it over; I might be able to find a bash-only way to assuredly grab the correct version. That said, we could always add an entry to project/build.properties (e.g. scala.version=2.10.4) and confidently pull it from there without all this hassle. What do you guys think?

Reply (Contributor): Hmm, I'd lean towards just leaving it as-is. It doesn't seem to be worth the hassle when compared against the requirement to reduce complexity and dependencies.
local scala_bin="${_DIR}/scala-${scala_version}/bin/scala"

install_app \
"http://downloads.typesafe.com/scala/${scala_version}" \
"scala-${scala_version}.tgz" \
"scala-${scala_version}/bin/scala"

SCALA_COMPILER="$(cd "$(dirname ${scala_bin})/../lib" && pwd)/scala-compiler.jar"
SCALA_LIBRARY="$(cd "$(dirname ${scala_bin})/../lib" && pwd)/scala-library.jar"
}
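
A minimal sketch of the build.properties alternative raised in the thread above, assuming a scala.version=2.10.4 entry were added to project/build.properties (no such entry exists; this PR keeps the grep-on-pom.xml approach):

    # hypothetical alternative, not adopted here: read the version from a
    # key=value entry rather than parsing XML with grep and cut
    scala_version=`grep "^scala.version" "${_DIR}/../project/build.properties" | cut -f2 -d'='`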

# Determines if a given application is already installed. If not, it will
# attempt to install it
## Arg1 - application name
## Arg2 - Alternate path to local install under build/ dir
check_and_install_app() {
# create the local environment variable in uppercase
local app_bin="`echo $1 | awk '{print toupper(\$0)}'`_BIN"
# some black magic to set the generated app variable (i.e. MVN_BIN) into the
# environment
eval "${app_bin}=`which $1 2>/dev/null`"

if [ -z "`which $1 2>/dev/null`" ]; then
install_$1
fi
}
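
To unpack the eval "black magic" above, here is what check_and_install_app "mvn" effectively executes, with /usr/bin/mvn standing in for whatever `which` reports on a given machine:

    # step one: uppercase the app name and append _BIN
    app_bin="`echo mvn | awk '{print toupper(\$0)}'`_BIN"   # app_bin=MVN_BIN
    # step two: assign the resolved path to that generated variable name
    eval "${app_bin}=`which mvn 2>/dev/null`"               # MVN_BIN=/usr/bin/mvn
    # if `which` printed nothing, install_mvn runs and sets MVN_BIN itself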

# Set up a healthy default for the Zinc port if none was provided from
# the environment
ZINC_PORT=${ZINC_PORT:-"3030"}

# Check and install all applications necessary to build Spark
check_and_install_app "mvn"

# Install the proper version of Scala and Zinc for the build
install_zinc
install_scala

# Reset the current working directory
cd "${_CALLING_DIR}"

# Now that zinc is ensured to be installed, check its status and, if it's
# not running or was just installed, start it
if [ -n "${ZINC_INSTALL_FLAG}" -o -z "`${ZINC_BIN} -status`" ]; then
${ZINC_BIN} -shutdown
${ZINC_BIN} -start -port ${ZINC_PORT} \
-scala-compiler "${SCALA_COMPILER}" \
-scala-library "${SCALA_LIBRARY}" &>/dev/null
fi

# Set any `mvn` options if not already present
export MAVEN_OPTS=${MAVEN_OPTS:-"-Xmx2g -XX:MaxPermSize=512M -XX:ReservedCodeCacheSize=512m"}

# Last, call the `mvn` command as usual
${MVN_BIN} "$@"
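
A usage sketch: the first invocation below downloads Maven, zinc, and Scala under build/ before compiling; later runs reuse them along with the warm zinc server (the flags are standard Maven options, not specific to this script):

    # build Spark without running tests, using the self-installed toolchain
    build/mvn -DskipTests clean package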
111 changes: 111 additions & 0 deletions build/sbt
@@ -0,0 +1,111 @@
#!/usr/bin/env bash

# When creating new tests for Spark SQL Hive, the HADOOP_CLASSPATH must contain the hive jars so
# that we can run Hive to generate the golden answer. This is not required for normal development
# or testing.
for i in "$HIVE_HOME"/lib/*
do HADOOP_CLASSPATH="$HADOOP_CLASSPATH:$i"
done
export HADOOP_CLASSPATH

realpath () {
(
TARGET_FILE="$1"

cd "$(dirname "$TARGET_FILE")"
TARGET_FILE="$(basename "$TARGET_FILE")"

COUNT=0
while [ -L "$TARGET_FILE" -a $COUNT -lt 100 ]
do
TARGET_FILE="$(readlink "$TARGET_FILE")"
cd $(dirname "$TARGET_FILE")
TARGET_FILE="$(basename $TARGET_FILE)"
COUNT=$(($COUNT + 1))
done

echo "$(pwd -P)/"$TARGET_FILE""
)
}

. "$(dirname "$(realpath "$0")")"/sbt-launch-lib.bash


declare -r noshare_opts="-Dsbt.global.base=project/.sbtboot -Dsbt.boot.directory=project/.boot -Dsbt.ivy.home=project/.ivy"
declare -r sbt_opts_file=".sbtopts"
declare -r etc_sbt_opts_file="/etc/sbt/sbtopts"

usage() {
cat <<EOM
Usage: $script_name [options]

-h | -help print this message
-v | -verbose this runner is chattier
-d | -debug set sbt log level to debug
-no-colors disable ANSI color codes
-sbt-create start sbt even if current directory contains no sbt project
-sbt-dir <path> path to global settings/plugins directory (default: ~/.sbt)
-sbt-boot <path> path to shared boot directory (default: ~/.sbt/boot in 0.11 series)
-ivy <path> path to local Ivy repository (default: ~/.ivy2)
-mem <integer> set memory options (default: $sbt_mem, which is $(get_mem_opts $sbt_mem))
-no-share use all local caches; no sharing
-no-global uses global caches, but does not use global ~/.sbt directory.
-jvm-debug <port> Turn on JVM debugging, open at the given port.
-batch Disable interactive mode

# sbt version (default: from project/build.properties if present, else latest release)
-sbt-version <version> use the specified version of sbt
-sbt-jar <path> use the specified jar as the sbt launcher
-sbt-rc use an RC version of sbt
-sbt-snapshot use a snapshot version of sbt

# java version (default: java from PATH, currently $(java -version 2>&1 | grep version))
-java-home <path> alternate JAVA_HOME

# jvm options and output control
JAVA_OPTS environment variable, if unset uses "$java_opts"
SBT_OPTS environment variable, if unset uses "$default_sbt_opts"
.sbtopts if this file exists in the current directory, it is
prepended to the runner args
/etc/sbt/sbtopts if this file exists, it is prepended to the runner args
-Dkey=val pass -Dkey=val directly to the java runtime
-J-X pass option -X directly to the java runtime
(-J is stripped)
-S-X add -X to sbt's scalacOptions (-S is stripped)
-PmavenProfiles Enable a maven profile for the build.

In the case of duplicated or conflicting options, the order above
shows precedence: JAVA_OPTS lowest, command line options highest.
EOM
}

process_my_args () {
while [[ $# -gt 0 ]]; do
case "$1" in
-no-colors) addJava "-Dsbt.log.noformat=true" && shift ;;
-no-share) addJava "$noshare_opts" && shift ;;
-no-global) addJava "-Dsbt.global.base=$(pwd)/project/.sbtboot" && shift ;;
-sbt-boot) require_arg path "$1" "$2" && addJava "-Dsbt.boot.directory=$2" && shift 2 ;;
-sbt-dir) require_arg path "$1" "$2" && addJava "-Dsbt.global.base=$2" && shift 2 ;;
-debug-inc) addJava "-Dxsbt.inc.debug=true" && shift ;;
-batch) exec </dev/null && shift ;;

-sbt-create) sbt_create=true && shift ;;

*) addResidual "$1" && shift ;;
esac
done

# Now, ensure sbt version is used.
[[ "${sbt_version}XXX" != "XXX" ]] && addJava "-Dsbt.version=$sbt_version"
}

loadConfigFile() {
cat "$1" | sed '/^\#/d'
}

# if sbtopts files exist, prepend their contents to $@ so it can be processed by this runner
[[ -f "$etc_sbt_opts_file" ]] && set -- $(loadConfigFile "$etc_sbt_opts_file") "$@"
[[ -f "$sbt_opts_file" ]] && set -- $(loadConfigFile "$sbt_opts_file") "$@"

run "$@"
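
To illustrate the config-file hook, a hypothetical .sbtopts at the repository root (lines starting with # are stripped by loadConfigFile; the remaining tokens are prepended to the runner arguments):

    # hypothetical .sbtopts: raise the launcher heap and keep caches local
    -mem 4096
    -no-share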
6 changes: 3 additions & 3 deletions sbt/sbt-launch-lib.bash → build/sbt-launch-lib.bash
@@ -37,10 +37,10 @@ dlog () {
}

acquire_sbt_jar () {
-SBT_VERSION=`awk -F "=" '/sbt\\.version/ {print $2}' ./project/build.properties`
+SBT_VERSION=`awk -F "=" '/sbt\.version/ {print $2}' ./project/build.properties`
URL1=http://typesafe.artifactoryonline.com/typesafe/ivy-releases/org.scala-sbt/sbt-launch/${SBT_VERSION}/sbt-launch.jar
URL2=http://repo.typesafe.com/typesafe/ivy-releases/org.scala-sbt/sbt-launch/${SBT_VERSION}/sbt-launch.jar
-JAR=sbt/sbt-launch-${SBT_VERSION}.jar
+JAR=build/sbt-launch-${SBT_VERSION}.jar

sbt_jar=$JAR

@@ -150,7 +150,7 @@ process_args () {
-java-home) require_arg path "$1" "$2" && java_cmd="$2/bin/java" && export JAVA_HOME=$2 && shift 2 ;;

-D*) addJava "$1" && shift ;;
-    -J*) addJava "${1:2}" && shift ;;
+    -J*) addJava "${1:2}" && shift ;;
-P*) enableProfile "$1" && shift ;;
*) addResidual "$1" && shift ;;
esac
10 changes: 5 additions & 5 deletions dev/create-release/create-release.sh
@@ -87,8 +87,8 @@ if [[ ! "$@" =~ --package-only ]]; then
git commit -a -m "Preparing development version $next_ver"
git push origin $GIT_TAG
git push origin HEAD:$GIT_BRANCH
-git checkout -f $GIT_TAG
+git checkout -f $GIT_TAG

# Using Nexus API documented here:
# https://support.sonatype.com/entries/39720203-Uploading-to-a-Staging-Repository-via-REST-API
echo "Creating Nexus staging repository"
@@ -106,7 +106,7 @@ if [[ ! "$@" =~ --package-only ]]; then
clean install

./dev/change-version-to-2.11.sh

mvn -DskipTests -Dhadoop.version=2.2.0 -Dyarn.version=2.2.0 \
-Dscala-2.11 -Pyarn -Phive -Phadoop-2.2 -Pspark-ganglia-lgpl -Pkinesis-asl \
clean install
@@ -174,7 +174,7 @@ make_binary_release() {
NAME=$1
FLAGS=$2
cp -r spark spark-$RELEASE_VERSION-bin-$NAME

cd spark-$RELEASE_VERSION-bin-$NAME

# TODO There should probably be a flag to make-distribution to allow 2.11 support
@@ -219,7 +219,7 @@ scp spark-* \

# Docs
cd spark
-sbt/sbt clean
+build/sbt clean
cd docs
# Compile docs with Java 7 to use nicer format
JAVA_HOME=$JAVA_7_HOME PRODUCTION=1 jekyll build
8 changes: 4 additions & 4 deletions dev/mima
@@ -24,21 +24,21 @@ set -e
FWDIR="$(cd "`dirname "$0"`"/..; pwd)"
cd "$FWDIR"

-echo -e "q\n" | sbt/sbt oldDeps/update
+echo -e "q\n" | build/sbt oldDeps/update
rm -f .generated-mima*

-# Generate Mima Ignore is called twice, first with latest built jars
+# Generate Mima Ignore is called twice, first with latest built jars
# on the classpath and then again with previous version jars on the classpath.
# Because of a bug in GenerateMIMAIgnore that when old jars are ahead on classpath
-# it did not process the new classes (which are in assembly jar).
+# it did not process the new classes (which are in assembly jar).
./bin/spark-class org.apache.spark.tools.GenerateMIMAIgnore

export SPARK_CLASSPATH="`find lib_managed \( -name '*spark*jar' -a -type f \) | tr "\\n" ":"`"
echo "SPARK_CLASSPATH=$SPARK_CLASSPATH"

./bin/spark-class org.apache.spark.tools.GenerateMIMAIgnore

-echo -e "q\n" | sbt/sbt mima-report-binary-issues | grep -v -e "info.*Resolving"
+echo -e "q\n" | build/sbt mima-report-binary-issues | grep -v -e "info.*Resolving"
ret_val=$?

if [ $ret_val != 0 ]; then