diff --git a/.gitignore b/.gitignore
index debad77ec2ad3..a20fd4fc904a9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -62,6 +62,9 @@ unit-tests.log
ec2/lib/
rat-results.txt
scalastyle.txt
+conf/spark-defaults.conf.bak
+conf/*.conf
+conf/conf.cloudera.yarn
scalastyle-output.xml
R-unit-tests.log
R/unit-tests.out
diff --git a/Capfile b/Capfile
new file mode 100644
index 0000000000000..d85d15adaa432
--- /dev/null
+++ b/Capfile
@@ -0,0 +1,69 @@
+require 'bundler/setup'
+require 'capistrano_recipes/deploy/packserv'
+
+set :application, "spark"
+set :user, "deploy"
+set :shared_work_path, "/u/apps/spark/shared/work"
+set :shared_logs_path, "/u/apps/spark/shared/log"
+set :shared_conf_path, "/u/apps/spark/shared/conf"
+set :spark_jar_path, "hdfs://hadoop-production/user/sparkles"
+set :gateway, nil
+set :keep_releases, 5
+set :branch, fetch(:branch, `git symbolic-ref --short HEAD`.gsub(/\s/, ""))
+
+DATANODES = (2..48).map {|i| "dn%02d.chi.shopify.com" % i }
+OTHERNODES = ["hadoop-etl1.chi.shopify.com", "hadoop-misc4.chi.shopify.com", "spark-etl1.chi.shopify.com", "reportify-etl4.chi.shopify.com"]
+BROKEN = [] # Nodes that are down; don't try to send code to them
+
+task :production do
+ role :app, *(DATANODES + OTHERNODES - BROKEN)
+ role :history, "hadoop-rm.chi.shopify.com"
+ role :uploader, "spark-etl1.chi.shopify.com"
+end
+
+namespace :deploy do
+ task :cleanup do
+ count = fetch(:keep_releases, 5).to_i
+ run "ls -1dt /u/apps/spark/releases/* | tail -n +#{count + 1} | xargs rm -rf"
+ end
+
+ task :upload_to_hdfs, :roles => :uploader, :on_no_matching_servers => :continue do
+ run "hdfs dfs -copyFromLocal -f #{release_path}/lib/spark-assembly-*.jar #{fetch(:spark_jar_path)}/spark-assembly-#{fetch(:sha)}.jar"
+ run "hdfs dfs -copyFromLocal -f #{release_path}/python/lib/pyspark.zip #{fetch(:spark_jar_path)}/pyspark-#{fetch(:sha)}.zip"
+ run "hdfs dfs -copyFromLocal -f #{release_path}/python/lib/py4j-*.zip #{fetch(:spark_jar_path)}/py4j-#{fetch(:sha)}.zip"
+ end
+
+ task :prevent_gateway do
+ set :gateway, nil
+ end
+
+ task :symlink_shared do
+ run "ln -nfs #{shared_work_path} #{release_path}/work"
+ run "ln -nfs #{shared_logs_path} #{release_path}/logs"
+ run "rm -rf #{release_path}/conf && ln -nfs #{shared_conf_path} #{release_path}/conf"
+ end
+
+ task :remind_us_to_update_starscream do
+ puts "****************************************************************"
+ puts "*"
+ puts "* Remember to update starscream/conf/config.yml"
+ puts "*"
+ puts "* spark_production"
+ puts "* conf_options:"
+ puts "* <<: *spark_remote"
+ puts "* spark.yarn.jar: \"#{fetch(:spark_jar_path)}/spark-assembly-\033[31m#{fetch(:sha)}\033[0m.jar\""
+ puts "*"
+ puts "****************************************************************"
+ end
+
+ task :restart do
+ end
+
+ after 'deploy:initialize_variables', 'deploy:prevent_gateway' # capistrano recipes packserv deploy always uses a gateway
+ before 'deploy:symlink_current', 'deploy:symlink_shared'
+ before 'deploy:test_spark_jar', 'deploy:initialize_variables'
+ before 'deploy:upload_to_hdfs', 'deploy:initialize_variables'
+ after 'deploy:unpack', 'deploy:upload_to_hdfs'
+ after 'deploy:restart', 'deploy:cleanup'
+ after 'deploy:cleanup', 'deploy:remind_us_to_update_starscream'
+end
diff --git a/Gemfile b/Gemfile
new file mode 100644
index 0000000000000..b6d208818e7e4
--- /dev/null
+++ b/Gemfile
@@ -0,0 +1,7 @@
+# A sample Gemfile
+source "https://rubygems.org"
+
+group :deploy do
+ gem 'capistrano', '~> 2'
+ gem 'capistrano-recipes', git: "git@github.com:Shopify/capistrano-recipes", ref: '57bd4ed4accc5561d4774ec2f072bb71bd1b2ea7'
+end
diff --git a/Gemfile.lock b/Gemfile.lock
new file mode 100644
index 0000000000000..1c695014d451d
--- /dev/null
+++ b/Gemfile.lock
@@ -0,0 +1,34 @@
+GIT
+ remote: git@github.com:Shopify/capistrano-recipes
+ revision: 57bd4ed4accc5561d4774ec2f072bb71bd1b2ea7
+ ref: 57bd4ed4accc5561d4774ec2f072bb71bd1b2ea7
+ specs:
+ capistrano-recipes (1.1.0)
+ capistrano (~> 2.15.5)
+ json (>= 1.8.1)
+
+GEM
+ remote: https://rubygems.org/
+ specs:
+ capistrano (2.15.5)
+ highline
+ net-scp (>= 1.0.0)
+ net-sftp (>= 2.0.0)
+ net-ssh (>= 2.0.14)
+ net-ssh-gateway (>= 1.1.0)
+ highline (1.6.21)
+ json (1.8.1)
+ net-scp (1.1.2)
+ net-ssh (>= 2.6.5)
+ net-sftp (2.1.2)
+ net-ssh (>= 2.6.5)
+ net-ssh (2.8.0)
+ net-ssh-gateway (1.2.0)
+ net-ssh (>= 2.6.5)
+
+PLATFORMS
+ ruby
+
+DEPENDENCIES
+ capistrano (~> 2)
+ capistrano-recipes!
diff --git a/README.md b/README.md
index 380422ca00dbe..4d0202f857efc 100644
--- a/README.md
+++ b/README.md
@@ -1,14 +1,9 @@
-# Apache Spark
+# Shopify's Apache Spark
-Spark is a fast and general cluster computing system for Big Data. It provides
-high-level APIs in Scala, Java, and Python, and an optimized engine that
-supports general computation graphs for data analysis. It also supports a
-rich set of higher-level tools including Spark SQL for SQL and DataFrames,
-MLlib for machine learning, GraphX for graph processing,
-and Spark Streaming for stream processing.
-
-
+Spark is a fast and general cluster computing system for Big Data.
+This is Shopify's clone with Shopify-specific customizations, mostly
+around configuration.
## Online Documentation
@@ -17,82 +12,14 @@ guide, on the [project web page](http://spark.apache.org/documentation.html)
and [project wiki](https://cwiki.apache.org/confluence/display/SPARK).
This README file only contains basic setup instructions.
-## Building Spark
-
-Spark is built using [Apache Maven](http://maven.apache.org/).
-To build Spark and its example programs, run:
-
- build/mvn -DskipTests clean package
-
-(You do not need to do this if you downloaded a pre-built package.)
-More detailed documentation is available from the project site, at
-["Building Spark"](http://spark.apache.org/docs/latest/building-spark.html).
-
-## Interactive Scala Shell
-
-The easiest way to start using Spark is through the Scala shell:
-
- ./bin/spark-shell
-
-Try the following command, which should return 1000:
-
- scala> sc.parallelize(1 to 1000).count()
-
-## Interactive Python Shell
-
-Alternatively, if you prefer Python, you can use the Python shell:
-
- ./bin/pyspark
-
-And run the following command, which should also return 1000:
-
- >>> sc.parallelize(range(1000)).count()
-
-## Example Programs
-
-Spark also comes with several sample programs in the `examples` directory.
-To run one of them, use `./bin/run-example [params]`. For example:
-
- ./bin/run-example SparkPi
-
-will run the Pi example locally.
-
-You can set the MASTER environment variable when running examples to submit
-examples to a cluster. This can be a mesos:// or spark:// URL,
-"yarn-cluster" or "yarn-client" to run on YARN, and "local" to run
-locally with one thread, or "local[N]" to run locally with N threads. You
-can also use an abbreviated class name if the class is in the `examples`
-package. For instance:
-
- MASTER=spark://host:7077 ./bin/run-example SparkPi
-
-Many of the example programs print usage help if no params are given.
-
-## Running Tests
-
-Testing first requires [building Spark](#building-spark). Once Spark is built, tests
-can be run using:
-
- ./dev/run-tests
-
-Please see the guidance on how to
-[run tests for a module, or individual tests](https://cwiki.apache.org/confluence/display/SPARK/Useful+Developer+Tools).
+## Building Shopify Spark
-## A Note About Hadoop Versions
+You can build Shopify Spark using `script/setup`, or continuously and incrementally using `script/watch`.
-Spark uses the Hadoop core library to talk to HDFS and other Hadoop-supported
-storage systems. Because the protocols have changed in different versions of
-Hadoop, you must build Spark against the same version that your cluster runs.
+## Testing Shopify Spark
-Please refer to the build documentation at
-["Specifying the Hadoop Version"](http://spark.apache.org/docs/latest/building-spark.html#specifying-the-hadoop-version)
-for detailed guidance on building for a particular distribution of Hadoop, including
-building for particular Hive and Hive Thriftserver distributions. See also
-["Third Party Hadoop Distributions"](http://spark.apache.org/docs/latest/hadoop-third-party-distributions.html)
-for guidance on building a Spark application that works with a particular
-distribution.
+To test a Shopify Spark build, assemble the Spark jar with `script/setup` or Maven, then unset the `spark.yarn.jar` property in the defaults.conf (or the config) of the application you are using. Spark will then upload your local assembly to your YARN application's staging directory; no deploy is involved.
-## Configuration
+## Deploying Shopify Spark
-Please refer to the [Configuration guide](http://spark.apache.org/docs/latest/configuration.html)
-in the online documentation for an overview on how to configure Spark.
+The cap deploy script is only for deploying Shopify Spark to production. To deploy, run `bundle exec cap production deploy`.
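
For illustration only (not part of this diff), here is a minimal PySpark sketch of the testing workflow described above: leave `spark.yarn.jar` unset so Spark uploads the locally built assembly, with a comment showing how a deployed environment would pin it instead. The master, app name, and `<sha>` placeholder are assumptions.

    from pyspark import SparkConf, SparkContext

    conf = SparkConf().setMaster("yarn-client").setAppName("assembly-smoke-test")
    # spark.yarn.jar is deliberately left unset: Spark will upload the locally
    # assembled jar to this YARN application's staging directory.
    # A deployed environment would pin it instead, e.g.:
    # conf.set("spark.yarn.jar",
    #          "hdfs://hadoop-production/user/sparkles/spark-assembly-<sha>.jar")
    sc = SparkContext(conf=conf)
    print(sc.parallelize(range(1000)).count())  # quick sanity check, expect 1000
    sc.stop()
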
diff --git a/SHOPIFY_HADOOP_OPTIONS b/SHOPIFY_HADOOP_OPTIONS
new file mode 100644
index 0000000000000..e51a043249fa9
--- /dev/null
+++ b/SHOPIFY_HADOOP_OPTIONS
@@ -0,0 +1 @@
+-Phadoop-2.4 -Dhadoop.version=2.6.0 -Pyarn -Phive
diff --git a/assembly/pom.xml b/assembly/pom.xml
index e9c6d26ccddc7..2acab016987d9 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -92,27 +92,6 @@
true
-
-
- org.apache.maven.plugins
- maven-antrun-plugin
-
-
- package
-
- run
-
-
-
-
-
-
-
-
-
-
-
-
org.apache.maven.plugins
@@ -162,6 +141,27 @@
+
+
+ org.apache.maven.plugins
+ maven-antrun-plugin
+
+
+ package
+
+ run
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/conf/java-opts b/conf/java-opts
new file mode 100644
index 0000000000000..c80852aa70a64
--- /dev/null
+++ b/conf/java-opts
@@ -0,0 +1 @@
+-Djava.security.krb5.realm= -Djava.security.krb5.kdc= -Djava.security.krb5.conf=/dev/null
diff --git a/conf/log4j.properties b/conf/log4j.properties
new file mode 100644
index 0000000000000..be016d0f03577
--- /dev/null
+++ b/conf/log4j.properties
@@ -0,0 +1,22 @@
+# Set everything to be logged to the console
+log4j.rootCategory=INFO, console, file
+
+log4j.appender.console=org.apache.log4j.ConsoleAppender
+log4j.appender.console.Threshold=WARN
+log4j.appender.console.target=System.err
+log4j.appender.console.layout=org.apache.log4j.PatternLayout
+log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n
+
+# Settings for the file appender that captures more verbose output
+log4j.appender.file=org.apache.log4j.RollingFileAppender
+log4j.appender.file.File=/tmp/spark.log
+log4j.appender.file.MaxFileSize=20MB
+log4j.appender.file.Threshold=INFO
+log4j.appender.file.MaxBackupIndex=1
+log4j.appender.file.layout=org.apache.log4j.PatternLayout
+log4j.appender.file.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p %c{1} %m%n
+
+# Settings to quiet third party logs that are too verbose
+log4j.logger.org.eclipse.jetty=WARN
+log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO
+log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO
diff --git a/conf/spark-defaults.conf b/conf/spark-defaults.conf
new file mode 100644
index 0000000000000..d4fff194c27f5
--- /dev/null
+++ b/conf/spark-defaults.conf
@@ -0,0 +1,3 @@
+# Shopify doesn't set defaults here; instead, each client specifies its own set of defaults.
+# That way, each client can choose defaults appropriate to it and vary them by environment,
+# and no client has to reason about an overridden set of values that differs from the defaults listed in the docs.
diff --git a/conf/spark-env.sh b/conf/spark-env.sh
new file mode 100755
index 0000000000000..4b00e86334fa2
--- /dev/null
+++ b/conf/spark-env.sh
@@ -0,0 +1,31 @@
+#!/usr/bin/env bash
+
+echoerr() { echo "$@" 1>&2; }
+FWDIR="$(cd `dirname $0`/..; pwd)"
+
+
+if [ "$(uname)" == "Darwin" ]; then
+ case "$PYTHON_ENV" in
+ 'remote_development')
+ echoerr "Sparkify: Connecting to chicago spark cluster ..."
+ # Figure out the local IP to bind spark to for shell <-> master communication
+ vpn_interface=tap0;
+ get_ip_command="ifconfig $vpn_interface 2>&1 | grep 'inet' | awk '{print \$2}'"
+ if ifconfig $vpn_interface > /dev/null 2>&1; then
+ export SPARK_LOCAL_IP=`bash -c "$get_ip_command"`
+ else
+ echoerr "ERROR: could not find an VPN interface to connect to the Shopify Spark Cluster! Please connect your VPN client! See https://vault-unicorn.shopify.com/VPN---Servers ."
+ exit 1
+ fi
+
+ export HADOOP_CONF_DIR=$FWDIR/conf/conf.cloudera.yarn
+ ;;
+ 'test'|'development')
+ export SPARK_LOCAL_IP=127.0.0.1
+ ;;
+ esac
+fi
+
+if which ipython > /dev/null; then
+ export IPYTHON=1
+fi
diff --git a/core/pom.xml b/core/pom.xml
index 95f36eb348698..edce5f6332f00 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -380,7 +380,7 @@
<groupId>net.razorvine</groupId>
<artifactId>pyrolite</artifactId>
- <version>4.4</version>
+ <version>4.9</version>
<groupId>net.razorvine</groupId>
diff --git a/core/src/main/java/com/bealetech/metrics/reporting/Statsd.java b/core/src/main/java/com/bealetech/metrics/reporting/Statsd.java
new file mode 100644
index 0000000000000..ab6c9f458d102
--- /dev/null
+++ b/core/src/main/java/com/bealetech/metrics/reporting/Statsd.java
@@ -0,0 +1,201 @@
+package com.bealetech.metrics.reporting;
+
+import org.apache.commons.lang3.StringUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.*;
+import java.net.DatagramPacket;
+import java.net.DatagramSocket;
+import java.net.InetAddress;
+import java.net.SocketException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.regex.Pattern;
+
+/**
+ * A client to a StatsD server.
+ */
+public class Statsd implements Closeable {
+
+ private static final Logger logger = LoggerFactory.getLogger(Statsd.class);
+
+ private static final Pattern WHITESPACE = Pattern.compile("[\\s]+");
+
+ private static final String DRIVER = "driver";
+ private static final String FENCED_DRIVER = "";
+ private static final String EXECUTOR = "executor";
+
+ private static final String DRIVER_MATCH = ".driver.";
+ private static final String FENCED_DRIVER_MATCH = "..";
+ private static final String EXECUTOR_MATCH = ".executor.";
+
+ public static enum StatType { COUNTER, TIMER, GAUGE }
+
+ private final String host;
+ private final int port;
+
+ private String prefix = "spark";
+ private String appPrefix = "spark.app-";
+ private String yarnAppPrefix = "spark.application_";
+
+ private boolean prependNewline = false;
+
+ private ByteArrayOutputStream outputData;
+ private DatagramSocket datagramSocket;
+ private Writer writer;
+
+ public Statsd(String host, int port) {
+ this.host = host;
+ this.port = port;
+
+ outputData = new ByteArrayOutputStream();
+ }
+
+ public void connect() throws IllegalStateException, SocketException {
+ if(datagramSocket != null) {
+ throw new IllegalStateException("Already connected");
+ }
+
+ prependNewline = false;
+
+ datagramSocket = new DatagramSocket();
+
+ outputData.reset();
+ this.writer = new BufferedWriter(new OutputStreamWriter(outputData));
+ }
+
+ public void setNamePrefix(String namePrefix) {
+ prefix = namePrefix;
+ appPrefix = namePrefix + ".app-";
+ yarnAppPrefix = namePrefix + ".application_";
+ }
+
+ private String buildMetricName(String rawName) throws IllegalArgumentException {
+ rawName = WHITESPACE.matcher(rawName).replaceAll("-");
+
+ // Non-yarn worker metrics
+ if (rawName.startsWith(appPrefix)) {
+ String[] parts = rawName.split("\\.");
+ if (parts.length < 5) {
+ throw new IllegalArgumentException("A spark app metric name must contain at least 4 parts: " + rawName);
+ }
+
+ StringBuilder stringBuilder = new StringBuilder(prefix);
+ if (DRIVER.equals(parts[2])) {
+ // e.g. spark.app-20141209201233-0145.driver.BlockManager.memory.maxMem_MB
+ stringBuilder.append(rawName.substring(rawName.indexOf(DRIVER_MATCH)));
+ } else if (EXECUTOR.equals(parts[3])) {
+ // e.g. spark.app-20141209201027-0139.31.executor.filesystem.file.read_bytes
+ stringBuilder.append(rawName.substring(rawName.indexOf(EXECUTOR_MATCH)));
+ } else if ("jvm".equals(parts[3])) {
+ // spark.app-20141212193256-0012.15.jvm.total.max
+ stringBuilder.append(rawName.substring(rawName.indexOf(".jvm.")));
+ } else {
+ throw new IllegalArgumentException("Unrecognized metric name pattern: " + rawName);
+ }
+
+ return stringBuilder.toString();
+ } else if (rawName.startsWith(yarnAppPrefix)) {
+ String[] parts = rawName.split("\\.");
+
+ StringBuilder stringBuilder = new StringBuilder(prefix);
+
+ if (DRIVER.equals(parts[2])) {
+ // e.g. spark.application_1418834509223_0044.driver.jvm.non-heap.used
+ stringBuilder.append(rawName.substring(rawName.indexOf(DRIVER_MATCH)));
+ } else if (FENCED_DRIVER.equals(parts[2])) {
+ stringBuilder.append(rawName.substring(rawName.indexOf(FENCED_DRIVER_MATCH)));
+ } else if (EXECUTOR.equals(parts[3])) {
+ // spark.app-20141212193256-0012.15.executor.filesystem.total.max
+ stringBuilder.append(rawName.substring(rawName.indexOf(EXECUTOR_MATCH)));
+ } else if ("jvm".equals(parts[3])) {
+ // spark.app-20141212193256-0012.15.jvm.total.max
+ stringBuilder.append(rawName.substring(rawName.indexOf(".jvm.")));
+ } else if ("".equals(parts[2])) {
+ stringBuilder.append(rawName.substring(rawName.indexOf("..")));
+ } else {
+ throw new IllegalArgumentException("Unrecognized metric name pattern: " + rawName);
+ }
+
+ return stringBuilder.toString();
+ }
+
+ return rawName;
+ }
+
+ public void send(String name, String value, StatType statType) throws IOException {
+ String statTypeStr = "";
+ switch (statType) {
+ case COUNTER:
+ statTypeStr = "c";
+ break;
+ case GAUGE:
+ statTypeStr = "g";
+ break;
+ case TIMER:
+ statTypeStr = "ms";
+ break;
+ }
+
+ String tags = null; // TODO: Would be nice to get the job name and job user as tags
+
+ try {
+ name = buildMetricName(name);
+ } catch (IllegalArgumentException e) {
+ logger.error("Error sending to Statsd:", e);
+ return; // Drop metrics that we can't process so we don't push metrics with app names (e.g. 20141209201233-0145)
+ }
+
+ try {
+ if (prependNewline) {
+ writer.write("\n");
+ }
+ writer.write(name);
+ writer.write(":");
+ writer.write(value);
+ writer.write("|");
+ writer.write(statTypeStr);
+ if (tags != null) {
+ writer.write("|");
+ writer.write(tags);
+ }
+ prependNewline = true;
+ writer.flush();
+ } catch (IOException e) {
+ logger.error("Error sending to Statsd:", e);
+ }
+ }
+
+ @Override
+ public void close() throws IOException {
+ if (datagramSocket != null) {
+   DatagramPacket packet = newPacket(outputData);
+   if (packet != null) {
+     packet.setData(outputData.toByteArray());
+     datagramSocket.send(packet);
+   }
+   datagramSocket.close();
+ }
+
+ this.datagramSocket = null;
+ this.writer = null;
+ }
+
+ private DatagramPacket newPacket(ByteArrayOutputStream out) {
+ byte[] dataBuffer;
+
+ if (out != null) {
+ dataBuffer = out.toByteArray();
+ }
+ else {
+ dataBuffer = new byte[8192];
+ }
+
+ try {
+ return new DatagramPacket(dataBuffer, dataBuffer.length, InetAddress.getByName(this.host), this.port);
+ } catch (Exception e) {
+ return null;
+ }
+ }
+}
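
The renaming in `buildMetricName` exists so the volatile application id (e.g. `app-20141209201233-0145`) and numeric executor id never become statsd key components. As a hedged illustration only (not part of the patch), a rough Python equivalent of that rewrite:

    import re

    def normalize(raw_name, prefix="spark"):
        # Drop the per-application id (and a numeric executor id, if present)
        # so statsd keys stay stable across Spark application runs.
        m = re.match(r"^spark\.(?:app-|application_)[^.]+(\..*)$", raw_name)
        if not m:
            return raw_name
        rest = re.sub(r"^\.\d+(?=\.)", "", m.group(1))
        return prefix + rest

    print(normalize("spark.app-20141209201233-0145.driver.BlockManager.memory.maxMem_MB"))
    # -> spark.driver.BlockManager.memory.maxMem_MB
    print(normalize("spark.app-20141209201027-0139.31.executor.filesystem.file.read_bytes"))
    # -> spark.executor.filesystem.file.read_bytes
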
diff --git a/core/src/main/java/com/bealetech/metrics/reporting/StatsdReporter.java b/core/src/main/java/com/bealetech/metrics/reporting/StatsdReporter.java
new file mode 100644
index 0000000000000..85bb53784546a
--- /dev/null
+++ b/core/src/main/java/com/bealetech/metrics/reporting/StatsdReporter.java
@@ -0,0 +1,302 @@
+package com.bealetech.metrics.reporting;
+
+import com.codahale.metrics.*;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.Locale;
+import java.util.Map;
+import java.util.SortedMap;
+import java.util.concurrent.TimeUnit;
+
+/**
+ * A reporter which publishes metric values to a Statsd server.
+ *
+ * @see Statsd
+ */
+public class StatsdReporter extends ScheduledReporter {
+
+ /**
+ * Returns a new {@link Builder} for {@link StatsdReporter}.
+ *
+ * @param registry the registry to report
+ * @return a {@link Builder} instance for a {@link StatsdReporter}
+ */
+ public static Builder forRegistry(MetricRegistry registry) {
+ return new Builder(registry);
+ }
+
+ /**
+ * A builder for {@link StatsdReporter} instances. Defaults to not using a prefix, using the
+ * default clock, converting rates to events/second, converting durations to milliseconds, and
+ * not filtering metrics.
+ */
+ public static class Builder {
+ private final MetricRegistry registry;
+ private String prefix;
+ private TimeUnit rateUnit;
+ private TimeUnit durationUnit;
+ private MetricFilter filter;
+
+ private Builder(MetricRegistry registry) {
+ this.registry = registry;
+ this.prefix = null;
+ this.rateUnit = TimeUnit.SECONDS;
+ this.durationUnit = TimeUnit.MILLISECONDS;
+ this.filter = MetricFilter.ALL;
+ }
+
+ /**
+ * Prefix all metric names with the given string.
+ *
+ * @param prefix the prefix for all metric names
+ * @return {@code this}
+ */
+ public Builder prefixedWith(String prefix) {
+ this.prefix = prefix;
+ return this;
+ }
+
+ /**
+ * Convert rates to the given time unit.
+ *
+ * @param rateUnit a unit of time
+ * @return {@code this}
+ */
+ public Builder convertRatesTo(TimeUnit rateUnit) {
+ this.rateUnit = rateUnit;
+ return this;
+ }
+
+ /**
+ * Convert durations to the given time unit.
+ *
+ * @param durationUnit a unit of time
+ * @return {@code this}
+ */
+ public Builder convertDurationsTo(TimeUnit durationUnit) {
+ this.durationUnit = durationUnit;
+ return this;
+ }
+
+ /**
+ * Only report metrics which match the given filter.
+ *
+ * @param filter a {@link MetricFilter}
+ * @return {@code this}
+ */
+ public Builder filter(MetricFilter filter) {
+ this.filter = filter;
+ return this;
+ }
+
+ /**
+ * Builds a {@link StatsdReporter} with the given properties, sending metrics using the
+ * given {@link Statsd} client.
+ *
+ * @param statsd a {@link Statsd} client
+ * @return a {@link StatsdReporter}
+ */
+ public StatsdReporter build(Statsd statsd) {
+ return new StatsdReporter(registry,
+ statsd,
+ prefix,
+ filter,
+ rateUnit,
+ durationUnit);
+ }
+ }
+
+ private static final Logger LOGGER = LoggerFactory.getLogger(StatsdReporter.class);
+
+ private final Statsd statsd;
+ private final String prefix;
+
+ public StatsdReporter(MetricRegistry registry,
+ Statsd statsd,
+ String prefix,
+ MetricFilter filter,
+ TimeUnit rateUnit,
+ TimeUnit durationUnit) {
+ super(registry, "statsd-reporter", filter, rateUnit, durationUnit);
+
+ this.statsd = statsd;
+ this.statsd.setNamePrefix(prefix);
+ this.prefix = prefix;
+ }
+
+ @Override
+ public void report(SortedMap<String, Gauge> gauges,
+                    SortedMap<String, Counter> counters,
+                    SortedMap<String, Histogram> histograms,
+                    SortedMap<String, Meter> meters,
+                    SortedMap<String, Timer> timers) {
+
+ try {
+ statsd.connect();
+
+ for (Map.Entry<String, Gauge> entry : gauges.entrySet()) {
+ reportGauge(entry.getKey(), entry.getValue());
+ }
+
+ for (Map.Entry<String, Counter> entry : counters.entrySet()) {
+ reportCounter(entry.getKey(), entry.getValue());
+ }
+
+ for (Map.Entry<String, Histogram> entry : histograms.entrySet()) {
+ reportHistogram(entry.getKey(), entry.getValue());
+ }
+
+ for (Map.Entry<String, Meter> entry : meters.entrySet()) {
+ reportMetered(entry.getKey(), entry.getValue());
+ }
+
+ for (Map.Entry<String, Timer> entry : timers.entrySet()) {
+ reportTimer(entry.getKey(), entry.getValue());
+ }
+
+ } catch(IOException e) {
+ LOGGER.warn("Unable to report to StatsD", statsd, e);
+ } finally {
+ try {
+ statsd.close();
+ } catch (IOException e) {
+ LOGGER.debug("Error disconnecting from StatsD server", statsd, e);
+ }
+ }
+ }
+
+ private void reportTimer(String name, Timer timer) throws IOException {
+ final Snapshot snapshot = timer.getSnapshot();
+
+ statsd.send(prefix(name, "max"),
+ format(convertDuration(snapshot.getMax())),
+ Statsd.StatType.TIMER);
+ statsd.send(prefix(name, "mean"),
+ format(convertDuration(snapshot.getMean())),
+ Statsd.StatType.TIMER);
+ statsd.send(prefix(name, "min"),
+ format(convertDuration(snapshot.getMin())),
+ Statsd.StatType.TIMER);
+ statsd.send(prefix(name, "stddev"),
+ format(convertDuration(snapshot.getStdDev())),
+ Statsd.StatType.TIMER);
+ statsd.send(prefix(name, "p50"),
+ format(convertDuration(snapshot.getMedian())),
+ Statsd.StatType.TIMER);
+ statsd.send(prefix(name, "p75"),
+ format(convertDuration(snapshot.get75thPercentile())),
+ Statsd.StatType.TIMER);
+ statsd.send(prefix(name, "p95"),
+ format(convertDuration(snapshot.get95thPercentile())),
+ Statsd.StatType.TIMER);
+ statsd.send(prefix(name, "p98"),
+ format(convertDuration(snapshot.get98thPercentile())),
+ Statsd.StatType.TIMER);
+ statsd.send(prefix(name, "p99"),
+ format(convertDuration(snapshot.get99thPercentile())),
+ Statsd.StatType.TIMER);
+ statsd.send(prefix(name, "p999"),
+ format(convertDuration(snapshot.get999thPercentile())),
+ Statsd.StatType.TIMER);
+
+ reportMetered(name, timer);
+ }
+
+ private void reportMetered(String name, Metered meter) throws IOException {
+ statsd.send(prefix(name, "count"), format(meter.getCount()), Statsd.StatType.GAUGE);
+ statsd.send(prefix(name, "m1_rate"),
+ format(convertRate(meter.getOneMinuteRate())),
+ Statsd.StatType.TIMER);
+ statsd.send(prefix(name, "m5_rate"),
+ format(convertRate(meter.getFiveMinuteRate())),
+ Statsd.StatType.TIMER);
+ statsd.send(prefix(name, "m15_rate"),
+ format(convertRate(meter.getFifteenMinuteRate())),
+ Statsd.StatType.TIMER);
+ statsd.send(prefix(name, "mean_rate"),
+ format(convertRate(meter.getMeanRate())),
+ Statsd.StatType.TIMER);
+ }
+
+ private void reportHistogram(String name, Histogram histogram) throws IOException {
+ final Snapshot snapshot = histogram.getSnapshot();
+ statsd.send(prefix(name, "count"),
+ format(histogram.getCount()),
+ Statsd.StatType.GAUGE);
+ statsd.send(prefix(name, "max"),
+ format(snapshot.getMax()),
+ Statsd.StatType.TIMER);
+ statsd.send(prefix(name, "mean"),
+ format(snapshot.getMean()),
+ Statsd.StatType.TIMER);
+ statsd.send(prefix(name, "min"),
+ format(snapshot.getMin()),
+ Statsd.StatType.TIMER);
+ statsd.send(prefix(name, "stddev"),
+ format(snapshot.getStdDev()),
+ Statsd.StatType.TIMER);
+ statsd.send(prefix(name, "p50"),
+ format(snapshot.getMedian()),
+ Statsd.StatType.TIMER);
+ statsd.send(prefix(name, "p75"),
+ format(snapshot.get75thPercentile()),
+ Statsd.StatType.TIMER);
+ statsd.send(prefix(name, "p95"),
+ format(snapshot.get95thPercentile()),
+ Statsd.StatType.TIMER);
+ statsd.send(prefix(name, "p98"),
+ format(snapshot.get98thPercentile()),
+ Statsd.StatType.TIMER);
+ statsd.send(prefix(name, "p99"),
+ format(snapshot.get99thPercentile()),
+ Statsd.StatType.TIMER);
+ statsd.send(prefix(name, "p999"),
+ format(snapshot.get999thPercentile()),
+ Statsd.StatType.TIMER);
+ }
+
+ private void reportCounter(String name, Counter counter) throws IOException {
+ statsd.send(prefix(name, "count"),
+ format(counter.getCount()),
+ Statsd.StatType.COUNTER);
+ }
+
+ private void reportGauge(String name, Gauge gauge) throws IOException {
+ final String value = format(gauge.getValue());
+ if (value != null) {
+ statsd.send(prefix(name), value,
+ Statsd.StatType.GAUGE);
+ }
+ }
+
+ private String format(Object o) {
+ if (o instanceof Float) {
+ return format(((Float) o).doubleValue());
+ } else if (o instanceof Double) {
+ return format(((Double) o).doubleValue());
+ } else if (o instanceof Byte) {
+ return format(((Byte) o).longValue());
+ } else if (o instanceof Short) {
+ return format(((Short) o).longValue());
+ } else if (o instanceof Integer) {
+ return format(((Integer) o).longValue());
+ } else if (o instanceof Long) {
+ return format(((Long) o).longValue());
+ }
+ return null;
+ }
+
+ private String prefix(String... components) {
+ return MetricRegistry.name(prefix, components);
+ }
+
+ private String format(long n) {
+ return Long.toString(n);
+ }
+
+ private String format(double v) {
+ return String.format(Locale.US, "%2.2f", v);
+ }
+}
diff --git a/core/src/main/java/com/shopify/metrics/reporting/LogReporter.java b/core/src/main/java/com/shopify/metrics/reporting/LogReporter.java
new file mode 100644
index 0000000000000..22e15e261e9e1
--- /dev/null
+++ b/core/src/main/java/com/shopify/metrics/reporting/LogReporter.java
@@ -0,0 +1,217 @@
+package com.shopify.metrics.reporting;
+
+import com.codahale.metrics.*;
+
+import org.apache.commons.lang3.StringUtils;
+import org.apache.log4j.Logger;
+import org.apache.log4j.Level;
+import org.apache.log4j.RollingFileAppender;
+import org.apache.log4j.PatternLayout;
+
+import java.io.*;
+import java.util.regex.Pattern;
+import java.nio.charset.Charset;
+import java.util.Locale;
+import java.util.Map;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.SortedMap;
+import java.util.concurrent.TimeUnit;
+
+/**
+ * A reporter which appends the measurements for each metric to a rolling log file.
+ */
+
+public class LogReporter extends ScheduledReporter {
+
+ private static final Logger LOGGER = Logger.getLogger(LogReporter.class);
+ private static final Pattern WHITESPACE = Pattern.compile("[\\s]+");
+
+ public static Builder forRegistry(MetricRegistry registry) {
+ return new Builder(registry);
+ }
+
+ public static class Builder {
+ private final MetricRegistry registry;
+ private Locale locale;
+ private TimeUnit rateUnit;
+ private TimeUnit durationUnit;
+ private Clock clock;
+ private MetricFilter filter;
+
+ private Builder(MetricRegistry registry) {
+ this.registry = registry;
+ this.locale = Locale.getDefault();
+ this.rateUnit = TimeUnit.SECONDS;
+ this.durationUnit = TimeUnit.MILLISECONDS;
+ this.clock = Clock.defaultClock();
+ this.filter = MetricFilter.ALL;
+ }
+
+ public Builder formatFor(Locale locale){
+ this.locale = locale;
+ return this;
+ }
+
+ public Builder convertRatesTo(TimeUnit rateUnit){
+ this.rateUnit = rateUnit;
+ return this;
+ }
+
+ public Builder convertDurationsTo(TimeUnit durationUnit){
+ this.durationUnit = durationUnit;
+ return this;
+ }
+
+ public Builder withClock(Clock clock) {
+ this.clock = clock;
+ return this;
+ }
+
+ public Builder filter(MetricFilter filter){
+ this.filter = filter;
+ return this;
+ }
+
+ public LogReporter build(String file, String maxFileSize, int maxBackupIndex) {
+ return new LogReporter(registry, file, maxFileSize, maxBackupIndex, locale, rateUnit, durationUnit, clock, filter);
+ }
+ }
+
+ private final Locale locale;
+ private final Clock clock;
+ private final Logger logger;
+
+ private LogReporter(MetricRegistry registry,
+ String file,
+ String maxFileSize,
+ int maxBackupIndex,
+ Locale locale,
+ TimeUnit rateUnit,
+ TimeUnit durationUnit,
+ Clock clock,
+ MetricFilter filter) {
+
+ super(registry, "log-reporter", filter, rateUnit, durationUnit);
+ this.logger = Logger.getLogger("com.shopify.metrics");
+ this.logger.setAdditivity(false);
+
+ try {
+ PatternLayout layout = new PatternLayout("%d{ISO8601} %c %m%n");
+ RollingFileAppender logfile = new RollingFileAppender(layout, file);
+
+ LOGGER.info(String.format("Creating metrics output file: %s", file));
+ logfile.setMaxFileSize(maxFileSize);
+ logfile.setMaxBackupIndex(maxBackupIndex);
+
+ this.logger.setLevel(Level.INFO);
+ this.logger.addAppender(logfile);
+ } catch (IOException e) {
+ LOGGER.error("Could not add appender", e);
+ }
+
+ this.locale = locale;
+ this.clock = clock;
+ }
+
+ @Override
+ public void report(SortedMap<String, Gauge> gauges,
+                    SortedMap<String, Counter> counters,
+                    SortedMap<String, Histogram> histograms,
+                    SortedMap<String, Meter> meters,
+                    SortedMap<String, Timer> timers) {
+
+ final long timestamp = TimeUnit.MILLISECONDS.toSeconds(clock.getTime());
+
+ for (Map.Entry<String, Gauge> entry : gauges.entrySet()) {
+ reportGauge(timestamp, entry.getKey(), entry.getValue());
+ }
+
+ for (Map.Entry<String, Counter> entry : counters.entrySet()) {
+ reportCounter(timestamp, entry.getKey(), entry.getValue());
+ }
+
+ for (Map.Entry<String, Histogram> entry : histograms.entrySet()) {
+ reportHistogram(timestamp, entry.getKey(), entry.getValue());
+ }
+
+ for (Map.Entry<String, Meter> entry : meters.entrySet()) {
+ reportMeter(timestamp, entry.getKey(), entry.getValue());
+ }
+
+ for (Map.Entry<String, Timer> entry : timers.entrySet()) {
+ reportTimer(timestamp, entry.getKey(), entry.getValue());
+ }
+ }
+
+ private void reportTimer(long timestamp, String name, Timer timer ) {
+
+ final Snapshot snapshot = timer.getSnapshot();
+
+ report(timestamp,
+ name,
+ "count=%d max=%f mean=%f min=%f stddev= %f p50=%f p75=%f p95=%f p98=%f p99=%f p999=%f mean_rate=%f m1_rate=%f m5_rate=%f m15_rate=%f rate_unit=calls/%s duration_unit=%s",
+ timer.getCount(),
+ convertDuration(snapshot.getMax()),
+ convertDuration(snapshot.getMean()),
+ convertDuration(snapshot.getMin()),
+ convertDuration(snapshot.getStdDev()),
+ convertDuration(snapshot.getMedian()),
+ convertDuration(snapshot.get75thPercentile()),
+ convertDuration(snapshot.get95thPercentile()),
+ convertDuration(snapshot.get98thPercentile()),
+ convertDuration(snapshot.get99thPercentile()),
+ convertDuration(snapshot.get999thPercentile()),
+ convertRate(timer.getMeanRate()),
+ convertRate(timer.getOneMinuteRate()),
+ convertRate(timer.getFiveMinuteRate()),
+ convertRate(timer.getFifteenMinuteRate()),
+ getRateUnit(),
+ getDurationUnit());
+ }
+
+ private void reportMeter(long timestamp, String name, Meter meter) {
+ report(timestamp,
+ name,
+ "count=%d mean_rate=%f m1_rate=%f m5_rate=%f m15_rate=%f rate_unit=events/%s",
+ meter.getCount(),
+ convertRate(meter.getMeanRate()),
+ convertRate(meter.getOneMinuteRate()),
+ convertRate(meter.getFiveMinuteRate()),
+ convertRate(meter.getFifteenMinuteRate()),
+ getRateUnit());
+ }
+
+ private void reportHistogram(long timestamp, String name, Histogram histogram) {
+ final Snapshot snapshot = histogram.getSnapshot();
+
+ report(timestamp,
+ name,
+ "count=%d max=%d mean=%f min=%f stddev=%f p50=%f p75=%f p95=%f p98=%f p99=%f p999=%f",
+ histogram.getCount(),
+ snapshot.getMax(),
+ snapshot.getMean(),
+ snapshot.getMin(),
+ snapshot.getStdDev(),
+ snapshot.getMedian(),
+ snapshot.get75thPercentile(),
+ snapshot.get95thPercentile(),
+ snapshot.get98thPercentile(),
+ snapshot.get99thPercentile(),
+ snapshot.get999thPercentile());
+ }
+
+ private void reportGauge(long timestamp, String name, Gauge gauge){
+ report(timestamp, name, "value=%s", gauge.getValue());
+ }
+
+ private void reportCounter(long timestamp, String name, Counter counter) {
+ report(timestamp, name,"count=%d", counter.getCount());
+ }
+
+ private void report(long timestamp, String name, String line, Object... values) {
+ String metrics = String.format(line, values);
+ this.logger.info(String.format(locale, "event_at=%d %s %s", timestamp, name, metrics));
+ }
+}
diff --git a/core/src/main/scala/org/apache/spark/metrics/sink/LogSink.scala b/core/src/main/scala/org/apache/spark/metrics/sink/LogSink.scala
new file mode 100644
index 0000000000000..ba4618ed9e53e
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/metrics/sink/LogSink.scala
@@ -0,0 +1,89 @@
+/*
+* Licensed to the Apache Software Foundation (ASF) under one or more
+* contributor license agreements. See the NOTICE file distributed with
+* this work for additional information regarding copyright ownership.
+* The ASF licenses this file to You under the Apache License, Version 2.0
+* (the "License"); you may not use this file except in compliance with
+* the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*/
+
+package org.apache.spark.metrics.sink
+
+import java.io.File
+import java.util.{Locale, Properties}
+import java.util.concurrent.TimeUnit
+
+import com.codahale.metrics.MetricRegistry
+import com.shopify.metrics.reporting.LogReporter
+
+import org.apache.spark.SecurityManager
+import org.apache.spark.metrics.MetricsSystem
+
+private[spark] class LogSink(val property: Properties, val registry: MetricRegistry,
+ securityMgr: SecurityManager) extends Sink {
+
+ val LOG_KEY_PERIOD = "period"
+ val LOG_KEY_UNIT = "unit"
+ val LOG_KEY_FILE = "file"
+ val LOG_KEY_MAX_FILE_SIZE = "maxFileSize"
+ val LOG_KEY_MAX_BACKUP_INDEX = "maxFileIndex"
+
+ val LOG_DEFAULT_PERIOD = 10
+ val LOG_DEFAULT_UNIT = "SECONDS"
+ val LOG_DEFAULT_FILE = "/tmp/metrics"
+ val LOG_DEFAULT_MAX_FILE_SIZE = "50mb"
+ val LOG_DEFAULT_BACKUP_INDEX = 10
+
+ val pollPeriod = Option(property.getProperty(LOG_KEY_PERIOD)) match {
+ case Some(s) => s.toInt
+ case None => LOG_DEFAULT_PERIOD
+ }
+
+ val pollUnit: TimeUnit = Option(property.getProperty(LOG_KEY_UNIT)) match {
+ case Some(s) => TimeUnit.valueOf(s.toUpperCase())
+ case None => TimeUnit.valueOf(LOG_DEFAULT_UNIT)
+ }
+
+ MetricsSystem.checkMinimalPollingPeriod(pollUnit, pollPeriod)
+
+ val pollFile = Option(property.getProperty(LOG_KEY_FILE)) match {
+ case Some(s) => s
+ case None => LOG_DEFAULT_FILE
+ }
+
+ val maxFileSize = Option(property.getProperty(LOG_KEY_MAX_FILE_SIZE)) match {
+ case Some(s) => s.toString
+ case None => LOG_DEFAULT_MAX_FILE_SIZE
+ }
+
+ val maxBackupIndex = Option(property.getProperty(LOG_KEY_MAX_BACKUP_INDEX)) match {
+ case Some(s) => s.toInt
+ case None => LOG_DEFAULT_BACKUP_INDEX
+ }
+
+ val reporter: LogReporter = LogReporter.forRegistry(registry)
+ .formatFor(Locale.US)
+ .convertDurationsTo(TimeUnit.MILLISECONDS)
+ .convertRatesTo(TimeUnit.SECONDS)
+ .build(pollFile, maxFileSize, maxBackupIndex)
+
+ override def start() {
+ reporter.start(pollPeriod, pollUnit)
+ }
+
+ override def stop() {
+ reporter.stop()
+ }
+
+ override def report() {
+ reporter.report()
+ }
+ }
diff --git a/core/src/main/scala/org/apache/spark/metrics/sink/StatsdSink.scala b/core/src/main/scala/org/apache/spark/metrics/sink/StatsdSink.scala
new file mode 100644
index 0000000000000..402d4968e51ec
--- /dev/null
+++ b/core/src/main/scala/org/apache/spark/metrics/sink/StatsdSink.scala
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.metrics.sink
+
+import java.net.InetSocketAddress
+import java.util.Properties
+import java.util.concurrent.TimeUnit
+
+import com.codahale.metrics.MetricRegistry
+import com.bealetech.metrics.reporting.{Statsd, StatsdReporter}
+
+import org.apache.spark.SecurityManager
+import org.apache.spark.metrics.MetricsSystem
+
+private[spark] class StatsdSink(val property: Properties, val registry: MetricRegistry,
+ securityMgr: SecurityManager) extends Sink {
+ val STATSD_DEFAULT_PERIOD = 10
+ val STATSD_DEFAULT_UNIT = "SECONDS"
+ val STATSD_DEFAULT_PREFIX = ""
+
+ val STATSD_KEY_HOST = "host"
+ val STATSD_KEY_PORT = "port"
+ val STATSD_KEY_PERIOD = "period"
+ val STATSD_KEY_UNIT = "unit"
+ val STATSD_KEY_PREFIX = "prefix"
+
+ def propertyToOption(prop: String) = Option(property.getProperty(prop))
+
+ if (!propertyToOption(STATSD_KEY_HOST).isDefined) {
+ throw new Exception("Statsd sink requires 'host' property.")
+ }
+
+ if (!propertyToOption(STATSD_KEY_PORT).isDefined) {
+ throw new Exception("Statsd sink requires 'port' property.")
+ }
+
+ val host = propertyToOption(STATSD_KEY_HOST).get
+ val port = propertyToOption(STATSD_KEY_PORT).get.toInt
+
+ val pollPeriod = propertyToOption(STATSD_KEY_PERIOD) match {
+ case Some(s) => s.toInt
+ case None => STATSD_DEFAULT_PERIOD
+ }
+
+ val pollUnit = propertyToOption(STATSD_KEY_UNIT) match {
+ case Some(s) => TimeUnit.valueOf(s.toUpperCase())
+ case None => TimeUnit.valueOf(STATSD_DEFAULT_UNIT)
+ }
+
+ val prefix = propertyToOption(STATSD_KEY_PREFIX).getOrElse(STATSD_DEFAULT_PREFIX)
+
+ MetricsSystem.checkMinimalPollingPeriod(pollUnit, pollPeriod)
+
+ val statsd: Statsd = new Statsd(host, port)
+
+ val reporter: StatsdReporter = StatsdReporter.forRegistry(registry)
+ .convertDurationsTo(TimeUnit.MILLISECONDS)
+ .convertRatesTo(TimeUnit.SECONDS)
+ .prefixedWith(prefix)
+ .build(statsd)
+
+ override def start() {
+ reporter.start(pollPeriod, pollUnit)
+ }
+
+ override def stop() {
+ reporter.stop()
+ }
+
+ override def report() {
+ try {
+ reporter.report()
+ } catch {
+ case e: NullPointerException => println("StatsD reporter errored upon exit");
+ }
+ }
+}
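
For reference (not included in this diff), a hedged example of how these sinks might be wired up in `conf/metrics.properties`; the host, port, and file path are placeholders, while the option names mirror the property keys read by `StatsdSink` and `LogSink`:

    *.sink.statsd.class=org.apache.spark.metrics.sink.StatsdSink
    *.sink.statsd.host=localhost
    *.sink.statsd.port=8125
    *.sink.statsd.period=10
    *.sink.statsd.unit=seconds
    *.sink.statsd.prefix=spark

    *.sink.log.class=org.apache.spark.metrics.sink.LogSink
    *.sink.log.file=/tmp/metrics
    *.sink.log.maxFileSize=50mb
    *.sink.log.maxFileIndex=10
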
diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala
index 660702f6e6fd0..3e1d1e27f728e 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala
@@ -420,6 +420,7 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp
unknownExecutors.foreach { id =>
logWarning(s"Executor to kill $id does not exist!")
}
+ executorsPendingToRemove --= knownExecutors
// If we do not wish to replace the executors we kill, sync the target number of executors
// with the cluster manager to avoid allocating new ones. When computing the new target,
diff --git a/python/pyspark/daemon.py b/python/pyspark/daemon.py
index 7f06d4288c872..9426ae32150ac 100644
--- a/python/pyspark/daemon.py
+++ b/python/pyspark/daemon.py
@@ -14,6 +14,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
+import re
+import resource
import numbers
import os
@@ -26,7 +28,7 @@
import gc
from errno import EINTR, EAGAIN
from socket import AF_INET, SOCK_STREAM, SOMAXCONN
-from signal import SIGHUP, SIGTERM, SIGCHLD, SIG_DFL, SIG_IGN, SIGINT
+from signal import SIGHUP, SIGTERM, SIGCHLD, SIG_DFL, SIG_IGN, SIGINT, SIGPROF
from pyspark.worker import main as worker_main
from pyspark.serializers import read_int, write_int
@@ -51,12 +53,39 @@ def worker(sock):
# it's useful for debugging (show the stacktrace before exit)
signal.signal(SIGINT, signal.default_int_handler)
+ # Shopify addition: signal handler that toggles yappi profiling on SIGPROF
+ profiling = [False]
+
+ def handle_sigprof(*args):
+ import yappi
+
+ if not profiling[0]:
+ profiling[0] = True
+ yappi.start()
+ else:
+ profiling[0] = False
+ yappi.get_func_stats().print_all()
+ yappi.get_thread_stats().print_all()
+ signal.signal(SIGPROF, handle_sigprof)
+
+ # Blocks until the socket is closed by draining the input stream
+ # until it raises an exception or returns EOF.
+ def waitSocketClose(sock):
+ try:
+ while True:
+ # Empty string is returned upon EOF (and only then).
+ if sock.recv(4096) == '':
+ return
+ except:
+ pass
+
# Read the socket using fdopen instead of socket.makefile() because the latter
# seems to be very slow; note that we need to dup() the file descriptor because
# otherwise writes also cause a seek that makes us miss data on the read side.
infile = os.fdopen(os.dup(sock.fileno()), "rb", 65536)
outfile = os.fdopen(os.dup(sock.fileno()), "wb", 65536)
exit_code = 0
+
try:
worker_main(infile, outfile)
except SystemExit as exc:
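
To make the SIGPROF hook above concrete: the first signal starts yappi in the worker, the second stops it and prints the collected function and thread stats. A hypothetical helper (not part of the patch), run on the worker's host, might look like:

    import os
    import signal
    import time

    def profile_pyspark_worker(pid, seconds=30):
        """Toggle yappi profiling on a running PySpark worker process."""
        os.kill(pid, signal.SIGPROF)   # first signal: handler calls yappi.start()
        time.sleep(seconds)            # let the worker run under the profiler
        os.kill(pid, signal.SIGPROF)   # second signal: handler prints the stats
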
diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py
index fa8e0a0574a62..e48a0e625fb0b 100644
--- a/python/pyspark/rdd.py
+++ b/python/pyspark/rdd.py
@@ -53,6 +53,7 @@
from pyspark.traceback_utils import SCCallSiteSync
from py4j.java_collections import ListConverter, MapConverter
+from statsd import DogStatsd as statsd
__all__ = ["RDD"]
@@ -1694,10 +1695,13 @@ def partitionBy(self, numPartitions, partitionFunc=portable_hash):
def add_shuffle_key(split, iterator):
+ client = statsd()
buckets = defaultdict(list)
+ record_count = 0
c, batch = 0, min(10 * numPartitions, 1000)
for k, v in iterator:
+ record_count += 1
buckets[partitionFunc(k) % numPartitions].append((k, v))
c += 1
diff --git a/python/pyspark/serializers.py b/python/pyspark/serializers.py
index 411b4dbf481f1..3664e3c31a126 100644
--- a/python/pyspark/serializers.py
+++ b/python/pyspark/serializers.py
@@ -161,7 +161,15 @@ def _read_with_length(self, stream):
obj = stream.read(length)
if len(obj) < length:
raise EOFError
- return self.loads(obj)
+
+ try:
+ result = self.loads(obj)
+ except TypeError as e:
+ print >>sys.stderr, "Error while decoding"
+ print >>sys.stderr, obj.encode('hex')
+ raise
+
+ return result
def dumps(self, obj):
"""
diff --git a/python/pyspark/statsd.py b/python/pyspark/statsd.py
new file mode 100644
index 0000000000000..d0c8f92ac984a
--- /dev/null
+++ b/python/pyspark/statsd.py
@@ -0,0 +1,145 @@
+"""
+DogStatsd is a Python client for DogStatsd, a Statsd fork for Datadog.
+"""
+
+import logging
+from random import random
+from time import time
+import socket
+
+try:
+ from itertools import imap
+except ImportError:
+ imap = map
+
+
+log = logging.getLogger('dogstatsd')
+
+
+class DogStatsd(object):
+
+ def __init__(self, host='localhost', port=8125):
+ """
+ Initialize a DogStatsd object.
+
+ >>> statsd = DogStatsd()
+
+ :param host: the host of the DogStatsd server.
+ :param port: the port of the DogStatsd server.
+ """
+ self._host = None
+ self._port = None
+ self.socket = None
+ self.connect(host, port)
+
+ def connect(self, host, port):
+ """
+ Connect to the statsd server on the given host and port.
+ """
+ self._host = host
+ self._port = int(port)
+ self.socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
+ self.socket.connect((self._host, self._port))
+
+ def gauge(self, metric, value, tags=None, sample_rate=1):
+ """
+ Record the value of a gauge, optionally setting a list of tags and a
+ sample rate.
+
+ >>> statsd.gauge('users.online', 123)
+ >>> statsd.gauge('active.connections', 1001, tags=["protocol:http"])
+ """
+ return self._send(metric, 'g', value, tags, sample_rate)
+
+ def increment(self, metric, value=1, tags=None, sample_rate=1):
+ """
+ Increment a counter, optionally setting a value, tags and a sample
+ rate. Tags is an optional list of key value pairs separated by
+ colons.
+
+ >>> statsd.increment('page.views')
+ >>> statsd.increment('files.transferred', 124)
+ """
+ self._send(metric, 'c', value, tags, sample_rate)
+
+ def decrement(self, metric, value=1, tags=None, sample_rate=1):
+ """
+ Decrement a counter, optionally setting a value, tags and a sample
+ rate.
+
+ >>> statsd.decrement('files.remaining')
+ >>> statsd.decrement('active.connections', 2)
+ """
+ self._send(metric, 'c', -value, tags, sample_rate)
+
+ def histogram(self, metric, value, tags=None, sample_rate=1):
+ """
+ Sample a histogram value, optionally setting tags and a sample rate.
+
+ >>> statsd.histogram('uploaded.file.size', 1445)
+ >>> statsd.histogram('album.photo.count', 26, tags=["gender:female"])
+ """
+ self._send(metric, 'h', value, tags, sample_rate)
+
+ def timing(self, metric, value, tags=None, sample_rate=1):
+ """
+ Record a timing, optionally setting tags and a sample rate.
+
+ >>> statsd.timing("query.response.time", 1234)
+ """
+ self._send(metric, 'ms', value, tags, sample_rate)
+
+ def timed(self, metric, tags=None, sample_rate=1):
+ """
+ A decorator that will measure the distribution of a function's run time.
+ Optionally specify a list of tags or a sample rate.
+ ::
+
+ @statsd.timed('user.query.time', sample_rate=0.5)
+ def get_user(user_id):
+ # Do what you need to ...
+ pass
+
+ # Is equivalent to ...
+ start = time.time()
+ try:
+ get_user(user_id)
+ finally:
+ statsd.timing('user.query.time', time.time() - start)
+ """
+ def wrapper(func):
+ def wrapped(*args, **kwargs):
+ start = time()
+ result = func(*args, **kwargs)
+ self.timing(metric, time() - start, tags=tags, sample_rate=sample_rate)
+ return result
+ wrapped.__name__ = func.__name__
+ wrapped.__doc__ = func.__doc__
+ wrapped.__dict__.update(func.__dict__)
+ return wrapped
+ return wrapper
+
+ def set(self, metric, value, tags=None, sample_rate=1):
+ """
+ Sample a set value.
+
+ >>> statsd.set('visitors.uniques', 999)
+ """
+ self._send(metric, 's', value, tags, sample_rate)
+
+ def _send(self, metric, metric_type, value, tags, sample_rate):
+ if sample_rate != 1 and random() > sample_rate:
+ return
+
+ payload = [metric, ":", value, "|", metric_type]
+ if sample_rate != 1:
+ payload.extend(["|@", sample_rate])
+ if tags:
+ if not type(tags) is list:
+ tags = [tags]
+ payload.extend(["|#", ",".join([str(tag) for tag in tags])])
+
+ try:
+ self.socket.send("".join(imap(str, payload)))
+ except socket.error:
+ log.exception("Error submitting metric %s" % metric)
diff --git a/script/compile b/script/compile
new file mode 100755
index 0000000000000..429217d4970d8
--- /dev/null
+++ b/script/compile
@@ -0,0 +1,41 @@
+#!/usr/bin/env bash
+
+set -x
+set -e
+
+FWDIR="$(cd `dirname $0`/..; pwd)"
+GIT_SHA=$(git rev-parse HEAD)
+GIT_BRANCH=$(git name-rev HEAD)
+GIT_DESC=$(git describe HEAD)
+GIT_HUMAN=$(git log --pretty=format:"%s (%an)" HEAD...HEAD~1)
+
+# Compile spark jars into dist folder
+export LOCAL_SBT_DIR=1
+export SBT_HOME=$FWDIR/sbt
+export SCALA_HOME=$HOME/.sbt/boot/scala-2.10.4
+
+# Find JAVA_HOME on OS X and Linux
+uname=$(uname)
+if [[ "$uname" == "Darwin" ]]; then
+ JAVA_HOME=$(/usr/libexec/java_home -v 1.7)
+else
+ JAVA_HOME=/usr/lib/jvm/default-java/
+fi
+export JAVA_HOME=$JAVA_HOME
+
+./make-distribution.sh --skip-java-test $(cat SHOPIFY_HADOOP_OPTIONS)
+
+if [ "$?" != "0" ] || [ -e "$FWDIR/lib/spark-assembly*hadoop*.jar" ]; then
+ echo "Failed to make spark distro using sbt."
+ exit 1
+fi
+
+# Remove everything not in dist or conf
+find * -maxdepth 0 -name 'dist' -o -name 'conf' -prune -o -exec rm -rf '{}' ';'
+
+# Copy everything out of dist that doesn't exist already
+mv -n dist/* .
+echo $GIT_SHA > ./GIT_SHA
+echo $GIT_BRANCH >> ./GIT_DESC
+echo $GIT_DESC >> ./GIT_DESC
+echo $GIT_HUMAN >> ./GIT_DESC
diff --git a/script/get_config b/script/get_config
new file mode 100755
index 0000000000000..32ba5e9f9d901
--- /dev/null
+++ b/script/get_config
@@ -0,0 +1,15 @@
+#!/usr/bin/env bash
+
+set -x
+set -e
+
+DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
+CONFIG_PATH=$DIR/../conf/conf.cloudera.yarn
+
+rm -rf $CONFIG_PATH
+mkdir $CONFIG_PATH
+curl -o /tmp/cloudera_configs.zip http://util-ng.chi.shopify.com:7180/cmf/services/47/client-config
+unzip -j /tmp/cloudera_configs.zip -d $CONFIG_PATH
+
+# The topology script isn't available locally so we just don't run it
+perl -0777 -i -pe 's/\s*\n\s*net.topology.script.file.name<\/name>\n\s*[^<]+?<\/value>\n\s*<\/property>//igs' $CONFIG_PATH/core-site.xml
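
The perl one-liner above strips the `net.topology.script.file.name` property because the referenced topology script only exists on the cluster. Should the regex ever prove brittle, a hedged Python sketch of the same clean-up (not part of this diff):

    import xml.etree.ElementTree as ET

    def strip_topology_script(path):
        # Remove the <property> whose <name> is net.topology.script.file.name
        # from a Hadoop core-site.xml; the script is not available locally.
        tree = ET.parse(path)
        root = tree.getroot()  # <configuration>
        for prop in list(root.findall("property")):
            if prop.findtext("name") == "net.topology.script.file.name":
                root.remove(prop)
        tree.write(path)

    strip_topology_script("conf/conf.cloudera.yarn/core-site.xml")
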
diff --git a/script/setup b/script/setup
new file mode 100755
index 0000000000000..edee707a893a6
--- /dev/null
+++ b/script/setup
@@ -0,0 +1,13 @@
+#!/usr/bin/env bash
+
+set -x
+set -e
+
+if [ "$CI" = "" ]; then
+ ./script/get_config
+fi
+
+FWDIR="$(cd `dirname $0`/..; pwd)"
+export HADOOP_OPTIONS="$(cat $FWDIR/SHOPIFY_HADOOP_OPTIONS)"
+export JAVA_HOME=$(/usr/libexec/java_home -v 1.8)
+build/mvn $HADOOP_OPTIONS -DskipTests clean package
diff --git a/script/watch b/script/watch
new file mode 100755
index 0000000000000..b747765b9f635
--- /dev/null
+++ b/script/watch
@@ -0,0 +1,9 @@
+#!/usr/bin/env bash
+
+set -x
+set -e
+
+FWDIR="$(cd `dirname $0`/..; pwd)"
+export HADOOP_OPTIONS="$(cat $FWDIR/SHOPIFY_HADOOP_OPTIONS)"
+export MAVEN_OPTS="-Xmx2g -XX:MaxPermSize=512M -XX:ReservedCodeCacheSize=512m"
+mvn $HADOOP_OPTIONS scala:cc