diff --git a/.gitignore b/.gitignore index faada9c8a..73df90509 100644 --- a/.gitignore +++ b/.gitignore @@ -27,6 +27,8 @@ bld/ # Visual Studio 2015/2017 cache/options directory .vs/ +# Visual Studio Code cache/options directory +.vscode/ # Uncomment if you have tasks that create the project's static files in wwwroot #wwwroot/ diff --git a/binder/Dockerfile b/binder/Dockerfile new file mode 100644 index 000000000..cf9401047 --- /dev/null +++ b/binder/Dockerfile @@ -0,0 +1 @@ +FROM mcr.microsoft.com/dotnet-spark:2.4.6-0.12.1-interactive diff --git a/binder/README.md b/binder/README.md new file mode 100644 index 000000000..926671da2 --- /dev/null +++ b/binder/README.md @@ -0,0 +1,7 @@ +# .NET for Apache Spark Interactive + +This interactive notebook allows you to explore .NET for Apache Spark in your web browser. + +To launch it, just click the button below: + +[![Binder](./dotnet-spark-binder.svg)](https://mybinder.org/v2/gh/indy-3rdman/spark/docker_images_init?urlpath=lab/tree/nb/) diff --git a/binder/dotnet-spark-binder.svg b/binder/dotnet-spark-binder.svg new file mode 100644 index 000000000..121abdcf8 --- /dev/null +++ b/binder/dotnet-spark-binder.svg @@ -0,0 +1 @@ +launchlaunchSpark.NET interactiveSpark.NET interactive diff --git a/docker/images/interactive/README.md b/docker/images/interactive/README.md new file mode 100644 index 000000000..f5b96f03c --- /dev/null +++ b/docker/images/interactive/README.md @@ -0,0 +1,69 @@ +# .NET for Apache Spark interactive Docker image + +## Description + +This directory contains the source code to build an interactive Docker image, using [jupyter/base-notebook](https://hub.docker.com/r/jupyter/base-notebook) as the foundation. + +## Building + +To build the image, just execute the [build.sh](build.sh) bash script. By default, it builds an image using the latest supported versions of .NET Core, Apache Spark and .NET for Apache Spark. + +You can also build for different versions by specifying one of the following options: + +```bash + -a, --apache-spark + -d, --dotnet-spark +``` + +For more details, please run + +```bash +build.sh -h +``` + +Please note, however, that not all version combinations are supported. + +## The image build stages + +Using separate stages makes it possible to efficiently build multiple images that are based on the same .NET Core SDK but use different .NET for Apache Spark or Apache Spark versions. +That way, dependencies (e.g. the .NET Core SDK) do not have to be downloaded again for each version, which saves time and bandwidth. + +The three stages used in the build process are: + +- ### **dotnet-interactive** + + Builds on the jupyter/base-notebook image and installs the .NET Core SDK, along with Microsoft.DotNet.Interactive. + +- ### **dotnet-spark-base (interactive)** + + Adds the specified .NET for Apache Spark version to the dotnet-interactive image and also copies/builds the HelloSpark example into the image. HelloSpark is also used to install the correct microsoft-spark-*.jar version that is required to start a spark-submit session in debug mode. + +- ### **dotnet-spark (interactive)** + + Gets/installs the specified Apache Spark version and adds the example notebooks. + +## Docker Run Example + +To start a new container based on the dotnet-spark interactive image, just run the following command.
+ +```bash +docker run --name dotnet-spark-interactive -d -p 8888:8888 3rdman/dotnet-spark:interactive-latest +``` + +After that, examine the logs of the container to get the URL, including the authentication token, that is required to connect to Jupyter. + +```bash +docker logs -f dotnet-spark-interactive +``` + +![launch](img/dotnet-interactive-docker-launch.gif) + +It is important to start the .NET for Apache Spark backend in debug mode before using it in any of the notebooks. + +The helper script start-spark-debug.sh can do this for you, as demonstrated below. + +![debug](img/dotnet-interactive-start-debug.gif) + +Once the backend is running, please open 02-basic-example.ipynb to learn how you can use .NET for Apache Spark in your own notebooks. + +![example](img/dotnet-interactive-basic-example.gif) \ No newline at end of file diff --git a/docker/images/interactive/build.sh b/docker/images/interactive/build.sh new file mode 100755 index 000000000..5525f96c0 --- /dev/null +++ b/docker/images/interactive/build.sh @@ -0,0 +1,251 @@ +#!/usr/bin/env bash + +# Create different versions of the .NET for Apache Spark interactive Docker image +# based on the Apache Spark and .NET for Apache Spark version. + +set -o errexit # abort on nonzero exitstatus +set -o nounset # abort on unbound variable +set -o pipefail # don't hide errors within pipes + +readonly image_repository='3rdman' +readonly supported_apache_spark_versions=( + "2.3.0" "2.3.1" "2.3.2" "2.3.3" "2.3.4" + "2.4.0" "2.4.1" "2.4.3" "2.4.4" "2.4.5" "2.4.6" "2.4.7" + "3.0.0" "3.0.1" + ) +readonly supported_dotnet_spark_versions=("1.0.0") +readonly dotnet_core_version=3.1 + +dotnet_spark_version=1.0.0 +dotnet_spark_jar="" +apache_spark_version=3.0.1 +apache_spark_short_version="${apache_spark_version:0:3}" + +main() { + # Parse the options and set the related variables + while [[ "$#" -gt 0 ]]; do + case $1 in + -a|--apache-spark) opt_check_apache_spark_version "$2"; shift ;; + -d|--dotnet-spark) opt_check_dotnet_spark_version "$2"; shift ;; + -h|--help) print_help + exit 1 ;; + *) echo "Unknown parameter passed: $1"; exit 1 ;; + esac + shift + done + + echo "Building .NET for Apache Spark ${dotnet_spark_version} runtime image with Apache Spark ${apache_spark_version}" + + # execute the different build stages + cleanup + + set_dotnet_spark_jar + build_dotnet_interactive + build_dotnet_spark_base_interactive + build_dotnet_spark_interactive + + trap finish EXIT ERR + + exit 0 +} + +####################################### +# Checks if the provided Apache Spark version number is supported +# Arguments: +# The version number string +# Result: +# Sets the global variable apache_spark_version if supported, +# otherwise exits with a related message +####################################### +opt_check_apache_spark_version() { + local provided_version="${1}" + local valid_version="" + + for value in "${supported_apache_spark_versions[@]}" + do + [[ "${provided_version}" = "$value" ]] && valid_version="${provided_version}" + done + + if [ -z "${valid_version}" ] + then + echo "${provided_version} is an unsupported Apache Spark version."
+ exit 1 ; + else + apache_spark_version="${valid_version}" + apache_spark_short_version="${apache_spark_version:0:3}" + fi +} + +####################################### +# Checks if the provided .NET for Apache Spark version number is supported +# Arguments: +# The version number string +# Result: +# Sets the global variable dotnet_spark_version if supported, +# otherwise exits with a related message +####################################### +opt_check_dotnet_spark_version() { + local provided_version="${1}" + local valid_version="" + + for value in "${supported_dotnet_spark_versions[@]}" + do + [[ "${provided_version}" = "$value" ]] && valid_version="${provided_version}" + done + + if [ -z "${valid_version}" ] + then + echo "${provided_version} is an unsupported .NET for Apache Spark version." + exit 1 ; + else + dotnet_spark_version="${valid_version}" + fi +} + +####################################### +# Replaces every occurrence of search_string with replacement_string in a file +# Arguments: +# The file name +# The string to search for +# The string to replace the search string with +# Result: +# An updated file with the replaced string +####################################### +replace_text_in_file() { + local filename=${1} + local search_string=${2} + local replacement_string=${3} + + sh -c 'sed -i.bak "s/$1/$2/g" "$3" && rm "$3.bak"' _ "${search_string}" "${replacement_string}" "${filename}" +} + +####################################### +# Sets the microsoft-spark JAR name based on the Apache Spark version +####################################### +set_dotnet_spark_jar() { + local scala_version="2.11" + local short_spark_version="${apache_spark_short_version//./-}" + + case "${apache_spark_version:0:1}" in + 2) scala_version=2.11 ;; + 3) scala_version=2.12 ;; + esac + + dotnet_spark_jar="microsoft-spark-${short_spark_version}_${scala_version}-${dotnet_spark_version}.jar" +} + +####################################### +# Runs the docker build command with the related build arguments +# Arguments: +# The image name (incl. tag) +# Result: +# A local docker image with the specified name +####################################### +build_image() { + local image_name="${1}" + local build_args="--build-arg dotnet_core_version=${dotnet_core_version} + --build-arg dotnet_spark_version=${dotnet_spark_version} + --build-arg SPARK_VERSION=${apache_spark_version} + --build-arg DOTNET_SPARK_JAR=${dotnet_spark_jar}" + local cmd="docker build ${build_args} -t ${image_name} ."
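  # For illustration (derived from the defaults declared at the top of this script),
  # the command assembled above expands, for the first build stage, to roughly:
  #   docker build --build-arg dotnet_core_version=3.1 \
  #     --build-arg dotnet_spark_version=1.0.0 \
  #     --build-arg SPARK_VERSION=3.0.1 \
  #     --build-arg DOTNET_SPARK_JAR=microsoft-spark-3-0_2.12-1.0.0.jar \
  #     -t dotnet-interactive:3.1 .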
+ + echo "Building ${image_name}" + + ${cmd} +} + +####################################### +# Use the Dockerfile in the sub-folder dotnet-interactive to build the image of the first stage +# Result: +# A dotnet-interactive docker image tagged with the .NET core version +####################################### +build_dotnet_interactive() { + local image_name="dotnet-interactive:${dotnet_core_version}" + + cd dotnet-interactive + build_image "${image_name}" + cd ~- +} + +####################################### +# Use the Dockerfile in the sub-folder dotnet-spark-base to build the image of the second stage +# The image contains the specified .NET for Apache Spark version +# Result: +# A dotnet-spark-base-interactive docker image tagged with the .NET for Apache Spark version +####################################### +build_dotnet_spark_base_interactive() { + local image_name="dotnet-spark-base-interactive:${dotnet_spark_version}" + + cd dotnet-spark-base + build_image "${image_name}" + cd ~- +} + +####################################### +# Use the Dockerfile in the sub-folder dotnet-spark to build the image of the last stage +# The image contains the specified Apache Spark version +# Result: +# A dotnet-spark docker image tagged with the .NET for Apache Spark version, Apache Spark version and the suffix -interactive +####################################### +build_dotnet_spark_interactive() { + local image_name="${image_repository}/dotnet-spark:${dotnet_spark_version}-${apache_spark_version}-interactive" + + cd dotnet-spark + cp --recursive templates/scripts ./bin + cp --recursive templates/HelloSpark ./HelloSpark + + replace_text_in_file HelloSpark/HelloSpark.csproj "<\/TargetFramework>" "netcoreapp${dotnet_core_version}<\/TargetFramework>" + replace_text_in_file HelloSpark/HelloSpark.csproj "PackageReference Include=\"Microsoft.Spark\" Version=\"\"" "PackageReference Include=\"Microsoft.Spark\" Version=\"${dotnet_spark_version}\"" + + replace_text_in_file HelloSpark/README.txt "netcoreappX.X" "netcoreapp${dotnet_core_version}" + replace_text_in_file HelloSpark/README.txt "spark-X.X.X" "spark-${apache_spark_short_version}.x" + replace_text_in_file HelloSpark/README.txt "microsoft-spark-${apache_spark_short_version}.x-X.X.X.jar" "${dotnet_spark_jar}" + + replace_text_in_file bin/start-spark-debug.sh "microsoft-spark-X.X.X.jar" "${dotnet_spark_jar}" + + replace_text_in_file 02-basic-example.ipynb "nuget: Microsoft.Spark,X.X.X" "${dotnet_spark_version}" + + build_image "${image_name}" + cd ~- +} + +####################################### +# Remove the temporary folders created during the different build stages +####################################### +cleanup() +{ + cd dotnet-spark + rm --recursive --force bin + rm --recursive --force HelloSpark + cd ~- +} + +finish() +{ + result=$? 
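  # Note: finish() is registered via 'trap finish EXIT ERR' in main(). Capturing $?
  # here preserves the original exit status, so the cleanup below cannot mask it
  # before the script exits with that status again.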
+ cleanup + exit ${result} +} + +####################################### +# Display the help text +####################################### +print_help() { + cat < + + + + + + + + + + + + + + + + + + diff --git a/docker/images/interactive/dotnet-spark-base/Dockerfile b/docker/images/interactive/dotnet-spark-base/Dockerfile new file mode 100644 index 000000000..a231d197c --- /dev/null +++ b/docker/images/interactive/dotnet-spark-base/Dockerfile @@ -0,0 +1,15 @@ +ARG DOTNET_CORE_VERSION=3.1 +FROM dotnet-interactive:$DOTNET_CORE_VERSION +LABEL maintainer="Martin Kandlbinder " + +ARG DOTNET_SPARK_VERSION=1.0.0 +ENV DOTNET_SPARK_VERSION=$DOTNET_SPARK_VERSION \ + DOTNET_WORKER_DIR=/dotnet/Microsoft.Spark.Worker-${DOTNET_SPARK_VERSION} + +USER root + +RUN mkdir -p /dotnet/Debug/netcoreapp${DOTNET_CORE_VERSION} \ + && wget -q https://github.com/dotnet/spark/releases/download/v${DOTNET_SPARK_VERSION}/Microsoft.Spark.Worker.netcoreapp${DOTNET_CORE_VERSION}.linux-x64-${DOTNET_SPARK_VERSION}.tar.gz \ + && tar -xvzf Microsoft.Spark.Worker.netcoreapp${DOTNET_CORE_VERSION}.linux-x64-${DOTNET_SPARK_VERSION}.tar.gz --directory /dotnet \ + && chmod 755 /dotnet/Microsoft.Spark.Worker-${DOTNET_SPARK_VERSION}/Microsoft.Spark.Worker \ + && rm Microsoft.Spark.Worker.netcoreapp${DOTNET_CORE_VERSION}.linux-x64-${DOTNET_SPARK_VERSION}.tar.gz diff --git a/docker/images/interactive/dotnet-spark/01-start-spark-debug.ipynb b/docker/images/interactive/dotnet-spark/01-start-spark-debug.ipynb new file mode 100644 index 000000000..f3e246f46 --- /dev/null +++ b/docker/images/interactive/dotnet-spark/01-start-spark-debug.ipynb @@ -0,0 +1,54 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Start .NET for Apache Spark in Debug mode\n", + "\n", + + "Please run the cell below, before executing any .NET for Apache Spark code in a separate interactive .NET notebook.\n", + "\n", + "It will (after copying over the microsoft-spark JAR file) start the .NET for Apache Spark DotnetBackend in debug mode, using the current directory.\n", + "This has the benefit of being able to directly use any files (e.g. for import) located in this folder, without the need to specify the absolute path." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + ] + } + ], + "source": [ + "! 
start-spark-debug.sh" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/docker/images/interactive/dotnet-spark/02-basic-example.ipynb b/docker/images/interactive/dotnet-spark/02-basic-example.ipynb new file mode 100644 index 000000000..7aad97cf3 --- /dev/null +++ b/docker/images/interactive/dotnet-spark/02-basic-example.ipynb @@ -0,0 +1,274 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# A basic .NET for Apache Spark example" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Preparation\n", + "\n", + "### Start the Backend in Debug mode\n", + "\n", + "**_Important_**: Before you run any cells in this example, please ensure that you have [started the .NET for Apache Spark DotnetBackend in Debug mode](01-start-spark-debug.ipynb)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Install the Microsoft.Spark NuGet package" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#r \"nuget: Microsoft.Spark,1.0.0\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## Coding\n", + "\n", + "### Create a new SparkSession\n", + "The entry point to all .NET for Apache Spark functionality is a SparkSession. To create one, just use SparkSession.Builder():" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "using Microsoft.Spark.Sql;\n", + "using Microsoft.Spark.Sql.Types;\n", + "using static Microsoft.Spark.Sql.Functions;\n", + "\n", + "var spark = SparkSession.Builder().GetOrCreate();" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create a new DataFrame\n", + "There are multiple ways of creating new DataFrames. Most of the time you will read data from another source. For this basic example, however, we just define our DataFrame in the code below."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "var data = new List\n", + " {\n", + " new GenericRow(new object[] { \"Batman\", \"M\", 3093, true, new Date(1939, 5, 1) }),\n", + " new GenericRow(new object[] { \"Superman\", \"M\", 2496, true, new Date(1986, 10, 1) }),\n", + " new GenericRow(new object[] { \"Wonder Woman\", \"F\", 1231, true, new Date(1941, 12, 1) }),\n", + " new GenericRow(new object[] { \"Lois Lane\", \"F\", 934, true, new Date(1938, 6, 1) })\n", + " };\n", + "\n", + "var schema = new StructType(new List()\n", + " {\n", + " new StructField(\"Name\", new StringType()),\n", + " new StructField(\"Sex\", new StringType()),\n", + " new StructField(\"Appearances\", new IntegerType()),\n", + " new StructField(\"Alive\", new BooleanType()),\n", + " new StructField(\"FirstAppearance\", new DateType())\n", + " });\n", + "\n", + "DataFrame df = spark.CreateDataFrame(data, schema);" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Get a quick overview of your data\n", + "\n", + "To verify/display the Spark data types of a DataFrame use **PrintSchema()**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df.PrintSchema();" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Use **Show()** to have a look at the first couple of rows of your DataFrame." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df.Show();" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To get some basic DataFrame statistics, use **Describe()**." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df.Describe().Show();" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Filtering" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Column style filtering" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df.Filter(df.Col(\"Name\") == \"Batman\").Show();" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df.Filter(df[\"Appearances\"] > 1000).Show();" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "SQL style Filtering" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df.Filter(\"Sex == 'F'\").Show();" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df.Filter(\"FirstAppearance >= '1971-01-01'\").Show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df.Filter(\"Name not like '%man'\").Show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Grouping" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df.GroupBy(\"Sex\").Count().Show();" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "df.GroupBy(\"Sex\")\n", + " .Agg(Count(df[\"Sex\"]), Avg(df[\"Appearances\"]), Min(df[\"Appearances\"]), Max(df[\"Appearances\"]))\n", + " .OrderBy(Desc(\"avg(Appearances)\"))\n", + " .Show();" + ] + }, + { + "cell_type": "markdown", + 
"metadata": {}, + "source": [ + "### Cleanup\n", + "Stop your spark session, once you are done." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "spark.Stop();" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".NET (C#)", + "language": "C#", + "name": ".net-csharp" + }, + "language_info": { + "file_extension": ".cs", + "mimetype": "text/x-csharp", + "name": "C#", + "pygments_lexer": "csharp", + "version": "8.0" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/docker/images/interactive/dotnet-spark/Dockerfile b/docker/images/interactive/dotnet-spark/Dockerfile new file mode 100644 index 000000000..4c9ce3dca --- /dev/null +++ b/docker/images/interactive/dotnet-spark/Dockerfile @@ -0,0 +1,37 @@ +ARG DOTNET_SPARK_VERSION=1.0.0 +FROM dotnet-spark-base-interactive:$DOTNET_SPARK_VERSION +LABEL maintainer="Martin Kandlbinder " + +ARG SPARK_VERSION=3.0.1 +ARG DOTNET_SPARK_JAR="microsoft-spark-3-0_2.12-$DOTNET_SPARK_VERSION" +ENV DAEMON_RUN=true \ + DOTNETBACKEND_PORT=5567 \ + HADOOP_VERSION=2.7 \ + JUPYTER_ENABLE_LAB=true \ + SPARK_VERSION=$SPARK_VERSION \ + SPARK_HOME=/spark \ + PATH="${SPARK_HOME}/bin:${DOTNET_WORKER_DIR}:${PATH}" + +USER root + +COPY bin/* /usr/local/bin/ +COPY *.ipynb ${HOME}/dotnet.spark/examples/ + +RUN cd /dotnet \ + && dotnet new console -o SparkDummy \ + && cd SparkDummy \ + && dotnet add package Microsoft.Spark \ + && dotnet build \ + && cp /dotnet/SparkDummy/bin/Debug/netcoreapp${DOTNET_CORE_VERSION}/microsoft-spark-*.jar ${HOME}/ \ + && rm -rf /dotnet/SparkDummy \ + && cd / \ + && echo "Downloading spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz ..." \ + && wget -q https://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz \ + && tar -xvzf spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz \ + && mv spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION} spark \ + && rm spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}.tgz \ + && chmod 755 /usr/local/bin/start-spark-debug.sh \ + && chown -R ${NB_UID} ${HOME} + +USER ${NB_USER} +WORKDIR ${HOME}/dotnet.spark diff --git a/docker/images/interactive/dotnet-spark/templates/HelloSpark/HelloSpark.csproj b/docker/images/interactive/dotnet-spark/templates/HelloSpark/HelloSpark.csproj new file mode 100644 index 000000000..9ec8d90dd --- /dev/null +++ b/docker/images/interactive/dotnet-spark/templates/HelloSpark/HelloSpark.csproj @@ -0,0 +1,12 @@ + + + + Exe + + + + + + + + diff --git a/docker/images/interactive/dotnet-spark/templates/HelloSpark/Program.cs b/docker/images/interactive/dotnet-spark/templates/HelloSpark/Program.cs new file mode 100644 index 000000000..9be1e8b8d --- /dev/null +++ b/docker/images/interactive/dotnet-spark/templates/HelloSpark/Program.cs @@ -0,0 +1,14 @@ +using Microsoft.Spark.Sql; + +namespace HelloSpark +{ + class Program + { + static void Main(string[] args) + { + var spark = SparkSession.Builder().GetOrCreate(); + var df = spark.Read().Json("people.json"); + df.Show(); + } + } +} diff --git a/docker/images/interactive/dotnet-spark/templates/HelloSpark/README.txt b/docker/images/interactive/dotnet-spark/templates/HelloSpark/README.txt new file mode 100644 index 000000000..31f707b62 --- /dev/null +++ b/docker/images/interactive/dotnet-spark/templates/HelloSpark/README.txt @@ -0,0 +1,13 @@ +Use the commands below to build and run the example as outline at https://github.com/dotnet/spark/blob/master/docs/getting-started/ubuntu-instructions.md + 
+dotnet build + +cp people.json /dotnet/HelloSpark/bin/Debug/netcoreappX.X +cd /dotnet/HelloSpark/bin/Debug/netcoreappX.X + +####### spark-X.X.X ####### +# Run locally +spark-submit --class org.apache.spark.deploy.dotnet.DotnetRunner --master local microsoft-spark-X.X.X-X.X.X.jar dotnet HelloSpark.dll + +# To test out the example using the master and slave instances +spark-submit --class org.apache.spark.deploy.dotnet.DotnetRunner --master spark://$HOSTNAME:$SPARK_MASTER_PORT microsoft-spark-X.X.X-X.X.X.jar dotnet HelloSpark.dll diff --git a/docker/images/interactive/dotnet-spark/templates/HelloSpark/people.json b/docker/images/interactive/dotnet-spark/templates/HelloSpark/people.json new file mode 100644 index 000000000..50a859cbd --- /dev/null +++ b/docker/images/interactive/dotnet-spark/templates/HelloSpark/people.json @@ -0,0 +1,3 @@ +{"name":"Michael"} +{"name":"Andy", "age":30} +{"name":"Justin", "age":19} diff --git a/docker/images/interactive/dotnet-spark/templates/scripts/start-spark-debug.sh b/docker/images/interactive/dotnet-spark/templates/scripts/start-spark-debug.sh new file mode 100644 index 000000000..5557cb061 --- /dev/null +++ b/docker/images/interactive/dotnet-spark/templates/scripts/start-spark-debug.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash + +# Copy jar to the current working directory, if it does not exist already +jar_file_dest="$(pwd)/microsoft-spark-X.X.X.jar" +if ! [ -f "${jar_file_dest}" ]; then + cp "${HOME}/microsoft-spark-X.X.X.jar" "$(pwd)" +fi + +# Start the .NET for Apache Spark backend in debug mode +running=$(: &>/dev/null