From 7b2329f9dfba81e9881f1b2fb6a141c0399e1055 Mon Sep 17 00:00:00 2001 From: Xinyue Ruan Date: Tue, 19 Jul 2022 16:24:53 +0800 Subject: [PATCH 1/5] add dotnet installation & example doc --- website/docs/getting_started/installation.md | 6 + website/docs/reference/dotnet-setup.md | 242 +++++++++++++++++++ website/docusaurus.config.js | 5 + website/sidebars.js | 1 + website/src/pages/index.js | 15 ++ website/yarn.lock | 2 +- 6 files changed, 270 insertions(+), 1 deletion(-) create mode 100644 website/docs/reference/dotnet-setup.md diff --git a/website/docs/getting_started/installation.md b/website/docs/getting_started/installation.md index 48512d1788..16e711c55d 100644 --- a/website/docs/getting_started/installation.md +++ b/website/docs/getting_started/installation.md @@ -164,3 +164,9 @@ better integrate with intellij and SBT. To try out SynapseML using the R autogenerated wrappers [see our instructions](reference/R-setup.md). Note: This feature is still under development and some necessary custom wrappers may be missing. + +## C# (.NET) + +To try out SynapseML with .NET, please follow [instructions](reference/dotnet-setup.md). + +Note: This feature is not fully supported so some special functions may be missing. diff --git a/website/docs/reference/dotnet-setup.md b/website/docs/reference/dotnet-setup.md new file mode 100644 index 0000000000..1d7fca0ca6 --- /dev/null +++ b/website/docs/reference/dotnet-setup.md @@ -0,0 +1,242 @@ +--- +title: .NET setup +hide_title: true +sidebar_label: .NET setup +description: .NET setup and example for SynapseML +--- + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + + +# .NET setup and example for SynapseML + +## Installation + +### 1. Install .NET + +To start building .NET apps, you need to download and install the .NET SDK (Software Development Kit). + +Download and install the [.NET Core SDK](https://dotnet.microsoft.com/en-us/download/dotnet/3.1). 
+Installing the SDK adds the dotnet toolchain to your PATH. + +Once you've installed the .NET Core SDK, open a new command prompt or terminal and run `dotnet`. + +If the command runs and prints out information about how to use dotnet, you can move to the next step. +If you receive a `'dotnet' is not recognized as an internal or external command` error, make sure +you opened a new command prompt or terminal before running the command. + +### 2. Install Java + +Install [Java 8.1](https://www.oracle.com/java/technologies/downloads/#java8) for Windows and macOS, +or [OpenJDK 8](https://openjdk.org/install/) for Ubuntu. + +Select the appropriate version for your operating system. For example, select jdk-8u201-windows-x64.exe +for a Windows x64 machine or jdk-8u231-macosx-x64.dmg for macOS. Then, use the command `java` to verify the installation. + +### 3. Install Apache Spark + +[Download and install Apache Spark](https://spark.apache.org/downloads.html) with version >= 3.2.0. +(SynapseML v0.10.0 only supports spark version >= 3.2.0) + +Extract downloaded zipped files (with 7-Zip app on Windows or `tar` on linux) and remember the location of +extracted files, we take `~/bin/spark-3.2.0-bin-hadoop3.2/` as an example here. + +Run the following commands to set the environment variables used to locate Apache Spark. +On Windows, make sure to run the command prompt in administrator mode. + + + + setx /M HADOOP_HOME C:\bin\spark-3.2.0-bin-hadoop3.2\ + setx /M SPARK_HOME C:\bin\spark-3.2.0-bin-hadoop3.2\ + setx /M PATH "%PATH%;%HADOOP_HOME%;%SPARK_HOME%bin" # Warning: Don't run this if your path is already long as it will truncate your path to 1024 characters and potentially remove entries! 
+ + + + + export SPARK_HOME=~/bin/spark-3.2.0-bin-hadoop3.2/ + export PATH="$SPARK_HOME/bin:$PATH" + source ~/.bashrc + + + + +Once you've installed everything and set your environment variables, open a **new** command prompt or terminal and run the following command: +```bash +spark-submit --version +``` +If the command runs and prints version information, you can move to the next step. + +If you receive a `'spark-submit' is not recognized as an internal or external command` error, make sure you opened a **new** command prompt. + +### 4. Install .NET for Apache Spark + +Download the [Microsoft.Spark.Worker](https://github.com/dotnet/spark/releases) **v2.1.1** release from the .NET for Apache Spark GitHub. +For example if you're on a Windows machine and plan to use .NET Core, download the Windows x64 netcoreapp3.1 release. + +Extract Microsoft.Spark.Worker and remember the location. + +### 5. Install WinUtils (Windows Only) + +.NET for Apache Spark requires WinUtils to be installed alongside Apache Spark. +[Download winutils.exe](https://github.com/steveloughran/winutils/blob/master/hadoop-3.0.0/bin/winutils.exe). +Then, copy WinUtils into C:\bin\spark-3.2.0-bin-hadoop3.2\bin. +:::note +If you are using a different version of Hadoop, which is annotated at the end of your Spark install folder name, select the version of WinUtils that's compatible with your version of Hadoop. +::: + +### 6. Set DOTNET_WORKER_DIR and check dependencies + +Run one of the following commands to set the DOTNET_WORKER_DIR environment variable, which is used by .NET apps to locate .NET for Apache Spark +worker binaries. Make sure to set it to the directory where you downloaded and extracted the Microsoft.Spark.Worker. +On Windows, make sure to run the command prompt in administrator mode. 
+ + + + + setx /M DOTNET_WORKER_DIR <PATH-DOTNET-WORKER-DIR> + + + + + export DOTNET_WORKER_DIR=<PATH-DOTNET-WORKER-DIR> + + + + +Finally, double-check that you can run `dotnet`, `java`, and `spark-shell` from your command line before you move to the next section. + +## Write a .NET for SynapseML App + +### 1. Create a console app + +In your command prompt or terminal, run the following commands to create a new console application: +```powershell +dotnet new console -o SynapseMLApp +cd SynapseMLApp +``` +The `dotnet` command creates a new application of type console for you. The -o parameter creates a directory +named `SynapseMLApp` where your app is stored and populates it with the required files. +The `cd SynapseMLApp` command changes the directory to the app directory you created. + +### 2. Install NuGet package + +To use .NET for Apache Spark in an app, install the Microsoft.Spark package. +In your command prompt or terminal, run the following command: +```powershell +dotnet add package Microsoft.Spark --version 2.1.1 +``` +:::note +This tutorial uses Microsoft.Spark version 2.1.1 because SynapseML 0.10.0 depends on it. +Change to the corresponding version if necessary. +::: + +To use SynapseML features in the app, install the SynapseML.X package. +In this tutorial, we use SynapseML.Cognitive as an example. +In your command prompt or terminal, run the following command: +```powershell +# Update Nuget Config to include SynapseML Feed +dotnet nuget add source https://mmlspark.blob.core.windows.net/synapsemlnuget/index.json -n SynapseMLFeed +dotnet add package SynapseML.Cognitive --version 0.10.0 +``` +The `dotnet nuget add` command adds SynapseML's resolver to the source, so that our package can be found. + +### 3. 
Write your app +Open Program.cs file in Visual Studio Code, or any text editor, and replace all of the code with the following: +```csharp +using System; +using System.Collections.Generic; +using Synapse.ML.Cognitive; +using Microsoft.Spark.Sql; +using Microsoft.Spark.Sql.Types; + +namespace SynapseMLApp +{ + class Program + { static void Main(string[] args) + { + // Create Spark session + SparkSession spark = + SparkSession + .Builder() + .AppName("TextSentimentExample") + .GetOrCreate(); + + // Create DataFrame + DataFrame df = spark.CreateDataFrame( + new List + { + new GenericRow(new object[] {"I am so happy today, its sunny!", "en-US"}), + new GenericRow(new object[] {"I am frustrated by this rush hour traffic", "en-US"}), + new GenericRow(new object[] {"The cognitive services on spark aint bad", "en-US"}) + }, + new StructType(new List + { + new StructField("text", new StringType()), + new StructField("language", new StringType()) + }) + ); + + // Create TextSentiment + var model = new TextSentiment() + .SetSubscriptionKey("df74b0018d394ca0ab2173f3623ca7a1") + .SetLocation("eastus") + .SetTextCol("text") + .SetOutputCol("sentiment") + .SetErrorCol("error") + .SetLanguageCol("language"); + + // Transform + var outputDF = model.Transform(df); + + // Display results + outputDF.Show(); + + // Stop Spark session + spark.Stop(); + } + } +} +``` +[SparkSession](https://docs.microsoft.com/en-us/dotnet/api/microsoft.spark.sql.sparksession?view=spark-dotnet) is the entrypoint +of Apache Spark applications, which manages the context and information of your application. A DataFrame is a way of organizing +data into a set of named columns. + +Create a [TextSentiment](https://mmlspark.blob.core.windows.net/docs/0.10.0/dotnet/classSynapse_1_1ML_1_1Cognitive_1_1TextSentiment.html) +instance, set corresponding subscription key and other configurations. 
Then, apply the transformation to the DataFrame, +which analyzes the sentiment based on each row, and stores the result in the output column. + +The result of the transformation is stored in another DataFrame. Note that at this point, no operations have taken place because +.NET for Apache Spark lazily evaluates the data. It's not until the Show method is called to display the contents of the +transformed DataFrame to the console that the operations defined in the lines above execute. Once you no longer need the Spark +session, use the Stop method to stop your session. + +### 4. Run your .NET App +Run the following command to build your application: +```powershell +dotnet build +``` +Navigate to your build output directory (on Windows, for example, you could run `cd bin\Debug\net5.0`). +Use the spark-submit command to submit your application to run on Apache Spark. +```powershell +spark-submit --class org.apache.spark.deploy.dotnet.DotnetRunner --packages com.microsoft.azure:synapseml_2.12:0.10.0 --master local microsoft-spark-3-2_2.12-2.1.1.jar dotnet SynapseMLApp.dll +``` +`--packages com.microsoft.azure:synapseml_2.12:0.10.0` specifies the dependency on synapseml_2.12 version 0.10.0; +`microsoft-spark-3-2_2.12-2.1.1.jar` specifies Microsoft.Spark version 2.1.1 and Spark version 3.2. +:::note +This command assumes you have downloaded Apache Spark and added it to your PATH environment variable to be able to use spark-submit. +Otherwise, you'd have to use the full path (for example, C:\bin\apache-spark\bin\spark-submit or ~/spark/bin/spark-submit). +::: + +When your app runs, the sentiment analysis result is written to the console. 
+``` ++-----------------------------------------+--------+-----+--------------------------------------------------+ +| text|language|error| sentiment| ++-----------------------------------------+--------+-----+--------------------------------------------------+ +| I am so happy today, its sunny!| en-US| null|[{positive, null, {0.99, 0.0, 0.0}, [{I am so h...| +|I am frustrated by this rush hour traffic| en-US| null|[{negative, null, {0.0, 0.0, 0.99}, [{I am frus...| +| The cognitive services on spark aint bad| en-US| null|[{negative, null, {0.0, 0.01, 0.99}, [{The cogn...| ++-----------------------------------------+--------+-----+--------------------------------------------------+ +``` +Congratulations! You successfully authored and ran a .NET for SynapseML app. +Refer to the [developer docs](https://mmlspark.blob.core.windows.net/docs/0.10.0/dotnet/index.html) for API guidance. diff --git a/website/docusaurus.config.js b/website/docusaurus.config.js index ba729ad054..7f00eeab3a 100644 --- a/website/docusaurus.config.js +++ b/website/docusaurus.config.js @@ -28,6 +28,7 @@ module.exports = { prism: { theme: require('./src/plugins/prism_themes/github'), darkTheme: require('./src/plugins/prism_themes/monokai'), + additionalLanguages: ['csharp', 'powershell'], }, colorMode: { defaultMode: 'dark', @@ -62,6 +63,10 @@ module.exports = { { label: 'Scala', href: `https://mmlspark.blob.core.windows.net/docs/${version}/scala/com/microsoft/azure/synapse/ml/index.html`, + }, + { + label: 'C#', + href: `https://mmlspark.blob.core.windows.net/docs/${version}/dotnet/index.html`, } ] }, diff --git a/website/sidebars.js b/website/sidebars.js index 4e340d40de..614d48fab0 100644 --- a/website/sidebars.js +++ b/website/sidebars.js @@ -133,6 +133,7 @@ module.exports = { 'reference/contributing_guide', 'reference/docker', 'reference/R-setup', + 'reference/dotnet-setup', 'reference/SAR', 'reference/cyber', 'reference/datasets', diff --git a/website/src/pages/index.js 
b/website/src/pages/index.js index d26e0fb0f0..25baa45058 100644 --- a/website/src/pages/index.js +++ b/website/src/pages/index.js @@ -263,6 +263,7 @@ function Home() { { label: "Docker", value: "Docker" }, { label: "Python", value: "Python" }, { label: "SBT", value: "SBT" }, + { label: ".NET", value: "dotnet" } ]} > @@ -407,6 +408,20 @@ libraryDependencies += "com.microsoft.azure" %% "synapseml_2.12" % "0.10.0" // P lang="jsx" > + + To try out SynapseML with .NET, you should add SynapseML's assembly into reference: + + For detailed installation, please refer this{" "} + instruction. + diff --git a/website/yarn.lock b/website/yarn.lock index ee00b203c3..bc8bb3deec 100644 --- a/website/yarn.lock +++ b/website/yarn.lock @@ -5275,7 +5275,7 @@ node-releases@^2.0.6: resolved "https://registry.yarnpkg.com/node-releases/-/node-releases-2.0.6.tgz#8a7088c63a55e493845683ebf3c828d8c51c5503" integrity sha512-PiVXnNuFm5+iYkLBNeq5211hvO38y63T0i2KKh2KnUs3RpzJ+JtODFjkD8yjLwnDkTYF1eKXheUwdssR+NRZdg== -node@^16.16.0: +node@^16.14: version "16.16.0" resolved "https://registry.yarnpkg.com/node/-/node-16.16.0.tgz#192d06e3e540957fda1d6043b360e584a9c8244f" integrity sha512-I0NJaiQZ443wBqHUfIOx/EjdAO3uP1frg96pLEvrTv9tMyVDgSURYX1x69lNTLaJAF4pCifRwCHEFubN3/K/iQ== From 8f2f574ef29da10955c77f5ec32bf0e47d8e670f Mon Sep 17 00:00:00 2001 From: Xinyue Ruan Date: Tue, 19 Jul 2022 16:29:01 +0800 Subject: [PATCH 2/5] docs: update README --- README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/README.md b/README.md index 18084bdbd7..16b2db4431 100644 --- a/README.md +++ b/README.md @@ -206,6 +206,11 @@ To try out SynapseML using the R autogenerated wrappers [see our instructions](website/docs/reference/R-setup.md). Note: This feature is still under development and some necessary custom wrappers may be missing. +### C# (.NET) + +To try out SynapseML with .NET, please follow [instructions](website/docs/reference/dotnet-setup.md). 
+Note: This feature is not fully supported so some special functions may be missing. + ### Building from source SynapseML has recently transitioned to a new build infrastructure. From 535a51e842ad1d4343c1fc6fc158a0c4595cdacd Mon Sep 17 00:00:00 2001 From: Xinyue Ruan Date: Tue, 19 Jul 2022 16:34:10 +0800 Subject: [PATCH 3/5] fix dependabot alerts --- website/package.json | 1 + website/yarn.lock | 11 +++++++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/website/package.json b/website/package.json index 975eae45fd..38a47b194c 100644 --- a/website/package.json +++ b/website/package.json @@ -22,6 +22,7 @@ "ansi-html-community": "^0.0.8", "caniuse-lite": "^1.0.30001359", "classnames": "^2.2.6", + "glob-parent": "^6.0.1", "got": "^11.8.5", "hast-util-is-element": "1.1.0", "node": "^16.14", diff --git a/website/yarn.lock b/website/yarn.lock index bc8bb3deec..bec7c9ea38 100644 --- a/website/yarn.lock +++ b/website/yarn.lock @@ -4006,13 +4006,20 @@ github-slugger@^1.4.0: resolved "https://registry.yarnpkg.com/github-slugger/-/github-slugger-1.4.0.tgz#206eb96cdb22ee56fdc53a28d5a302338463444e" integrity sha512-w0dzqw/nt51xMVmlaV1+JRzN+oCa1KfcgGEWhxUG16wbdA+Xnt/yoFO8Z8x/V82ZcZ0wy6ln9QDup5avbhiDhQ== -glob-parent@^5.1.2, glob-parent@^6.0.1, glob-parent@~5.1.2: +glob-parent@^5.1.2, glob-parent@~5.1.2: version "5.1.2" resolved "https://registry.yarnpkg.com/glob-parent/-/glob-parent-5.1.2.tgz#869832c58034fe68a4093c17dc15e8340d8401c4" integrity sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow== dependencies: is-glob "^4.0.1" +glob-parent@^6.0.1: + version "6.0.2" + resolved "https://registry.yarnpkg.com/glob-parent/-/glob-parent-6.0.2.tgz#6d237d99083950c79290f24c7642a3de9a28f9e3" + integrity sha512-XxwI8EOhVQgWp6iDL+3b0r86f4d6AX6zSU55HfB4ydCEuXLXc5FcYeOu+nnGftS4TEju/11rt4KJPTMgbfmv4A== + dependencies: + is-glob "^4.0.3" + glob-to-regexp@^0.4.1: version "0.4.1" resolved 
"https://registry.yarnpkg.com/glob-to-regexp/-/glob-to-regexp-0.4.1.tgz#c75297087c851b9a578bd217dd59a92f59fe546e" @@ -4623,7 +4630,7 @@ is-fullwidth-code-point@^3.0.0: resolved "https://registry.yarnpkg.com/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz#f116f8064fe90b3f7844a38997c0b75051269f1d" integrity sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg== -is-glob@^4.0.1, is-glob@~4.0.1: +is-glob@^4.0.1, is-glob@^4.0.3, is-glob@~4.0.1: version "4.0.3" resolved "https://registry.yarnpkg.com/is-glob/-/is-glob-4.0.3.tgz#64f61e42cbbb2eec2071a9dac0b28ba1e65d5084" integrity sha512-xelSayHH36ZgE7ZWhli7pW34hNbNl8Ojv5KVmkJD4hBdD3th8Tfk9vYasLM+mXWOZhFkgZfxhLSnrwRr4elSSg== From ecd462ae6cb1594128bfab58e508e8874ff5c5bc Mon Sep 17 00:00:00 2001 From: Xinyue Ruan Date: Tue, 19 Jul 2022 16:36:21 +0800 Subject: [PATCH 4/5] stage the changes into version 0.10.0 as well --- .../getting_started/installation.md | 6 + .../version-0.10.0/reference/dotnet-setup.md | 242 ++++++++++++++++++ .../version-0.10.0-sidebars.json | 1 + 3 files changed, 249 insertions(+) create mode 100644 website/versioned_docs/version-0.10.0/reference/dotnet-setup.md diff --git a/website/versioned_docs/version-0.10.0/getting_started/installation.md b/website/versioned_docs/version-0.10.0/getting_started/installation.md index 48512d1788..16e711c55d 100644 --- a/website/versioned_docs/version-0.10.0/getting_started/installation.md +++ b/website/versioned_docs/version-0.10.0/getting_started/installation.md @@ -164,3 +164,9 @@ better integrate with intellij and SBT. To try out SynapseML using the R autogenerated wrappers [see our instructions](reference/R-setup.md). Note: This feature is still under development and some necessary custom wrappers may be missing. + +## C# (.NET) + +To try out SynapseML with .NET, please follow [instructions](reference/dotnet-setup.md). + +Note: This feature is not fully supported so some special functions may be missing. 
diff --git a/website/versioned_docs/version-0.10.0/reference/dotnet-setup.md b/website/versioned_docs/version-0.10.0/reference/dotnet-setup.md new file mode 100644 index 0000000000..1d7fca0ca6 --- /dev/null +++ b/website/versioned_docs/version-0.10.0/reference/dotnet-setup.md @@ -0,0 +1,242 @@ +--- +title: .NET setup +hide_title: true +sidebar_label: .NET setup +description: .NET setup and example for SynapseML +--- + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + + +# .NET setup and example for SynapseML + +## Installation + +### 1. Install .NET + +To start building .NET apps, you need to download and install the .NET SDK (Software Development Kit). + +Download and install the [.NET Core SDK](https://dotnet.microsoft.com/en-us/download/dotnet/3.1). +Installing the SDK adds the dotnet toolchain to your PATH. + +Once you've installed the .NET Core SDK, open a new command prompt or terminal and run `dotnet`. + +If the command runs and prints out information about how to use dotnet, you can move to the next step. +If you receive a `'dotnet' is not recognized as an internal or external command` error, make sure +you opened a new command prompt or terminal before running the command. + +### 2. Install Java + +Install [Java 8.1](https://www.oracle.com/java/technologies/downloads/#java8) for Windows and macOS, +or [OpenJDK 8](https://openjdk.org/install/) for Ubuntu. + +Select the appropriate version for your operating system. For example, select jdk-8u201-windows-x64.exe +for a Windows x64 machine or jdk-8u231-macosx-x64.dmg for macOS. Then, use the command `java` to verify the installation. + +### 3. Install Apache Spark + +[Download and install Apache Spark](https://spark.apache.org/downloads.html) with version >= 3.2.0. 
+(SynapseML v0.10.0 only supports spark version >= 3.2.0) + +Extract downloaded zipped files (with 7-Zip app on Windows or `tar` on linux) and remember the location of +extracted files, we take `~/bin/spark-3.2.0-bin-hadoop3.2/` as an example here. + +Run the following commands to set the environment variables used to locate Apache Spark. +On Windows, make sure to run the command prompt in administrator mode. + + + + setx /M HADOOP_HOME C:\bin\spark-3.2.0-bin-hadoop3.2\ + setx /M SPARK_HOME C:\bin\spark-3.2.0-bin-hadoop3.2\ + setx /M PATH "%PATH%;%HADOOP_HOME%;%SPARK_HOME%bin" # Warning: Don't run this if your path is already long as it will truncate your path to 1024 characters and potentially remove entries! + + + + + export SPARK_HOME=~/bin/spark-3.2.0-bin-hadoop3.2/ + export PATH="$SPARK_HOME/bin:$PATH" + source ~/.bashrc + + + + +Once you've installed everything and set your environment variables, open a **new** command prompt or terminal and run the following command: +```bash +spark-submit --version +``` +If the command runs and prints version information, you can move to the next step. + +If you receive a `'spark-submit' is not recognized as an internal or external command` error, make sure you opened a **new** command prompt. + +### 4. Install .NET for Apache Spark + +Download the [Microsoft.Spark.Worker](https://github.com/dotnet/spark/releases) **v2.1.1** release from the .NET for Apache Spark GitHub. +For example if you're on a Windows machine and plan to use .NET Core, download the Windows x64 netcoreapp3.1 release. + +Extract Microsoft.Spark.Worker and remember the location. + +### 5. Install WinUtils (Windows Only) + +.NET for Apache Spark requires WinUtils to be installed alongside Apache Spark. +[Download winutils.exe](https://github.com/steveloughran/winutils/blob/master/hadoop-3.0.0/bin/winutils.exe). +Then, copy WinUtils into C:\bin\spark-3.2.0-bin-hadoop3.2\bin. 
+:::note +If you are using a different version of Hadoop, which is annotated at the end of your Spark install folder name, select the version of WinUtils that's compatible with your version of Hadoop. +::: + +### 6. Set DOTNET_WORKER_DIR and check dependencies + +Run one of the following commands to set the DOTNET_WORKER_DIR environment variable, which is used by .NET apps to locate .NET for Apache Spark +worker binaries. Make sure to set it to the directory where you downloaded and extracted the Microsoft.Spark.Worker. +On Windows, make sure to run the command prompt in administrator mode. + + + + + setx /M DOTNET_WORKER_DIR <PATH-DOTNET-WORKER-DIR> + + + + + export DOTNET_WORKER_DIR=<PATH-DOTNET-WORKER-DIR> + + + + +Finally, double-check that you can run `dotnet`, `java`, and `spark-shell` from your command line before you move to the next section. + +## Write a .NET for SynapseML App + +### 1. Create a console app + +In your command prompt or terminal, run the following commands to create a new console application: +```powershell +dotnet new console -o SynapseMLApp +cd SynapseMLApp +``` +The `dotnet` command creates a new application of type console for you. The -o parameter creates a directory +named `SynapseMLApp` where your app is stored and populates it with the required files. +The `cd SynapseMLApp` command changes the directory to the app directory you created. + +### 2. Install NuGet package + +To use .NET for Apache Spark in an app, install the Microsoft.Spark package. +In your command prompt or terminal, run the following command: +```powershell +dotnet add package Microsoft.Spark --version 2.1.1 +``` +:::note +This tutorial uses Microsoft.Spark version 2.1.1 because SynapseML 0.10.0 depends on it. +Change to the corresponding version if necessary. +::: + +To use SynapseML features in the app, install the SynapseML.X package. +In this tutorial, we use SynapseML.Cognitive as an example. 
+In your command prompt or terminal, run the following command: +```powershell +# Update Nuget Config to include SynapseML Feed +dotnet nuget add source https://mmlspark.blob.core.windows.net/synapsemlnuget/index.json -n SynapseMLFeed +dotnet add package SynapseML.Cognitive --version 0.10.0 +``` +The `dotnet nuget add` command adds SynapseML's resolver to the source, so that our package can be found. + +### 3. Write your app +Open Program.cs file in Visual Studio Code, or any text editor, and replace all of the code with the following: +```csharp +using System; +using System.Collections.Generic; +using Synapse.ML.Cognitive; +using Microsoft.Spark.Sql; +using Microsoft.Spark.Sql.Types; + +namespace SynapseMLApp +{ + class Program + { static void Main(string[] args) + { + // Create Spark session + SparkSession spark = + SparkSession + .Builder() + .AppName("TextSentimentExample") + .GetOrCreate(); + + // Create DataFrame + DataFrame df = spark.CreateDataFrame( + new List + { + new GenericRow(new object[] {"I am so happy today, its sunny!", "en-US"}), + new GenericRow(new object[] {"I am frustrated by this rush hour traffic", "en-US"}), + new GenericRow(new object[] {"The cognitive services on spark aint bad", "en-US"}) + }, + new StructType(new List + { + new StructField("text", new StringType()), + new StructField("language", new StringType()) + }) + ); + + // Create TextSentiment + var model = new TextSentiment() + .SetSubscriptionKey("df74b0018d394ca0ab2173f3623ca7a1") + .SetLocation("eastus") + .SetTextCol("text") + .SetOutputCol("sentiment") + .SetErrorCol("error") + .SetLanguageCol("language"); + + // Transform + var outputDF = model.Transform(df); + + // Display results + outputDF.Show(); + + // Stop Spark session + spark.Stop(); + } + } +} +``` +[SparkSession](https://docs.microsoft.com/en-us/dotnet/api/microsoft.spark.sql.sparksession?view=spark-dotnet) is the entrypoint +of Apache Spark applications, which manages the context and information of your 
application. A DataFrame is a way of organizing +data into a set of named columns. + +Create a [TextSentiment](https://mmlspark.blob.core.windows.net/docs/0.10.0/dotnet/classSynapse_1_1ML_1_1Cognitive_1_1TextSentiment.html) +instance, set corresponding subscription key and other configurations. Then, apply the transformation to the DataFrame, +which analyzes the sentiment based on each row, and stores the result in the output column. + +The result of the transformation is stored in another DataFrame. Note that at this point, no operations have taken place because +.NET for Apache Spark lazily evaluates the data. It's not until the Show method is called to display the contents of the +transformed DataFrame to the console that the operations defined in the lines above execute. Once you no longer need the Spark +session, use the Stop method to stop your session. + +### 4. Run your .NET App +Run the following command to build your application: +```powershell +dotnet build +``` +Navigate to your build output directory (on Windows, for example, you could run `cd bin\Debug\net5.0`). +Use the spark-submit command to submit your application to run on Apache Spark. +```powershell +spark-submit --class org.apache.spark.deploy.dotnet.DotnetRunner --packages com.microsoft.azure:synapseml_2.12:0.10.0 --master local microsoft-spark-3-2_2.12-2.1.1.jar dotnet SynapseMLApp.dll +``` +`--packages com.microsoft.azure:synapseml_2.12:0.10.0` specifies the dependency on synapseml_2.12 version 0.10.0; +`microsoft-spark-3-2_2.12-2.1.1.jar` specifies Microsoft.Spark version 2.1.1 and Spark version 3.2. +:::note +This command assumes you have downloaded Apache Spark and added it to your PATH environment variable to be able to use spark-submit. +Otherwise, you'd have to use the full path (for example, C:\bin\apache-spark\bin\spark-submit or ~/spark/bin/spark-submit). +::: + +When your app runs, the sentiment analysis result is written to the console. 
+``` ++-----------------------------------------+--------+-----+--------------------------------------------------+ +| text|language|error| sentiment| ++-----------------------------------------+--------+-----+--------------------------------------------------+ +| I am so happy today, its sunny!| en-US| null|[{positive, null, {0.99, 0.0, 0.0}, [{I am so h...| +|I am frustrated by this rush hour traffic| en-US| null|[{negative, null, {0.0, 0.0, 0.99}, [{I am frus...| +| The cognitive services on spark aint bad| en-US| null|[{negative, null, {0.0, 0.01, 0.99}, [{The cogn...| ++-----------------------------------------+--------+-----+--------------------------------------------------+ +``` +Congratulations! You successfully authored and ran a .NET for SynapseML app. +Refer to the [developer docs](https://mmlspark.blob.core.windows.net/docs/0.10.0/dotnet/index.html) for API guidance. diff --git a/website/versioned_sidebars/version-0.10.0-sidebars.json b/website/versioned_sidebars/version-0.10.0-sidebars.json index b1d56bec04..ce26ba7424 100644 --- a/website/versioned_sidebars/version-0.10.0-sidebars.json +++ b/website/versioned_sidebars/version-0.10.0-sidebars.json @@ -175,6 +175,7 @@ "reference/contributing_guide", "reference/docker", "reference/R-setup", + "reference/dotnet-setup", "reference/SAR", "reference/cyber", "reference/datasets", From 7ff4c1bba9ea9de5311b2e640af4b107b1c9875e Mon Sep 17 00:00:00 2001 From: Xinyue Ruan Date: Wed, 20 Jul 2022 09:24:51 +0800 Subject: [PATCH 5/5] address comments --- README.md | 3 +-- website/docs/getting_started/installation.md | 4 +--- website/docs/reference/dotnet-setup.md | 2 +- .../version-0.10.0/getting_started/installation.md | 4 +--- .../versioned_docs/version-0.10.0/reference/dotnet-setup.md | 2 +- 5 files changed, 5 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 16b2db4431..01b74c9759 100644 --- a/README.md +++ b/README.md @@ -208,8 +208,7 @@ and some necessary custom wrappers may be missing. 
### C# (.NET) -To try out SynapseML with .NET, please follow [instructions](website/docs/reference/dotnet-setup.md). -Note: This feature is not fully supported so some special functions may be missing. +To try out SynapseML with .NET, please follow the [.NET Installation Guide](website/docs/reference/dotnet-setup.md). ### Building from source diff --git a/website/docs/getting_started/installation.md b/website/docs/getting_started/installation.md index 16e711c55d..c0cdbdfc1b 100644 --- a/website/docs/getting_started/installation.md +++ b/website/docs/getting_started/installation.md @@ -167,6 +167,4 @@ and some necessary custom wrappers may be missing. ## C# (.NET) -To try out SynapseML with .NET, please follow [instructions](reference/dotnet-setup.md). - -Note: This feature is not fully supported so some special functions may be missing. +To try out SynapseML with .NET, please follow the [.NET Installation Guide](reference/dotnet-setup.md). diff --git a/website/docs/reference/dotnet-setup.md b/website/docs/reference/dotnet-setup.md index 1d7fca0ca6..f96bf6d91e 100644 --- a/website/docs/reference/dotnet-setup.md +++ b/website/docs/reference/dotnet-setup.md @@ -179,7 +179,7 @@ namespace SynapseMLApp // Create TextSentiment var model = new TextSentiment() - .SetSubscriptionKey("df74b0018d394ca0ab2173f3623ca7a1") + .SetSubscriptionKey("YOUR_SUBSCRIPTION_KEY") .SetLocation("eastus") .SetTextCol("text") .SetOutputCol("sentiment") diff --git a/website/versioned_docs/version-0.10.0/getting_started/installation.md b/website/versioned_docs/version-0.10.0/getting_started/installation.md index 16e711c55d..c0cdbdfc1b 100644 --- a/website/versioned_docs/version-0.10.0/getting_started/installation.md +++ b/website/versioned_docs/version-0.10.0/getting_started/installation.md @@ -167,6 +167,4 @@ and some necessary custom wrappers may be missing. ## C# (.NET) -To try out SynapseML with .NET, please follow [instructions](reference/dotnet-setup.md). 
- -Note: This feature is not fully supported so some special functions may be missing. +To try out SynapseML with .NET, please follow the [.NET Installation Guide](reference/dotnet-setup.md). diff --git a/website/versioned_docs/version-0.10.0/reference/dotnet-setup.md b/website/versioned_docs/version-0.10.0/reference/dotnet-setup.md index 1d7fca0ca6..f96bf6d91e 100644 --- a/website/versioned_docs/version-0.10.0/reference/dotnet-setup.md +++ b/website/versioned_docs/version-0.10.0/reference/dotnet-setup.md @@ -179,7 +179,7 @@ namespace SynapseMLApp // Create TextSentiment var model = new TextSentiment() - .SetSubscriptionKey("df74b0018d394ca0ab2173f3623ca7a1") + .SetSubscriptionKey("YOUR_SUBSCRIPTION_KEY") .SetLocation("eastus") .SetTextCol("text") .SetOutputCol("sentiment")