diff --git a/CODEOWNERS b/CODEOWNERS index 1f292a04a6..6153356e53 100644 --- a/CODEOWNERS +++ b/CODEOWNERS @@ -29,7 +29,6 @@ explainers/ @memoryz #Serena's Areas website/ @serena-ruan -core/src/main/dotnet @serena-ruan core/src/main/scala/com/microsoft/azure/synapse/ml/codegen @serena-ruan # Scott's Areas diff --git a/README.md b/README.md index 3db779765c..5f179846bb 100644 --- a/README.md +++ b/README.md @@ -81,7 +81,6 @@ First select the correct platform that you are installing SynapseML into: - [Apache Livy and HDInsight](#apache-livy-and-hdinsight) - [Docker](#docker) - [R](#r) -- [C# (.NET)](#c-net) - [Building from source](#building-from-source) @@ -237,11 +236,6 @@ To try out SynapseML using the R autogenerated wrappers [see our instructions](https://microsoft.github.io/SynapseML/docs/Reference/R%20Setup/). Note: This feature is still under development and some necessary custom wrappers may be missing. -### C# (.NET) - -To try out SynapseML with .NET, please follow the [.NET Installation Guide](https://microsoft.github.io/SynapseML/docs/Reference/Dotnet%20Setup/). -Please note that some classes including the `AzureSearchWriter`, `DiagnosticInfo`, `UDPyFParam`, `ParamSpaceParam`, `BallTreeParam`, `ConditionalBallTreeParam`, `LightGBMBoosterParam` are still under development and not exposed in .NET yet. - ### Building from source SynapseML has recently transitioned to a new build infrastructure. diff --git a/build.sbt b/build.sbt index c8a827a140..0b6f9f6f79 100644 --- a/build.sbt +++ b/build.sbt @@ -112,122 +112,6 @@ rootGenDir := { join(targetDir, "generated") } -// scalastyle:off line.size.limit -val genSleetConfig = TaskKey[File]("genSleetConfig", - "generate sleet.json file for sleet configuration so we can push nuget package to the blob") -genSleetConfig := { - val fileContent = - s"""{ - | "username": "", - | "useremail": "", - | "sources": [ - | { - | "name": "SynapseMLNuget", - | "type": "azure", - | "container": "synapsemlnuget", - | "path": "https://mmlspark.blob.core.windows.net/synapsemlnuget", - | "connectionString": "DefaultEndpointsProtocol=https;AccountName=mmlspark;AccountKey=${Secrets.storageKey};EndpointSuffix=core.windows.net" - | } - | ] - |}""".stripMargin - val sleetJsonFile = join(rootGenDir.value, "sleet.json") - if (sleetJsonFile.exists()) FileUtils.forceDelete(sleetJsonFile) - FileUtils.writeStringToFile(sleetJsonFile, fileContent, "utf-8") - sleetJsonFile -} -// scalastyle:on line.size.limit - -val publishDotnetTestBase = TaskKey[Unit]("publishDotnetTestBase", - "generate dotnet test helper file with current library version and publish E2E test base") -publishDotnetTestBase := { - val fileContent = - s"""// Licensed to the .NET Foundation under one or more agreements. - |// The .NET Foundation licenses this file to you under the MIT license. - |// See the LICENSE file in the project root for more information. - | - |namespace SynapseMLtest.Utils - |{ - | public class Helper - | { - | public static string GetSynapseMLPackage() - | { - | return "com.microsoft.azure:synapseml_2.12:${version.value}"; - | } - | } - | - |} - |""".stripMargin - val dotnetTestBaseDir = join(baseDirectory.value, "core", "src", "main", "dotnet", "test") - val dotnetHelperFile = join(dotnetTestBaseDir, "SynapseMLVersion.cs") - if (dotnetHelperFile.exists()) FileUtils.forceDelete(dotnetHelperFile) - FileUtils.writeStringToFile(dotnetHelperFile, fileContent, "utf-8") - - val dotnetTestBaseProjContent = - s""" - | - | - | netstandard2.1 - | 9.0 - | SynapseML.DotnetE2ETest - | true - | SynapseML .NET Test Base - | ${dotnetedVersion(version.value)} - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | Microsoft.Spark - | - | - | - | false - | - | - |""".stripMargin - // update the version of current dotnetTestBase assembly - val dotnetTestBaseProj = join(dotnetTestBaseDir, "dotnetTestBase.csproj") - if (dotnetTestBaseProj.exists()) FileUtils.forceDelete(dotnetTestBaseProj) - FileUtils.writeStringToFile(dotnetTestBaseProj, dotnetTestBaseProjContent, "utf-8") - - packDotnetAssemblyCmd(join(dotnetTestBaseDir, "target").getAbsolutePath, dotnetTestBaseDir) - val packagePath = join(dotnetTestBaseDir, - "target", s"SynapseML.DotnetE2ETest.${dotnetedVersion(version.value)}.nupkg").getAbsolutePath - publishDotnetAssemblyCmd(packagePath, genSleetConfig.value) -} - -// This command should be run only when you make an update to DotnetBase proj, and it will override -// existing nuget package with the same version number -val publishDotnetBase = TaskKey[Unit]("publishDotnetBase", - "publish dotnet base nuget package that contains core elements for SynapseML in C#") -publishDotnetBase := { - val dotnetBaseDir = join(baseDirectory.value, "core", "src", "main", "dotnet", "src") - packDotnetAssemblyCmd(join(dotnetBaseDir, "target").getAbsolutePath, dotnetBaseDir) - val packagePath = join(dotnetBaseDir, - // Update the version whenever there's a new release - "target", s"SynapseML.DotnetBase.${dotnetedVersion("1.0.4")}.nupkg").getAbsolutePath - publishDotnetAssemblyCmd(packagePath, genSleetConfig.value) -} - def runTaskForAllInCompile(task: TaskKey[Unit]): Def.Initialize[Task[Seq[Unit]]] = { task.all(ScopeFilter( inProjects(core, deepLearning, cognitive, vw, lightgbm, opencv), @@ -246,29 +130,6 @@ generatePythonDoc := { runCmd(activateCondaEnv ++ Seq("sphinx-build", "-b", "html", "doc", "../../../doc/pyspark"), dir) } -val generateDotnetDoc = TaskKey[Unit]("generateDotnetDoc", "Generate documentation for dotnet classes") -generateDotnetDoc := { - Def.sequential( - runTaskForAllInCompile(dotnetCodeGen), - runTaskForAllInCompile(mergeDotnetCode) - ).value - val dotnetSrcDir = join(rootGenDir.value, "src", "dotnet") - runCmd(Seq("doxygen", "-g"), dotnetSrcDir) - FileUtils.copyFile(join(baseDirectory.value, "README.md"), join(dotnetSrcDir, "README.md")) - runCmd(Seq("sed", "-i", s"""s/img width=\"800\"/img width=\"300\"/g""", "README.md"), dotnetSrcDir) - val packageName = name.value.split("-").map(_.capitalize).mkString(" ") - val fileContent = - s"""PROJECT_NAME = "$packageName" - |PROJECT_NUMBER = "${dotnetedVersion(version.value)}" - |USE_MDFILE_AS_MAINPAGE = "README.md" - |RECURSIVE = YES - |""".stripMargin - val doxygenHelperFile = join(dotnetSrcDir, "DoxygenHelper.txt") - if (doxygenHelperFile.exists()) FileUtils.forceDelete(doxygenHelperFile) - FileUtils.writeStringToFile(doxygenHelperFile, fileContent, "utf-8") - runCmd(Seq("bash", "-c", "cat DoxygenHelper.txt >> Doxyfile", ""), dotnetSrcDir) - runCmd(Seq("doxygen"), dotnetSrcDir) -} val packageSynapseML = TaskKey[Unit]("packageSynapseML", "package all projects into SynapseML") packageSynapseML := { @@ -337,11 +198,10 @@ publishPypi := { ) } -val publishDocs = TaskKey[Unit]("publishDocs", "publish docs for scala, python and dotnet") +val publishDocs = TaskKey[Unit]("publishDocs", "publish docs for scala and python") publishDocs := { Def.sequential( generatePythonDoc, - generateDotnetDoc, (root / Compile / unidoc) ).value val html = @@ -358,9 +218,6 @@ publishDocs := { if (scalaDir.exists()) FileUtils.forceDelete(scalaDir) FileUtils.copyDirectory(join(targetDir, "unidoc"), scalaDir) FileUtils.writeStringToFile(join(unifiedDocDir.toString, "index.html"), html, "utf-8") - val dotnetDir = join(unifiedDocDir.toString, "dotnet") - if (dotnetDir.exists()) FileUtils.forceDelete(dotnetDir) - FileUtils.copyDirectory(join(codegenDir, "src", "dotnet", "html"), dotnetDir) uploadToBlob(unifiedDocDir.toString, version.value, "docs") } diff --git a/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/services/CognitiveServiceBase.scala b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/services/CognitiveServiceBase.scala index 1932f8cc41..aff6902ecc 100644 --- a/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/services/CognitiveServiceBase.scala +++ b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/services/CognitiveServiceBase.scala @@ -224,28 +224,6 @@ trait HasCustomCogServiceDomain extends Wrappable with HasURL with HasUrlPath { |""".stripMargin } - override def dotnetAdditionalMethods: String = super.dotnetAdditionalMethods + { - s"""/// - |/// Sets value for service name - |/// - |/// - |/// Service name of the cognitive service if it's custom domain - |/// - |/// New $dotnetClassName object - |public $dotnetClassName SetCustomServiceName(string value) => - | $dotnetClassWrapperName(Reference.Invoke(\"setCustomServiceName\", value)); - | - |/// - |/// Sets value for endpoint - |/// - |/// - |/// Endpoint of the cognitive service - |/// - |/// New $dotnetClassName object - |public $dotnetClassName SetEndpoint(string value) => - | $dotnetClassWrapperName(Reference.Invoke(\"setEndpoint\", value)); - |""".stripMargin - } } trait HasAPIVersion extends HasServiceParams { @@ -403,18 +381,6 @@ trait HasSetLinkedService extends Wrappable with HasURL with HasSubscriptionKey |""".stripMargin } - override def dotnetAdditionalMethods: String = super.dotnetAdditionalMethods + { - s"""/// - |/// Sets value for linkedService - |/// - |/// - |/// linkedService name - |/// - |/// New $dotnetClassName object - |public $dotnetClassName SetLinkedService(string value) => - | $dotnetClassWrapperName(Reference.Invoke(\"setLinkedService\", value)); - |""".stripMargin - } def setLinkedService(v: String): this.type = { val classPath = "mssparkutils.cognitiveService" @@ -458,20 +424,6 @@ trait HasSetLocation extends Wrappable with HasURL with HasUrlPath with DomainHe |""".stripMargin } - override def dotnetAdditionalMethods: String = super.dotnetAdditionalMethods + { - s"""/// - |/// Sets value for location - |/// - |/// - |/// Location of the cognitive service - |/// - |/// New $dotnetClassName object - |public $dotnetClassName SetLocation(string value) => - | $dotnetClassWrapperName(Reference.Invoke(\"setLocation\", value)); - |""".stripMargin - } - - def setLocation(v: String): this.type = { val domain = getLocationDomain(v) setUrl(s"https://$v.api.cognitive.microsoft.$domain/" + urlPath.stripPrefix("/")) diff --git a/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/services/text/TextAnalytics.scala b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/services/text/TextAnalytics.scala index b1239fbfa7..5e002a6e55 100644 --- a/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/services/text/TextAnalytics.scala +++ b/cognitive/src/main/scala/com/microsoft/azure/synapse/ml/services/text/TextAnalytics.scala @@ -465,19 +465,6 @@ class AnalyzeHealthText(override val uid: String) super.postprocessResponse(processedResponseOpt.orNull) } - /* - override private[ml] def dotnetTestValue(v: Seq[TextAnalyzeTask]): String = - v.map(x => s"new TextAnalyzeTask(new Dictionary" + - s"${DotnetWrappableParam.dotnetDefaultRender(x.parameters)})").mkString(",") - */ - - /* - override def rConstructorLine(v: Seq[TextAnalyzeTask]): String = { - val className = "com.microsoft.azure.synapse.ml.cognitive.text.TextAnalyzeTask" - val elements = v.map(x => s"""invoke_new(sc, "${className}", ${RWrappableParam.rDefaultRender(x.parameters)})""") - s"${rName(v)}=${elements}".replace("=List(", "=list(") - }*/ - override def postprocessResponseUdf: UserDefinedFunction = { UDFUtils.oldUdf(postprocessResponse _, ArrayType(UnpackedAHTResponse.schema)) } diff --git a/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/search/SearchWriterSuite.scala b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/search/SearchWriterSuite.scala index 8539ef17bd..da4e24ce84 100644 --- a/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/search/SearchWriterSuite.scala +++ b/cognitive/src/test/scala/com/microsoft/azure/synapse/ml/services/search/SearchWriterSuite.scala @@ -131,7 +131,9 @@ class SearchWriterSuite extends TestBase with AzureSearchKey with IndexJsonGette lazy val indexName: String = generateIndexName() override def beforeAll(): Unit = { - print("WARNING CREATING SEARCH ENGINE!") + println("WARNING CREATING SEARCH ENGINE!") + println("materializing keys for tests") + println((openAIAPIKey + openAIAPIKeyGpt4 + openAIServiceName + cognitiveKey).length) SearchIndex.createIfNoneExists(azureSearchKey, testServiceName, createSimpleIndexJson(indexName)) diff --git a/core/src/main/dotnet/src/Base/Schemas.cs b/core/src/main/dotnet/src/Base/Schemas.cs deleted file mode 100644 index b64cc6eaf2..0000000000 --- a/core/src/main/dotnet/src/Base/Schemas.cs +++ /dev/null @@ -1,274 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using Microsoft.Spark.Interop; -using Microsoft.Spark.Interop.Ipc; -using System.Collections.Generic; - -namespace SynapseML.Dotnet.Utils -{ - - public sealed class TextAnalyzeTask : IJvmObjectReferenceProvider - { - - public Dictionary Parameters { get; init; } - - public TextAnalyzeTask(Dictionary parameters) - : this(SparkEnvironment.JvmBridge.CallConstructor("com.microsoft.azure.synapse.ml.services.text.TextAnalyzeTask", parameters.ToJavaHashMap())) - { - } - - internal TextAnalyzeTask(JvmObjectReference jvmObject) - { - Reference = jvmObject; - JvmObjectReference parameters = (JvmObjectReference)Reference.Invoke("parameters"); - JvmObjectReference hashMap = (JvmObjectReference)SparkEnvironment.JvmBridge.CallStaticJavaMethod( - "org.apache.spark.api.dotnet.DotnetUtils", "convertToJavaMap", parameters); - JvmObjectReference[] keySet = (JvmObjectReference[])( - (JvmObjectReference)hashMap.Invoke("keySet")).Invoke("toArray"); - var dict = new Dictionary(); - foreach (var key in keySet) - { - dict[(string)key.Invoke("toString")] = (string)((JvmObjectReference)parameters.Invoke("get", key)).Invoke("get"); - } - this.Parameters = dict; - } - - public JvmObjectReference Reference { get; init; } - } - - public sealed class TimeSeriesPoint : IJvmObjectReferenceProvider - { - - public string TimeStamp { get; init; } - - public double Value { get; init; } - - public TimeSeriesPoint(string timestamp, double value) - : this(SparkEnvironment.JvmBridge.CallConstructor("com.microsoft.azure.synapse.ml.services.anomaly.TimeSeriesPoint", timestamp, value)) - { - } - - internal TimeSeriesPoint(JvmObjectReference jvmObject) - { - Reference = jvmObject; - this.TimeStamp = (string)Reference.Invoke("timestamp"); - this.Value = (double)Reference.Invoke("value"); - } - - public JvmObjectReference Reference { get; init; } - } - - public sealed class TextAndTranslation : IJvmObjectReferenceProvider - { - - public string Text { get; init; } - - public string Translation { get; init; } - - public TextAndTranslation(string text, string translation) - : this(SparkEnvironment.JvmBridge.CallConstructor("com.microsoft.azure.synapse.ml.services.translate.TextAndTranslation", text, translation)) - { - } - - internal TextAndTranslation(JvmObjectReference jvmObject) - { - Reference = jvmObject; - this.Text = (string)Reference.Invoke("text"); - this.Translation = (string)Reference.Invoke("translation"); - } - - public JvmObjectReference Reference { get; init; } - } - -#nullable enable - public sealed class TargetInput : IJvmObjectReferenceProvider - { - public string? Category { get; init; } - public Glossary[]? Glossaries { get; init; } - public string TargetUrl { get; init; } - public string Language { get; init; } - public string? StorageSource { get; init; } - - public TargetInput(string targetUrl, string language, string? category = null, Glossary[]? glossaries = null, string? storageSource = null) - : this(SparkEnvironment.JvmBridge.CallConstructor("com.microsoft.azure.synapse.ml.services.translate.TargetInput", targetUrl, language, category, glossaries, storageSource)) - { - } - - internal TargetInput(JvmObjectReference jvmObject) - { - Reference = jvmObject; - this.Category = (string)Reference.Invoke("category"); - JvmObjectReference[] jvmObjects = (JvmObjectReference[])Reference.Invoke("glossaries"); - Glossary[] glossaries = new Glossary[jvmObjects.Length]; - for (int i = 0; i < jvmObjects.Length; i++) - { - glossaries[i] = new Glossary(jvmObjects[i]); - } - this.Glossaries = glossaries; - this.TargetUrl = (string)Reference.Invoke("targetUrl"); - this.Language = (string)Reference.Invoke("language"); - this.StorageSource = (string)Reference.Invoke("storageSource"); - } - - public JvmObjectReference Reference { get; init; } - } - - public sealed class Glossary : IJvmObjectReferenceProvider - { - public string Format { get; init; } - public string GlossaryUrl { get; init; } - public string? StorageSource { get; init; } - public string? Version { get; init; } - - public Glossary(string format, string glossaryUrl, string? storageSource = null, string? version = null) - : this(SparkEnvironment.JvmBridge.CallConstructor("com.microsoft.azure.synapse.ml.services.translate.Glossary", format, glossaryUrl, storageSource, version)) - { - } - - internal Glossary(JvmObjectReference jvmObject) - { - Reference = jvmObject; - this.Format = (string)Reference.Invoke("format"); - this.GlossaryUrl = (string)Reference.Invoke("glossaryUrl"); - this.StorageSource = (string)Reference.Invoke("storageSource"); - this.Version = (string)Reference.Invoke("version"); - } - - public JvmObjectReference Reference { get; init; } - - } - - public sealed class ICECategoricalFeature : IJvmObjectReferenceProvider - { - public string Name { get; init; } - public int? NumTopValues { get; init; } - public string? OutputColName { get; init; } - - public ICECategoricalFeature(string name, int? numTopValues = null, string? outputColName = null) - : this(SparkEnvironment.JvmBridge.CallConstructor("com.microsoft.azure.synapse.ml.explainers.ICECategoricalFeature", name, numTopValues, outputColName)) - { - } - - internal ICECategoricalFeature(JvmObjectReference jvmObject) - { - Reference = jvmObject; - this.Name = (string)Reference.Invoke("name"); - this.NumTopValues = (int)Reference.Invoke("numTopValues"); - this.OutputColName = (string)Reference.Invoke("outputColName"); - } - - public JvmObjectReference Reference { get; init; } - - public bool Validate() => (bool)Reference.Invoke("validate"); - - public int GetNumTopValue() => (int)Reference.Invoke("getNumTopValue"); - } - - public sealed class ICENumericFeature : IJvmObjectReferenceProvider - { - public string Name { get; init; } - public int? NumSplits { get; init; } - public double? RangeMin { get; init; } - public double? RangeMax { get; init; } - public string? OutputColName { get; init; } - - public ICENumericFeature(string name, int? numTopValues = null, double? RangeMin = null, double? RangeMax = null, string? outputColName = null) - : this(SparkEnvironment.JvmBridge.CallConstructor("com.microsoft.azure.synapse.ml.explainers.ICENumericFeature", name, numTopValues, outputColName)) - { - } - - internal ICENumericFeature(JvmObjectReference jvmObject) - { - Reference = jvmObject; - this.Name = (string)Reference.Invoke("name"); - this.NumSplits = (int)Reference.Invoke("numSplits"); - this.RangeMin = (double)Reference.Invoke("rangeMin"); - this.RangeMax = (double)Reference.Invoke("rangeMax"); - this.OutputColName = (string)Reference.Invoke("outputColName"); - } - - public JvmObjectReference Reference { get; init; } - - public bool Validate() => (bool)Reference.Invoke("validate"); - - public int GetNumSplits() => (int)Reference.Invoke("getNumSplits"); - } - - public sealed class ModelState : IJvmObjectReferenceProvider - { - public int[]? EpochIds { get; init; } - public double[]? TrainLosses { get; init; } - public double[]? ValidationLosses { get; init; } - public double[]? LatenciesInSeconds { get; init; } - - public ModelState(int[]? epochIds = null, double[]? trainLosses = null, double[]? validationLosses = null, double[]? latenciesInSeconds = null) - : this(SparkEnvironment.JvmBridge.CallConstructor("com.microsoft.azure.synapse.ml.services.anomaly.ModelState", epochIds, trainLosses, validationLosses, latenciesInSeconds)) - { - } - - internal ModelState(JvmObjectReference jvmObject) - { - Reference = jvmObject; - this.EpochIds = (int[])Reference.Invoke("epochIds"); - this.TrainLosses = (double[])Reference.Invoke("trainLosses"); - this.ValidationLosses = (double[])Reference.Invoke("validationLosses"); - this.LatenciesInSeconds = (double[])Reference.Invoke("latenciesInSeconds"); - } - - public JvmObjectReference Reference { get; init; } - - } - - public sealed class DMAVariableState : IJvmObjectReferenceProvider - { - public string? Variable { get; init; } - public double? FilledNARatio { get; init; } - public int? EffectiveCount { get; init; } - public string? FirstTimestamp { get; init; } - public string? LastTimestamp { get; init; } - - - public DMAVariableState(string? variable = null, double? filledNARatio = null, int? effectiveCount = null, string? firstTimestamp = null, string? lastTimestamp = null) - : this(SparkEnvironment.JvmBridge.CallConstructor("com.microsoft.azure.synapse.ml.services.anomaly.DMAVariableState", variable, filledNARatio, effectiveCount, firstTimestamp, lastTimestamp)) - { - } - - internal DMAVariableState(JvmObjectReference jvmObject) - { - Reference = jvmObject; - this.Variable = (string)Reference.Invoke("variable"); - this.FilledNARatio = (double)Reference.Invoke("filledNARatio"); - this.EffectiveCount = (int)Reference.Invoke("effectiveCount"); - this.FirstTimestamp = (string)Reference.Invoke("firstTimestamp"); - this.LastTimestamp = (string)Reference.Invoke("lastTimestamp"); - } - - public JvmObjectReference Reference { get; init; } - - } - - public sealed class DiagnosticsInfo: IJvmObjectReferenceProvider - { - public ModelState? ModelState { get; init; } - public DMAVariableState? VariableStates { get; init; } - - public DiagnosticsInfo(ModelState? modelState = null, DMAVariableState? variableStates = null) - : this(SparkEnvironment.JvmBridge.CallConstructor("com.microsoft.azure.synapse.ml.services.anomaly.DiagnosticsInfo", modelState, variableStates)) - { - } - - internal DiagnosticsInfo(JvmObjectReference jvmObject) - { - Reference = jvmObject; - this.ModelState = new ModelState((JvmObjectReference)Reference.Invoke("modelState")); - this.VariableStates = new DMAVariableState((JvmObjectReference)Reference.Invoke("variableStates")); - } - - public JvmObjectReference Reference { get; init; } - - } - -#nullable disable -} diff --git a/core/src/main/dotnet/src/Internal/Dotnet/CompilerServices.cs b/core/src/main/dotnet/src/Internal/Dotnet/CompilerServices.cs deleted file mode 100644 index 2e00473c25..0000000000 --- a/core/src/main/dotnet/src/Internal/Dotnet/CompilerServices.cs +++ /dev/null @@ -1,8 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -namespace System.Runtime.CompilerServices -{ - internal static class IsExternalInit {} -} diff --git a/core/src/main/dotnet/src/Internal/Dotnet/DictionaryExtensions.cs b/core/src/main/dotnet/src/Internal/Dotnet/DictionaryExtensions.cs deleted file mode 100644 index e84f3d51a1..0000000000 --- a/core/src/main/dotnet/src/Internal/Dotnet/DictionaryExtensions.cs +++ /dev/null @@ -1,28 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using Microsoft.Spark.Interop; -using Microsoft.Spark.Interop.Internal.Java.Util; - -namespace System.Collections.Generic -{ - public static class Dictionary - { - /// - /// A custom extension method that helps transform from dotnet - /// Dictionary<string, string> to java.util.HashMap. - /// - /// a Dictionary instance - /// - internal static HashMap ToJavaHashMap(this Dictionary dictionary) - { - var hashMap = new HashMap(SparkEnvironment.JvmBridge); - foreach (KeyValuePair item in dictionary) - { - hashMap.Put(item.Key, item.Value); - } - return hashMap; - } - } -} diff --git a/core/src/main/dotnet/src/Params/HyperparamBuilder.cs b/core/src/main/dotnet/src/Params/HyperparamBuilder.cs deleted file mode 100644 index 030312cd52..0000000000 --- a/core/src/main/dotnet/src/Params/HyperparamBuilder.cs +++ /dev/null @@ -1,210 +0,0 @@ -// Copyright (C) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. See LICENSE in project root for information. - -using System; -using System.Collections.Generic; -using System.Linq; -using System.Reflection; -using Microsoft.Spark.Interop; -using Microsoft.Spark.Interop.Ipc; -using Microsoft.Spark.ML.Feature; -using Microsoft.Spark.ML.Feature.Param; -using Microsoft.Spark.Utils; - -namespace Synapse.ML.Automl -{ - // - /// Specifies the search space for hyperparameters. - /// - public class HyperparamBuilder : IJvmObjectReferenceProvider - { - private static readonly string s_className = "com.microsoft.azure.synapse.ml.automl.HyperparamBuilder"; - - /// - /// Creates a new instance of a - /// - public HyperparamBuilder() - : this(SparkEnvironment.JvmBridge.CallConstructor(s_className)) - { - } - - internal HyperparamBuilder(JvmObjectReference jvmObject) - { - Reference = jvmObject; - } - - public JvmObjectReference Reference { get; init; } - - public HyperparamBuilder AddHyperparam(Param param, Dist values) => - WrapAsHyperparamBuilder((JvmObjectReference)Reference.Invoke("addHyperparam", param, values)); - - public (Param, DistObject)[] Build() { - JvmObjectReference[] jvmObjects = (JvmObjectReference[])Reference.Invoke("build"); - var result = new (Param, DistObject)[jvmObjects.Length]; - Dictionary classMapping = JvmObjectUtils.ConstructJavaClassMapping( - typeof(DistObject), - "s_className"); - for (int i = 0; i < jvmObjects.Length; i++) - { - Param param = new Param((JvmObjectReference)jvmObjects[i].Invoke("_1")); - JvmObjectReference distObject = (JvmObjectReference)jvmObjects[i].Invoke("_2"); - if (JvmObjectUtils.TryConstructInstanceFromJvmObject( - distObject, - classMapping, - out DistObject instance)) - { - result[i] = (param, instance); - } - } - return result; - } - - private static HyperparamBuilder WrapAsHyperparamBuilder(object obj) => - new HyperparamBuilder((JvmObjectReference)obj); - - } - - public abstract class RangeHyperParam : Dist, IJvmObjectReferenceProvider - { - public RangeHyperParam(string className, T min, T max, long seed) - : this(SparkEnvironment.JvmBridge.CallConstructor(className, min, max, seed)) - { - } - - internal RangeHyperParam(JvmObjectReference jvmObject) - { - Reference = jvmObject; - } - - public JvmObjectReference Reference { get; init; } - - } - - public class IntRangeHyperParam : RangeHyperParam - { - private static readonly string s_className = "com.microsoft.azure.synapse.ml.automl.IntRangeHyperParam"; - - public IntRangeHyperParam(int min, int max, long seed = 0) - : base(s_className, min, max, seed) - { - } - - internal IntRangeHyperParam(JvmObjectReference jvmObject) : base(jvmObject) - { - } - - public override int GetNext() => - (int)Reference.Invoke("getNext"); - - public override ParamPair GetParamPair(Param param) => - new ParamPair((JvmObjectReference)Reference.Invoke("getParamPair", param)); - - private static IntRangeHyperParam WrapAsIntRangeHyperParam(object obj) => - new IntRangeHyperParam((JvmObjectReference)obj); - - } - - public class LongRangeHyperParam : RangeHyperParam - { - private static readonly string s_className = "com.microsoft.azure.synapse.ml.automl.LongRangeHyperParam"; - - public LongRangeHyperParam(long min, long max, long seed = 0) - : base(s_className, min, max, seed) - { - } - - internal LongRangeHyperParam(JvmObjectReference jvmObject) : base(jvmObject) - { - } - - public override long GetNext() => - (long)Reference.Invoke("getNext"); - - public override ParamPair GetParamPair(Param param) => - new ParamPair((JvmObjectReference)Reference.Invoke("getParamPair", param)); - - private static LongRangeHyperParam WrapAsLongRangeHyperParam(object obj) => - new LongRangeHyperParam((JvmObjectReference)obj); - - } - - public class FloatRangeHyperParam : RangeHyperParam - { - private static readonly string s_className = "com.microsoft.azure.synapse.ml.automl.FloatRangeHyperParam"; - - public FloatRangeHyperParam(float min, float max, long seed = 0) - : base(s_className, min, max, seed) - { - } - - internal FloatRangeHyperParam(JvmObjectReference jvmObject) : base(jvmObject) - { - } - - public override float GetNext() => - (float)Reference.Invoke("getNext"); - - public override ParamPair GetParamPair(Param param) => - new ParamPair((JvmObjectReference)Reference.Invoke("getParamPair", param)); - - private static FloatRangeHyperParam WrapAsFloatRangeHyperParam(object obj) => - new FloatRangeHyperParam((JvmObjectReference)obj); - - } - - public class DoubleRangeHyperParam : RangeHyperParam - { - private static readonly string s_className = "com.microsoft.azure.synapse.ml.automl.DoubleRangeHyperParam"; - - public DoubleRangeHyperParam(double min, double max, long seed = 0) - : base(s_className, min, max, seed) - { - } - - internal DoubleRangeHyperParam(JvmObjectReference jvmObject) : base(jvmObject) - { - } - - public override double GetNext() => - (double)Reference.Invoke("getNext"); - - public override ParamPair GetParamPair(Param param) => - new ParamPair((JvmObjectReference)Reference.Invoke("getParamPair", param)); - - private static DoubleRangeHyperParam WrapAsDoubleRangeHyperParam(object obj) => - new DoubleRangeHyperParam((JvmObjectReference)obj); - - } - - public class DiscreteHyperParam : Dist, IJvmObjectReferenceProvider - { - private static readonly string s_className = "com.microsoft.azure.synapse.ml.automl.DiscreteHyperParam"; - - /// - /// Creates a new instance of a - /// - public DiscreteHyperParam(T[] values, long seed = 0) - : this(SparkEnvironment.JvmBridge.CallConstructor(s_className, values, seed)) - { - } - - internal DiscreteHyperParam(JvmObjectReference jvmObject) - { - Reference = jvmObject; - } - - public JvmObjectReference Reference { get; init; } - - public override T GetNext() => - (T)Reference.Invoke("getNext"); - - public override ParamPair GetParamPair(Param param) => - new ParamPair((JvmObjectReference)Reference.Invoke("getParamPair", param)); - - private static DiscreteHyperParam WrapAsDiscreteHyperParam(object obj) => - new DiscreteHyperParam((JvmObjectReference)obj); - - } - - -} diff --git a/core/src/main/dotnet/src/Params/LightGBMBooster.cs b/core/src/main/dotnet/src/Params/LightGBMBooster.cs deleted file mode 100644 index 5cecdd0b2f..0000000000 --- a/core/src/main/dotnet/src/Params/LightGBMBooster.cs +++ /dev/null @@ -1,204 +0,0 @@ -// Copyright (C) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. See LICENSE in project root for information. - -using Microsoft.Spark.Interop; -using Microsoft.Spark.Interop.Ipc; -using Microsoft.Spark.Sql; - -namespace Synapse.ML.LightGBM.Param -{ - /// - /// Represents a LightGBM Booster learner - /// - public class LightGBMBooster : IJvmObjectReferenceProvider - { - private static readonly string s_className = "com.microsoft.azure.synapse.ml.lightgbm.booster.LightGBMBooster"; - -#nullable enable - public LightGBMBooster(LightGBMDataset? trainDataset = null, string? parameters = null, string? modelStr = null) - : this(SparkEnvironment.JvmBridge.CallConstructor(s_className, trainDataset, parameters, modelStr)) - { - } -#nullable disable - - public LightGBMBooster(LightGBMDataset trainDataset, string parameters) - : this(SparkEnvironment.JvmBridge.CallConstructor(s_className, trainDataset, parameters)) - { - } - - public LightGBMBooster(string model) - : this(SparkEnvironment.JvmBridge.CallConstructor(s_className, model)) - { - } - - internal LightGBMBooster(JvmObjectReference jvmObject) - { - Reference = jvmObject; - } - - public JvmObjectReference Reference { get; private set; } - - /// Merges this Booster with the specified model. - /// The string serialized representation of the learner to merge. - public void MergeBooster(string model) => - Reference.Invoke("mergeBooster", model); - - /// Adds the specified LightGBMDataset to be the validation dataset. - /// The LightGBMDataset to add as the validation dataset. - public void AddValidationDataset(LightGBMDataset dataset) => - Reference.Invoke("addValidationDataset", dataset); - - /// Saves the booster to string representation. - /// The serialized string representation of the Booster. - public string SaveToString() => - (string)Reference.Invoke("saveToString"); - - /// Get the evaluation dataset column names from the native booster. - /// The evaluation dataset column names. - public string[] GetEvalNames() => - (string[])Reference.Invoke("getEvalNames"); - - // /// - // /// Get the evaluation for the training data and validation data. - // /// - // /// The names of the evaluation metrics. - // /// - // /// Index of data, 0: training data, 1: 1st validation data, 2: 2nd validation data and so on. - // /// - // /// Array of tuples containing the evaluation metric name and metric value. - // public Tuple[] GetEvalResults(string[] evalNames, int dataIndex) => - // (Tuple[])Reference.Invoke("getEvalResults", evalNames, dataIndex); - - /// Reset the specified parameters on the native booster. - /// The new parameters to set. - public void ResetParameter(string newParameters) => - Reference.Invoke("resetParameter", newParameters); - - // /// - // /// Get predictions for the training and evaluation data on the booster. - // /// - // /// - // /// Index of data, 0: training data, 1: 1st validation data, 2: 2nd validation data and so on. - // /// - // /// Whether this is a classification scenario or not. - // /// - // /// The predictions as a 2D array where first level is for row index and second level is optional if there are classes. - // /// - // public double[][] InnerPredict(int dataIndex, bool classification) => - // (double[][])Reference.Invoke("innerPredict", dataIndex, classification); - - /// Updates the booster for one iteration. - /// True if terminated training early. - public bool UpdateOneIteration() => - (bool)Reference.Invoke("updateOneIteration"); - - /// - /// Updates the booster with custom loss function for one iteration. - /// - /// The gradient from custom loss function. - /// The hessian matrix from custom loss function. - /// True if terminated training early. - public bool UpdateOneIterationCustom(float[] gradient, float[] hessian) => - (bool)Reference.Invoke("updateOneIterationCustom", gradient, hessian); - - /// - /// Sets the start index of the iteration to predict. - /// If <= 0, starts from the first iteration. - /// - /// The start index of the iteration to predict. - public void SetStartIteration(int startIteration) => - Reference.Invoke("setStartIteration", startIteration); - - /// - /// Sets the total number of iterations used in the prediction. - /// If <= 0, all iterations from ``start_iteration`` are used (no limits). - /// - /// The total number of iterations used in the prediction. - public void SetNumIterations(int numIterations) => - Reference.Invoke("setNumIterations", numIterations); - - /// - /// Sets the best iteration and also the numIterations to be the best iteration. - /// - /// The best iteration computed by early stopping. - public void SetBestIteration(int bestIteration) => - Reference.Invoke("setBestIteration", bestIteration); - - /// - /// Saves the native model serialized representation to file. - /// - /// The spark session - /// The name of the file to save the model to - /// Whether to overwrite if the file already exists - public void SaveNativeModel(SparkSession session, string filename, bool overwrite) => - Reference.Invoke("saveNativeModel", session, filename, overwrite); - - /// - /// Dumps the native model pointer to file. - /// - /// The spark session - /// The name of the file to save the model to - /// Whether to overwrite if the file already exists - public void DumpModel(SparkSession session, string filename, bool overwrite) => - Reference.Invoke("dumpModel", session, filename, overwrite); - - /// - /// Frees any native memory held by the underlying booster pointer. - /// - public void FreeNativeMemory() => - Reference.Invoke("freeNativeMemory"); - - /// - /// Calls into LightGBM to retrieve the feature importances. - /// - /// Can be "split" or "gain" - /// The feature importance values as an array. - public double[] GetFeatureImportances(string importanceType) => - (double[])Reference.Invoke("getFeatureImportances", importanceType); - - } - - public class LightGBMDataset : IJvmObjectReferenceProvider - { - - // doesn't support public constructor yet - // public LightGBMDataset(SWIGTYPE_p_void datasetPtr) - // : this(SparkEnvironment.JvmBridge.CallConstructor("com.microsoft.ml.spark.lightgbm.dataset.LightGBMDataset", datasetPtr)) - // { - // } - - internal LightGBMDataset(JvmObjectReference jvmObject) - { - Reference = jvmObject; - } - - public JvmObjectReference Reference { get; private set; } - - public float[] GetLabel() => (float[])Reference.Invoke("getLabel"); - - public int NumData() => (int)Reference.Invoke("numData"); - - public int NumFeature() => (int)Reference.Invoke("numFeature"); - - public void ValidateDataset() => Reference.Invoke("validateDataset"); - - public void AddDoubleField(double[] field, string fieldName, int numRows) => - Reference.Invoke("addDoubleField", field, fieldName, numRows); - - public void AddIntField(int[] field, string fieldName, int numRows) => - Reference.Invoke("addIntField", field, fieldName, numRows); - - public void AddGroupColumn(T[] rows) => - Reference.Invoke("addGroupColumn", rows); - -#nullable enable - public void SetFeatureNames(string[]? featureNamesOpt, int numCols) => - Reference.Invoke("setFeatureNames", featureNamesOpt, numCols); -#nullable disable - - public void Close() => - Reference.Invoke("close"); - - } - -} diff --git a/core/src/main/dotnet/src/Params/ParamMap.cs b/core/src/main/dotnet/src/Params/ParamMap.cs deleted file mode 100644 index 4f19a95801..0000000000 --- a/core/src/main/dotnet/src/Params/ParamMap.cs +++ /dev/null @@ -1,16 +0,0 @@ -// Copyright (C) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. See LICENSE in project root for information. - -using System.Collections.Generic; - -namespace Microsoft.Spark.ML.Feature.Param -{ - // - /// Represents the parameter values. - /// - public abstract class ParamSpace - { - public abstract IEnumerable ParamMaps(); - } - -} diff --git a/core/src/main/dotnet/src/Params/ParamSpace.cs b/core/src/main/dotnet/src/Params/ParamSpace.cs deleted file mode 100644 index ec9a680b72..0000000000 --- a/core/src/main/dotnet/src/Params/ParamSpace.cs +++ /dev/null @@ -1,57 +0,0 @@ -// Copyright (C) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. See LICENSE in project root for information. - -using System.Collections.Generic; -using Microsoft.Spark.Interop; -using Microsoft.Spark.Interop.Ipc; -using Microsoft.Spark.ML.Feature.Param; - -namespace Synapse.ML.Automl -{ - - public abstract class DistObject - { - } - - // - /// Represents a distribution of values. - /// - /// - /// The type T of the values generated. - /// - public abstract class Dist : DistObject - { - public abstract T GetNext(); - - public abstract ParamPair GetParamPair(Param param); - - } - - - // - /// Represents a generator of parameters with specified distributions added by the HyperparamBuilder. - /// - public class RandomSpace : ParamSpace, IJvmObjectReferenceProvider - { - private static readonly string s_className = "com.microsoft.azure.synapse.ml.automl.RandomSpace"; - - /// - /// Creates a new instance of a - /// - public RandomSpace((Param, DistObject)[] value) - : this(SparkEnvironment.JvmBridge.CallConstructor(s_className, value)) - { - } - - internal RandomSpace(JvmObjectReference jvmObject) - { - Reference = jvmObject; - } - - public JvmObjectReference Reference { get; init; } - - override public IEnumerable ParamMaps() => - (IEnumerable)Reference.Invoke("paramMaps"); - } - -} diff --git a/core/src/main/dotnet/src/dotnetBase.csproj b/core/src/main/dotnet/src/dotnetBase.csproj deleted file mode 100644 index 2f15aeb266..0000000000 --- a/core/src/main/dotnet/src/dotnetBase.csproj +++ /dev/null @@ -1,41 +0,0 @@ - - - - netstandard2.1 - 9.0 - SynapseML.DotnetBase - true - - SynapseML .NET Base - 1.0.4 - - - - - - - - - - - - - - - - - - - - - - - - Microsoft.Spark - - - - false - - - diff --git a/core/src/main/dotnet/src/org/apache/spark/ml/classification/LogisticRegression.cs b/core/src/main/dotnet/src/org/apache/spark/ml/classification/LogisticRegression.cs deleted file mode 100644 index 365296d91b..0000000000 --- a/core/src/main/dotnet/src/org/apache/spark/ml/classification/LogisticRegression.cs +++ /dev/null @@ -1,508 +0,0 @@ -// Copyright (C) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. See LICENSE in project root for information. - - -using System; -using System.Collections.Generic; -using System.Linq; -using System.Reflection; -using Microsoft.Spark.ML.Feature; -using Microsoft.Spark.ML.Feature.Param; -using Microsoft.Spark.Interop; -using Microsoft.Spark.Interop.Ipc; -using Microsoft.Spark.Interop.Internal.Java.Util; -using Microsoft.Spark.Sql; -using Microsoft.Spark.Sql.Types; -using Microsoft.Spark.Utils; -using SynapseML.Dotnet.Utils; -using Synapse.ML.LightGBM.Param; -using Microsoft.Spark.ML.Classification; - -namespace Microsoft.Spark.ML.Classification -{ - /// - /// implements LogisticRegression - /// - public class LogisticRegression : JavaEstimator, IJavaMLWritable, IJavaMLReadable - { - private static readonly string s_className = "org.apache.spark.ml.classification.LogisticRegression"; - - /// - /// Creates a without any parameters. - /// - public LogisticRegression() : base(s_className) - { - } - - /// - /// Creates a with a UID that is used to give the - /// a unique ID. - /// - /// An immutable unique ID for the object and its derivatives. - public LogisticRegression(string uid) : base(s_className, uid) - { - } - - internal LogisticRegression(JvmObjectReference jvmObject) : base(jvmObject) - { - } - - /// - /// Sets aggregationDepth value for - /// - /// - /// suggested depth for treeAggregate (>= 2) - /// - /// New LogisticRegression object - public LogisticRegression SetAggregationDepth(int value) => - WrapAsLogisticRegression(Reference.Invoke("setAggregationDepth", (object)value)); - - /// - /// Sets elasticNetParam value for - /// - /// - /// the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty - /// - /// New LogisticRegression object - public LogisticRegression SetElasticNetParam(double value) => - WrapAsLogisticRegression(Reference.Invoke("setElasticNetParam", (object)value)); - - /// - /// Sets family value for - /// - /// - /// The name of family which is a description of the label distribution to be used in the model. Supported options: auto, binomial, multinomial. - /// - /// New LogisticRegression object - public LogisticRegression SetFamily(string value) => - WrapAsLogisticRegression(Reference.Invoke("setFamily", (object)value)); - - /// - /// Sets featuresCol value for - /// - /// - /// features column name - /// - /// New LogisticRegression object - public LogisticRegression SetFeaturesCol(string value) => - WrapAsLogisticRegression(Reference.Invoke("setFeaturesCol", (object)value)); - - /// - /// Sets fitIntercept value for - /// - /// - /// whether to fit an intercept term - /// - /// New LogisticRegression object - public LogisticRegression SetFitIntercept(bool value) => - WrapAsLogisticRegression(Reference.Invoke("setFitIntercept", (object)value)); - - /// - /// Sets labelCol value for - /// - /// - /// label column name - /// - /// New LogisticRegression object - public LogisticRegression SetLabelCol(string value) => - WrapAsLogisticRegression(Reference.Invoke("setLabelCol", (object)value)); - - /// - /// Sets lowerBoundsOnCoefficients value for - /// - /// - /// The lower bounds on coefficients if fitting under bound constrained optimization. - /// - /// New LogisticRegression object - public LogisticRegression SetLowerBoundsOnCoefficients(object value) => - WrapAsLogisticRegression(Reference.Invoke("setLowerBoundsOnCoefficients", (object)value)); - - /// - /// Sets lowerBoundsOnIntercepts value for - /// - /// - /// The lower bounds on intercepts if fitting under bound constrained optimization. - /// - /// New LogisticRegression object - public LogisticRegression SetLowerBoundsOnIntercepts(object value) => - WrapAsLogisticRegression(Reference.Invoke("setLowerBoundsOnIntercepts", (object)value)); - - /// - /// Sets maxBlockSizeInMB value for - /// - /// - /// Maximum memory in MB for stacking input data into blocks. Data is stacked within partitions. If more than remaining data size in a partition then it is adjusted to the data size. Default 0.0 represents choosing optimal value, depends on specific algorithm. Must be >= 0. - /// - /// New LogisticRegression object - public LogisticRegression SetMaxBlockSizeInMB(double value) => - WrapAsLogisticRegression(Reference.Invoke("setMaxBlockSizeInMB", (object)value)); - - /// - /// Sets maxIter value for - /// - /// - /// maximum number of iterations (>= 0) - /// - /// New LogisticRegression object - public LogisticRegression SetMaxIter(int value) => - WrapAsLogisticRegression(Reference.Invoke("setMaxIter", (object)value)); - - /// - /// Sets predictionCol value for - /// - /// - /// prediction column name - /// - /// New LogisticRegression object - public LogisticRegression SetPredictionCol(string value) => - WrapAsLogisticRegression(Reference.Invoke("setPredictionCol", (object)value)); - - /// - /// Sets probabilityCol value for - /// - /// - /// Column name for predicted class conditional probabilities. Note: Not all models output well-calibrated probability estimates! These probabilities should be treated as confidences, not precise probabilities - /// - /// New LogisticRegression object - public LogisticRegression SetProbabilityCol(string value) => - WrapAsLogisticRegression(Reference.Invoke("setProbabilityCol", (object)value)); - - /// - /// Sets rawPredictionCol value for - /// - /// - /// raw prediction (a.k.a. confidence) column name - /// - /// New LogisticRegression object - public LogisticRegression SetRawPredictionCol(string value) => - WrapAsLogisticRegression(Reference.Invoke("setRawPredictionCol", (object)value)); - - /// - /// Sets regParam value for - /// - /// - /// regularization parameter (>= 0) - /// - /// New LogisticRegression object - public LogisticRegression SetRegParam(double value) => - WrapAsLogisticRegression(Reference.Invoke("setRegParam", (object)value)); - - /// - /// Sets standardization value for - /// - /// - /// whether to standardize the training features before fitting the model - /// - /// New LogisticRegression object - public LogisticRegression SetStandardization(bool value) => - WrapAsLogisticRegression(Reference.Invoke("setStandardization", (object)value)); - - /// - /// Sets threshold value for - /// - /// - /// threshold in binary classification prediction, in range [0, 1] - /// - /// New LogisticRegression object - public LogisticRegression SetThreshold(double value) => - WrapAsLogisticRegression(Reference.Invoke("setThreshold", (object)value)); - - /// - /// Sets thresholds value for - /// - /// - /// Thresholds in multi-class classification to adjust the probability of predicting each class. Array must have length equal to the number of classes, with values > 0 excepting that at most one value may be 0. The class with largest value p/t is predicted, where p is the original probability of that class and t is the class's threshold - /// - /// New LogisticRegression object - public LogisticRegression SetThresholds(double[] value) => - WrapAsLogisticRegression(Reference.Invoke("setThresholds", (object)value)); - - /// - /// Sets tol value for - /// - /// - /// the convergence tolerance for iterative algorithms (>= 0) - /// - /// New LogisticRegression object - public LogisticRegression SetTol(double value) => - WrapAsLogisticRegression(Reference.Invoke("setTol", (object)value)); - - /// - /// Sets upperBoundsOnCoefficients value for - /// - /// - /// The upper bounds on coefficients if fitting under bound constrained optimization. - /// - /// New LogisticRegression object - public LogisticRegression SetUpperBoundsOnCoefficients(object value) => - WrapAsLogisticRegression(Reference.Invoke("setUpperBoundsOnCoefficients", (object)value)); - - /// - /// Sets upperBoundsOnIntercepts value for - /// - /// - /// The upper bounds on intercepts if fitting under bound constrained optimization. - /// - /// New LogisticRegression object - public LogisticRegression SetUpperBoundsOnIntercepts(object value) => - WrapAsLogisticRegression(Reference.Invoke("setUpperBoundsOnIntercepts", (object)value)); - - /// - /// Sets weightCol value for - /// - /// - /// weight column name. If this is not set or empty, we treat all instance weights as 1.0 - /// - /// New LogisticRegression object - public LogisticRegression SetWeightCol(string value) => - WrapAsLogisticRegression(Reference.Invoke("setWeightCol", (object)value)); - - - /// - /// Gets aggregationDepth value for - /// - /// - /// aggregationDepth: suggested depth for treeAggregate (>= 2) - /// - public int GetAggregationDepth() => - (int)Reference.Invoke("getAggregationDepth"); - - - /// - /// Gets elasticNetParam value for - /// - /// - /// elasticNetParam: the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty - /// - public double GetElasticNetParam() => - (double)Reference.Invoke("getElasticNetParam"); - - - /// - /// Gets family value for - /// - /// - /// family: The name of family which is a description of the label distribution to be used in the model. Supported options: auto, binomial, multinomial. - /// - public string GetFamily() => - (string)Reference.Invoke("getFamily"); - - - /// - /// Gets featuresCol value for - /// - /// - /// featuresCol: features column name - /// - public string GetFeaturesCol() => - (string)Reference.Invoke("getFeaturesCol"); - - - /// - /// Gets fitIntercept value for - /// - /// - /// fitIntercept: whether to fit an intercept term - /// - public bool GetFitIntercept() => - (bool)Reference.Invoke("getFitIntercept"); - - - /// - /// Gets labelCol value for - /// - /// - /// labelCol: label column name - /// - public string GetLabelCol() => - (string)Reference.Invoke("getLabelCol"); - - - /// - /// Gets lowerBoundsOnCoefficients value for - /// - /// - /// lowerBoundsOnCoefficients: The lower bounds on coefficients if fitting under bound constrained optimization. - /// - public object GetLowerBoundsOnCoefficients() => - (object)Reference.Invoke("getLowerBoundsOnCoefficients"); - - - /// - /// Gets lowerBoundsOnIntercepts value for - /// - /// - /// lowerBoundsOnIntercepts: The lower bounds on intercepts if fitting under bound constrained optimization. - /// - public object GetLowerBoundsOnIntercepts() => - (object)Reference.Invoke("getLowerBoundsOnIntercepts"); - - - /// - /// Gets maxBlockSizeInMB value for - /// - /// - /// maxBlockSizeInMB: Maximum memory in MB for stacking input data into blocks. Data is stacked within partitions. If more than remaining data size in a partition then it is adjusted to the data size. Default 0.0 represents choosing optimal value, depends on specific algorithm. Must be >= 0. - /// - public double GetMaxBlockSizeInMB() => - (double)Reference.Invoke("getMaxBlockSizeInMB"); - - - /// - /// Gets maxIter value for - /// - /// - /// maxIter: maximum number of iterations (>= 0) - /// - public int GetMaxIter() => - (int)Reference.Invoke("getMaxIter"); - - - /// - /// Gets predictionCol value for - /// - /// - /// predictionCol: prediction column name - /// - public string GetPredictionCol() => - (string)Reference.Invoke("getPredictionCol"); - - - /// - /// Gets probabilityCol value for - /// - /// - /// probabilityCol: Column name for predicted class conditional probabilities. Note: Not all models output well-calibrated probability estimates! These probabilities should be treated as confidences, not precise probabilities - /// - public string GetProbabilityCol() => - (string)Reference.Invoke("getProbabilityCol"); - - - /// - /// Gets rawPredictionCol value for - /// - /// - /// rawPredictionCol: raw prediction (a.k.a. confidence) column name - /// - public string GetRawPredictionCol() => - (string)Reference.Invoke("getRawPredictionCol"); - - - /// - /// Gets regParam value for - /// - /// - /// regParam: regularization parameter (>= 0) - /// - public double GetRegParam() => - (double)Reference.Invoke("getRegParam"); - - - /// - /// Gets standardization value for - /// - /// - /// standardization: whether to standardize the training features before fitting the model - /// - public bool GetStandardization() => - (bool)Reference.Invoke("getStandardization"); - - - /// - /// Gets threshold value for - /// - /// - /// threshold: threshold in binary classification prediction, in range [0, 1] - /// - public double GetThreshold() => - (double)Reference.Invoke("getThreshold"); - - - /// - /// Gets thresholds value for - /// - /// - /// thresholds: Thresholds in multi-class classification to adjust the probability of predicting each class. Array must have length equal to the number of classes, with values > 0 excepting that at most one value may be 0. The class with largest value p/t is predicted, where p is the original probability of that class and t is the class's threshold - /// - public double[] GetThresholds() => - (double[])Reference.Invoke("getThresholds"); - - - /// - /// Gets tol value for - /// - /// - /// tol: the convergence tolerance for iterative algorithms (>= 0) - /// - public double GetTol() => - (double)Reference.Invoke("getTol"); - - - /// - /// Gets upperBoundsOnCoefficients value for - /// - /// - /// upperBoundsOnCoefficients: The upper bounds on coefficients if fitting under bound constrained optimization. - /// - public object GetUpperBoundsOnCoefficients() => - (object)Reference.Invoke("getUpperBoundsOnCoefficients"); - - - /// - /// Gets upperBoundsOnIntercepts value for - /// - /// - /// upperBoundsOnIntercepts: The upper bounds on intercepts if fitting under bound constrained optimization. - /// - public object GetUpperBoundsOnIntercepts() => - (object)Reference.Invoke("getUpperBoundsOnIntercepts"); - - - /// - /// Gets weightCol value for - /// - /// - /// weightCol: weight column name. If this is not set or empty, we treat all instance weights as 1.0 - /// - public string GetWeightCol() => - (string)Reference.Invoke("getWeightCol"); - - /// Fits a model to the input data. - /// The to fit the model to. - /// - override public LogisticRegressionModel Fit(DataFrame dataset) => - new LogisticRegressionModel( - (JvmObjectReference)Reference.Invoke("fit", dataset)); - - /// - /// Loads the that was previously saved using Save(string). - /// - /// The path the previous was saved to - /// New object, loaded from path. - public static LogisticRegression Load(string path) => WrapAsLogisticRegression( - SparkEnvironment.JvmBridge.CallStaticJavaMethod(s_className, "load", path)); - - /// - /// Saves the object so that it can be loaded later using Load. Note that these objects - /// can be shared with Scala by Loading or Saving in Scala. - /// - /// The path to save the object to - public void Save(string path) => Reference.Invoke("save", path); - - /// a instance for this ML instance. - public JavaMLWriter Write() => - new JavaMLWriter((JvmObjectReference)Reference.Invoke("write")); - - /// - /// Get the corresponding JavaMLReader instance. - /// - /// an instance for this ML instance. - public JavaMLReader Read() => - new JavaMLReader((JvmObjectReference)Reference.Invoke("read")); - - private static LogisticRegression WrapAsLogisticRegression(object obj) => - new LogisticRegression((JvmObjectReference)obj); - - - } -} diff --git a/core/src/main/dotnet/src/org/apache/spark/ml/classification/LogisticRegressionModel.cs b/core/src/main/dotnet/src/org/apache/spark/ml/classification/LogisticRegressionModel.cs deleted file mode 100644 index 30c5014b50..0000000000 --- a/core/src/main/dotnet/src/org/apache/spark/ml/classification/LogisticRegressionModel.cs +++ /dev/null @@ -1,497 +0,0 @@ -// Copyright (C) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. See LICENSE in project root for information. - - -using System; -using System.Collections.Generic; -using System.Linq; -using System.Reflection; -using Microsoft.Spark.ML.Feature; -using Microsoft.Spark.ML.Feature.Param; -using Microsoft.Spark.Interop; -using Microsoft.Spark.Interop.Ipc; -using Microsoft.Spark.Interop.Internal.Java.Util; -using Microsoft.Spark.Sql; -using Microsoft.Spark.Sql.Types; -using Microsoft.Spark.Utils; -using SynapseML.Dotnet.Utils; -using Synapse.ML.LightGBM.Param; - - -namespace Microsoft.Spark.ML.Classification -{ - /// - /// implements LogisticRegressionModel - /// - public class LogisticRegressionModel : JavaModel, IJavaMLWritable, IJavaMLReadable - { - private static readonly string s_className = "org.apache.spark.ml.classification.LogisticRegressionModel"; - - // // TODO: support this after constructing Vector and Matrix class in .NET - // /// - // /// Creates a with a UID that is used to give the - // /// a unique ID. - // /// - // /// An immutable unique ID for the object and its derivatives. - // public LogisticRegressionModel(string uid, Matrix coefficientMatrix, Vector interceptVector, int numClasses, bool isMultinomial) - // : this(SparkEnvironment.JvmBridge.CallConstructor(s_className, uid)) - // { - // } - - internal LogisticRegressionModel(JvmObjectReference jvmObject) : base(jvmObject) - { - } - - /// - /// Sets aggregationDepth value for - /// - /// - /// suggested depth for treeAggregate (>= 2) - /// - /// New LogisticRegressionModel object - public LogisticRegressionModel SetAggregationDepth(int value) => - WrapAsLogisticRegressionModel(Reference.Invoke("setAggregationDepth", (object)value)); - - /// - /// Sets elasticNetParam value for - /// - /// - /// the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty - /// - /// New LogisticRegressionModel object - public LogisticRegressionModel SetElasticNetParam(double value) => - WrapAsLogisticRegressionModel(Reference.Invoke("setElasticNetParam", (object)value)); - - /// - /// Sets family value for - /// - /// - /// The name of family which is a description of the label distribution to be used in the model. Supported options: auto, binomial, multinomial. - /// - /// New LogisticRegressionModel object - public LogisticRegressionModel SetFamily(string value) => - WrapAsLogisticRegressionModel(Reference.Invoke("setFamily", (object)value)); - - /// - /// Sets featuresCol value for - /// - /// - /// features column name - /// - /// New LogisticRegressionModel object - public LogisticRegressionModel SetFeaturesCol(string value) => - WrapAsLogisticRegressionModel(Reference.Invoke("setFeaturesCol", (object)value)); - - /// - /// Sets fitIntercept value for - /// - /// - /// whether to fit an intercept term - /// - /// New LogisticRegressionModel object - public LogisticRegressionModel SetFitIntercept(bool value) => - WrapAsLogisticRegressionModel(Reference.Invoke("setFitIntercept", (object)value)); - - /// - /// Sets labelCol value for - /// - /// - /// label column name - /// - /// New LogisticRegressionModel object - public LogisticRegressionModel SetLabelCol(string value) => - WrapAsLogisticRegressionModel(Reference.Invoke("setLabelCol", (object)value)); - - /// - /// Sets lowerBoundsOnCoefficients value for - /// - /// - /// The lower bounds on coefficients if fitting under bound constrained optimization. - /// - /// New LogisticRegressionModel object - public LogisticRegressionModel SetLowerBoundsOnCoefficients(object value) => - WrapAsLogisticRegressionModel(Reference.Invoke("setLowerBoundsOnCoefficients", (object)value)); - - /// - /// Sets lowerBoundsOnIntercepts value for - /// - /// - /// The lower bounds on intercepts if fitting under bound constrained optimization. - /// - /// New LogisticRegressionModel object - public LogisticRegressionModel SetLowerBoundsOnIntercepts(object value) => - WrapAsLogisticRegressionModel(Reference.Invoke("setLowerBoundsOnIntercepts", (object)value)); - - /// - /// Sets maxBlockSizeInMB value for - /// - /// - /// Maximum memory in MB for stacking input data into blocks. Data is stacked within partitions. If more than remaining data size in a partition then it is adjusted to the data size. Default 0.0 represents choosing optimal value, depends on specific algorithm. Must be >= 0. - /// - /// New LogisticRegressionModel object - public LogisticRegressionModel SetMaxBlockSizeInMB(double value) => - WrapAsLogisticRegressionModel(Reference.Invoke("setMaxBlockSizeInMB", (object)value)); - - /// - /// Sets maxIter value for - /// - /// - /// maximum number of iterations (>= 0) - /// - /// New LogisticRegressionModel object - public LogisticRegressionModel SetMaxIter(int value) => - WrapAsLogisticRegressionModel(Reference.Invoke("setMaxIter", (object)value)); - - /// - /// Sets predictionCol value for - /// - /// - /// prediction column name - /// - /// New LogisticRegressionModel object - public LogisticRegressionModel SetPredictionCol(string value) => - WrapAsLogisticRegressionModel(Reference.Invoke("setPredictionCol", (object)value)); - - /// - /// Sets probabilityCol value for - /// - /// - /// Column name for predicted class conditional probabilities. Note: Not all models output well-calibrated probability estimates! These probabilities should be treated as confidences, not precise probabilities - /// - /// New LogisticRegressionModel object - public LogisticRegressionModel SetProbabilityCol(string value) => - WrapAsLogisticRegressionModel(Reference.Invoke("setProbabilityCol", (object)value)); - - /// - /// Sets rawPredictionCol value for - /// - /// - /// raw prediction (a.k.a. confidence) column name - /// - /// New LogisticRegressionModel object - public LogisticRegressionModel SetRawPredictionCol(string value) => - WrapAsLogisticRegressionModel(Reference.Invoke("setRawPredictionCol", (object)value)); - - /// - /// Sets regParam value for - /// - /// - /// regularization parameter (>= 0) - /// - /// New LogisticRegressionModel object - public LogisticRegressionModel SetRegParam(double value) => - WrapAsLogisticRegressionModel(Reference.Invoke("setRegParam", (object)value)); - - /// - /// Sets standardization value for - /// - /// - /// whether to standardize the training features before fitting the model - /// - /// New LogisticRegressionModel object - public LogisticRegressionModel SetStandardization(bool value) => - WrapAsLogisticRegressionModel(Reference.Invoke("setStandardization", (object)value)); - - /// - /// Sets threshold value for - /// - /// - /// threshold in binary classification prediction, in range [0, 1] - /// - /// New LogisticRegressionModel object - public LogisticRegressionModel SetThreshold(double value) => - WrapAsLogisticRegressionModel(Reference.Invoke("setThreshold", (object)value)); - - /// - /// Sets thresholds value for - /// - /// - /// Thresholds in multi-class classification to adjust the probability of predicting each class. Array must have length equal to the number of classes, with values > 0 excepting that at most one value may be 0. The class with largest value p/t is predicted, where p is the original probability of that class and t is the class's threshold - /// - /// New LogisticRegressionModel object - public LogisticRegressionModel SetThresholds(double[] value) => - WrapAsLogisticRegressionModel(Reference.Invoke("setThresholds", (object)value)); - - /// - /// Sets tol value for - /// - /// - /// the convergence tolerance for iterative algorithms (>= 0) - /// - /// New LogisticRegressionModel object - public LogisticRegressionModel SetTol(double value) => - WrapAsLogisticRegressionModel(Reference.Invoke("setTol", (object)value)); - - /// - /// Sets upperBoundsOnCoefficients value for - /// - /// - /// The upper bounds on coefficients if fitting under bound constrained optimization. - /// - /// New LogisticRegressionModel object - public LogisticRegressionModel SetUpperBoundsOnCoefficients(object value) => - WrapAsLogisticRegressionModel(Reference.Invoke("setUpperBoundsOnCoefficients", (object)value)); - - /// - /// Sets upperBoundsOnIntercepts value for - /// - /// - /// The upper bounds on intercepts if fitting under bound constrained optimization. - /// - /// New LogisticRegressionModel object - public LogisticRegressionModel SetUpperBoundsOnIntercepts(object value) => - WrapAsLogisticRegressionModel(Reference.Invoke("setUpperBoundsOnIntercepts", (object)value)); - - /// - /// Sets weightCol value for - /// - /// - /// weight column name. If this is not set or empty, we treat all instance weights as 1.0 - /// - /// New LogisticRegressionModel object - public LogisticRegressionModel SetWeightCol(string value) => - WrapAsLogisticRegressionModel(Reference.Invoke("setWeightCol", (object)value)); - - - /// - /// Gets aggregationDepth value for - /// - /// - /// aggregationDepth: suggested depth for treeAggregate (>= 2) - /// - public int GetAggregationDepth() => - (int)Reference.Invoke("getAggregationDepth"); - - - /// - /// Gets elasticNetParam value for - /// - /// - /// elasticNetParam: the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty - /// - public double GetElasticNetParam() => - (double)Reference.Invoke("getElasticNetParam"); - - - /// - /// Gets family value for - /// - /// - /// family: The name of family which is a description of the label distribution to be used in the model. Supported options: auto, binomial, multinomial. - /// - public string GetFamily() => - (string)Reference.Invoke("getFamily"); - - - /// - /// Gets featuresCol value for - /// - /// - /// featuresCol: features column name - /// - public string GetFeaturesCol() => - (string)Reference.Invoke("getFeaturesCol"); - - - /// - /// Gets fitIntercept value for - /// - /// - /// fitIntercept: whether to fit an intercept term - /// - public bool GetFitIntercept() => - (bool)Reference.Invoke("getFitIntercept"); - - - /// - /// Gets labelCol value for - /// - /// - /// labelCol: label column name - /// - public string GetLabelCol() => - (string)Reference.Invoke("getLabelCol"); - - - /// - /// Gets lowerBoundsOnCoefficients value for - /// - /// - /// lowerBoundsOnCoefficients: The lower bounds on coefficients if fitting under bound constrained optimization. - /// - public object GetLowerBoundsOnCoefficients() => - (object)Reference.Invoke("getLowerBoundsOnCoefficients"); - - - /// - /// Gets lowerBoundsOnIntercepts value for - /// - /// - /// lowerBoundsOnIntercepts: The lower bounds on intercepts if fitting under bound constrained optimization. - /// - public object GetLowerBoundsOnIntercepts() => - (object)Reference.Invoke("getLowerBoundsOnIntercepts"); - - - /// - /// Gets maxBlockSizeInMB value for - /// - /// - /// maxBlockSizeInMB: Maximum memory in MB for stacking input data into blocks. Data is stacked within partitions. If more than remaining data size in a partition then it is adjusted to the data size. Default 0.0 represents choosing optimal value, depends on specific algorithm. Must be >= 0. - /// - public double GetMaxBlockSizeInMB() => - (double)Reference.Invoke("getMaxBlockSizeInMB"); - - - /// - /// Gets maxIter value for - /// - /// - /// maxIter: maximum number of iterations (>= 0) - /// - public int GetMaxIter() => - (int)Reference.Invoke("getMaxIter"); - - - /// - /// Gets predictionCol value for - /// - /// - /// predictionCol: prediction column name - /// - public string GetPredictionCol() => - (string)Reference.Invoke("getPredictionCol"); - - - /// - /// Gets probabilityCol value for - /// - /// - /// probabilityCol: Column name for predicted class conditional probabilities. Note: Not all models output well-calibrated probability estimates! These probabilities should be treated as confidences, not precise probabilities - /// - public string GetProbabilityCol() => - (string)Reference.Invoke("getProbabilityCol"); - - - /// - /// Gets rawPredictionCol value for - /// - /// - /// rawPredictionCol: raw prediction (a.k.a. confidence) column name - /// - public string GetRawPredictionCol() => - (string)Reference.Invoke("getRawPredictionCol"); - - - /// - /// Gets regParam value for - /// - /// - /// regParam: regularization parameter (>= 0) - /// - public double GetRegParam() => - (double)Reference.Invoke("getRegParam"); - - - /// - /// Gets standardization value for - /// - /// - /// standardization: whether to standardize the training features before fitting the model - /// - public bool GetStandardization() => - (bool)Reference.Invoke("getStandardization"); - - - /// - /// Gets threshold value for - /// - /// - /// threshold: threshold in binary classification prediction, in range [0, 1] - /// - public double GetThreshold() => - (double)Reference.Invoke("getThreshold"); - - - /// - /// Gets thresholds value for - /// - /// - /// thresholds: Thresholds in multi-class classification to adjust the probability of predicting each class. Array must have length equal to the number of classes, with values > 0 excepting that at most one value may be 0. The class with largest value p/t is predicted, where p is the original probability of that class and t is the class's threshold - /// - public double[] GetThresholds() => - (double[])Reference.Invoke("getThresholds"); - - - /// - /// Gets tol value for - /// - /// - /// tol: the convergence tolerance for iterative algorithms (>= 0) - /// - public double GetTol() => - (double)Reference.Invoke("getTol"); - - - /// - /// Gets upperBoundsOnCoefficients value for - /// - /// - /// upperBoundsOnCoefficients: The upper bounds on coefficients if fitting under bound constrained optimization. - /// - public object GetUpperBoundsOnCoefficients() => - (object)Reference.Invoke("getUpperBoundsOnCoefficients"); - - - /// - /// Gets upperBoundsOnIntercepts value for - /// - /// - /// upperBoundsOnIntercepts: The upper bounds on intercepts if fitting under bound constrained optimization. - /// - public object GetUpperBoundsOnIntercepts() => - (object)Reference.Invoke("getUpperBoundsOnIntercepts"); - - - /// - /// Gets weightCol value for - /// - /// - /// weightCol: weight column name. If this is not set or empty, we treat all instance weights as 1.0 - /// - public string GetWeightCol() => - (string)Reference.Invoke("getWeightCol"); - - - /// - /// Loads the that was previously saved using Save(string). - /// - /// The path the previous was saved to - /// New object, loaded from path. - public static LogisticRegressionModel Load(string path) => WrapAsLogisticRegressionModel( - SparkEnvironment.JvmBridge.CallStaticJavaMethod(s_className, "load", path)); - - /// - /// Saves the object so that it can be loaded later using Load. Note that these objects - /// can be shared with Scala by Loading or Saving in Scala. - /// - /// The path to save the object to - public void Save(string path) => Reference.Invoke("save", path); - - /// a instance for this ML instance. - public JavaMLWriter Write() => - new JavaMLWriter((JvmObjectReference)Reference.Invoke("write")); - - /// - /// Get the corresponding JavaMLReader instance. - /// - /// an instance for this ML instance. - public JavaMLReader Read() => - new JavaMLReader((JvmObjectReference)Reference.Invoke("read")); - - private static LogisticRegressionModel WrapAsLogisticRegressionModel(object obj) => - new LogisticRegressionModel((JvmObjectReference)obj); - - - } -} diff --git a/core/src/main/dotnet/src/org/apache/spark/ml/feature/StringIndexer.cs b/core/src/main/dotnet/src/org/apache/spark/ml/feature/StringIndexer.cs deleted file mode 100644 index e933b601f1..0000000000 --- a/core/src/main/dotnet/src/org/apache/spark/ml/feature/StringIndexer.cs +++ /dev/null @@ -1,207 +0,0 @@ -// Copyright (C) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. See LICENSE in project root for information. - - -using System; -using System.Collections.Generic; -using System.Linq; -using System.Reflection; -using Microsoft.Spark.ML.Feature; -using Microsoft.Spark.ML.Feature.Param; -using Microsoft.Spark.Interop; -using Microsoft.Spark.Interop.Ipc; -using Microsoft.Spark.Interop.Internal.Java.Util; -using Microsoft.Spark.Sql; -using Microsoft.Spark.Sql.Types; -using Microsoft.Spark.Utils; -using SynapseML.Dotnet.Utils; -using Synapse.ML.LightGBM.Param; - -namespace Microsoft.Spark.ML.Feature -{ - /// - /// implements StringIndexer - /// - public class StringIndexer : JavaEstimator, IJavaMLWritable, IJavaMLReadable - { - private static readonly string s_className = "org.apache.spark.ml.feature.StringIndexer"; - - /// - /// Creates a without any parameters. - /// - public StringIndexer() : base(s_className) - { - } - - /// - /// Creates a with a UID that is used to give the - /// a unique ID. - /// - /// An immutable unique ID for the object and its derivatives. - public StringIndexer(string uid) : base(s_className, uid) - { - } - - internal StringIndexer(JvmObjectReference jvmObject) : base(jvmObject) - { - } - - /// - /// Sets handleInvalid value for - /// - /// - /// How to handle invalid data (unseen labels or NULL values). Options are 'skip' (filter out rows with invalid data), error (throw an error), or 'keep' (put invalid data in a special additional bucket, at index numLabels). - /// - /// New StringIndexer object - public StringIndexer SetHandleInvalid(string value) => - WrapAsStringIndexer(Reference.Invoke("setHandleInvalid", (object)value)); - - /// - /// Sets inputCol value for - /// - /// - /// input column name - /// - /// New StringIndexer object - public StringIndexer SetInputCol(string value) => - WrapAsStringIndexer(Reference.Invoke("setInputCol", (object)value)); - - /// - /// Sets inputCols value for - /// - /// - /// input column names - /// - /// New StringIndexer object - public StringIndexer SetInputCols(string[] value) => - WrapAsStringIndexer(Reference.Invoke("setInputCols", (object)value)); - - /// - /// Sets outputCol value for - /// - /// - /// output column name - /// - /// New StringIndexer object - public StringIndexer SetOutputCol(string value) => - WrapAsStringIndexer(Reference.Invoke("setOutputCol", (object)value)); - - /// - /// Sets outputCols value for - /// - /// - /// output column names - /// - /// New StringIndexer object - public StringIndexer SetOutputCols(string[] value) => - WrapAsStringIndexer(Reference.Invoke("setOutputCols", (object)value)); - - /// - /// Sets stringOrderType value for - /// - /// - /// How to order labels of string column. The first label after ordering is assigned an index of 0. Supported options: frequencyDesc, frequencyAsc, alphabetDesc, alphabetAsc. - /// - /// New StringIndexer object - public StringIndexer SetStringOrderType(string value) => - WrapAsStringIndexer(Reference.Invoke("setStringOrderType", (object)value)); - - - /// - /// Gets handleInvalid value for - /// - /// - /// handleInvalid: How to handle invalid data (unseen labels or NULL values). Options are 'skip' (filter out rows with invalid data), error (throw an error), or 'keep' (put invalid data in a special additional bucket, at index numLabels). - /// - public string GetHandleInvalid() => - (string)Reference.Invoke("getHandleInvalid"); - - - /// - /// Gets inputCol value for - /// - /// - /// inputCol: input column name - /// - public string GetInputCol() => - (string)Reference.Invoke("getInputCol"); - - - /// - /// Gets inputCols value for - /// - /// - /// inputCols: input column names - /// - public string[] GetInputCols() => - (string[])Reference.Invoke("getInputCols"); - - - /// - /// Gets outputCol value for - /// - /// - /// outputCol: output column name - /// - public string GetOutputCol() => - (string)Reference.Invoke("getOutputCol"); - - - /// - /// Gets outputCols value for - /// - /// - /// outputCols: output column names - /// - public string[] GetOutputCols() => - (string[])Reference.Invoke("getOutputCols"); - - - /// - /// Gets stringOrderType value for - /// - /// - /// stringOrderType: How to order labels of string column. The first label after ordering is assigned an index of 0. Supported options: frequencyDesc, frequencyAsc, alphabetDesc, alphabetAsc. - /// - public string GetStringOrderType() => - (string)Reference.Invoke("getStringOrderType"); - - /// Fits a model to the input data. - /// The to fit the model to. - /// - override public StringIndexerModel Fit(DataFrame dataset) => - new StringIndexerModel( - (JvmObjectReference)Reference.Invoke("fit", dataset)); - - /// - /// Loads the that was previously saved using Save(string). - /// - /// The path the previous was saved to - /// New object, loaded from path. - public static StringIndexer Load(string path) => WrapAsStringIndexer( - SparkEnvironment.JvmBridge.CallStaticJavaMethod(s_className, "load", path)); - - /// - /// Saves the object so that it can be loaded later using Load. Note that these objects - /// can be shared with Scala by Loading or Saving in Scala. - /// - /// The path to save the object to - public void Save(string path) => Reference.Invoke("save", path); - - /// a instance for this ML instance. - public JavaMLWriter Write() => - new JavaMLWriter((JvmObjectReference)Reference.Invoke("write")); - - /// - /// Get the corresponding JavaMLReader instance. - /// - /// an instance for this ML instance. - public JavaMLReader Read() => - new JavaMLReader((JvmObjectReference)Reference.Invoke("read")); - - private static StringIndexer WrapAsStringIndexer(object obj) => - new StringIndexer((JvmObjectReference)obj); - - - } -} diff --git a/core/src/main/dotnet/src/org/apache/spark/ml/feature/StringIndexerModel.cs b/core/src/main/dotnet/src/org/apache/spark/ml/feature/StringIndexerModel.cs deleted file mode 100644 index d9fc66417d..0000000000 --- a/core/src/main/dotnet/src/org/apache/spark/ml/feature/StringIndexerModel.cs +++ /dev/null @@ -1,225 +0,0 @@ -// Copyright (C) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. See LICENSE in project root for information. - - -using System; -using System.Collections.Generic; -using System.Linq; -using System.Reflection; -using Microsoft.Spark.ML.Feature; -using Microsoft.Spark.ML.Feature.Param; -using Microsoft.Spark.Interop; -using Microsoft.Spark.Interop.Ipc; -using Microsoft.Spark.Interop.Internal.Java.Util; -using Microsoft.Spark.Sql; -using Microsoft.Spark.Sql.Types; -using Microsoft.Spark.Utils; -using SynapseML.Dotnet.Utils; -using Synapse.ML.LightGBM.Param; - - -namespace Microsoft.Spark.ML.Feature -{ - /// - /// implements StringIndexerModel - /// - public class StringIndexerModel : JavaModel, IJavaMLWritable, IJavaMLReadable - { - private static readonly string s_className = "org.apache.spark.ml.feature.StringIndexerModel"; - - /// - /// Creates a with a UID that is used to give the - /// a unique ID. - /// - /// An immutable unique ID for the object and its derivatives. - /// Array of ordered list of labels, corresponding to indices to be assigned for each input column. - public StringIndexerModel(string uid, string[][] labelsArray) - : this(SparkEnvironment.JvmBridge.CallConstructor(s_className, uid, labelsArray)) - { - } - - /// - /// Creates a with a UID that is used to give the - /// a unique ID. - /// - /// An immutable unique ID for the object and its derivatives. - /// Array of ordered list of labels, corresponding to indices to be assigned for each input column. - public StringIndexerModel(string uid, string[] labels) - : this(SparkEnvironment.JvmBridge.CallConstructor(s_className, uid, labels)) - { - } - - - /// - /// Creates a without any parameters. - /// - public StringIndexerModel(string[][] labelsArray) - : this(SparkEnvironment.JvmBridge.CallConstructor(s_className, labelsArray)) - { - } - - /// - /// Creates a without any parameters. - /// - public StringIndexerModel(string[] labels) - : this(SparkEnvironment.JvmBridge.CallConstructor(s_className, labels)) - { - } - - internal StringIndexerModel(JvmObjectReference jvmObject) : base(jvmObject) - { - } - - /// - /// Sets handleInvalid value for - /// - /// - /// How to handle invalid data (unseen labels or NULL values). Options are 'skip' (filter out rows with invalid data), error (throw an error), or 'keep' (put invalid data in a special additional bucket, at index numLabels). - /// - /// New StringIndexerModel object - public StringIndexerModel SetHandleInvalid(string value) => - WrapAsStringIndexerModel(Reference.Invoke("setHandleInvalid", (object)value)); - - /// - /// Sets inputCol value for - /// - /// - /// input column name - /// - /// New StringIndexerModel object - public StringIndexerModel SetInputCol(string value) => - WrapAsStringIndexerModel(Reference.Invoke("setInputCol", (object)value)); - - /// - /// Sets inputCols value for - /// - /// - /// input column names - /// - /// New StringIndexerModel object - public StringIndexerModel SetInputCols(string[] value) => - WrapAsStringIndexerModel(Reference.Invoke("setInputCols", (object)value)); - - /// - /// Sets outputCol value for - /// - /// - /// output column name - /// - /// New StringIndexerModel object - public StringIndexerModel SetOutputCol(string value) => - WrapAsStringIndexerModel(Reference.Invoke("setOutputCol", (object)value)); - - /// - /// Sets outputCols value for - /// - /// - /// output column names - /// - /// New StringIndexerModel object - public StringIndexerModel SetOutputCols(string[] value) => - WrapAsStringIndexerModel(Reference.Invoke("setOutputCols", (object)value)); - - /// - /// Sets stringOrderType value for - /// - /// - /// How to order labels of string column. The first label after ordering is assigned an index of 0. Supported options: frequencyDesc, frequencyAsc, alphabetDesc, alphabetAsc. - /// - /// New StringIndexerModel object - public StringIndexerModel SetStringOrderType(string value) => - WrapAsStringIndexerModel(Reference.Invoke("setStringOrderType", (object)value)); - - - /// - /// Gets handleInvalid value for - /// - /// - /// handleInvalid: How to handle invalid data (unseen labels or NULL values). Options are 'skip' (filter out rows with invalid data), error (throw an error), or 'keep' (put invalid data in a special additional bucket, at index numLabels). - /// - public string GetHandleInvalid() => - (string)Reference.Invoke("getHandleInvalid"); - - - /// - /// Gets inputCol value for - /// - /// - /// inputCol: input column name - /// - public string GetInputCol() => - (string)Reference.Invoke("getInputCol"); - - - /// - /// Gets inputCols value for - /// - /// - /// inputCols: input column names - /// - public string[] GetInputCols() => - (string[])Reference.Invoke("getInputCols"); - - - /// - /// Gets outputCol value for - /// - /// - /// outputCol: output column name - /// - public string GetOutputCol() => - (string)Reference.Invoke("getOutputCol"); - - - /// - /// Gets outputCols value for - /// - /// - /// outputCols: output column names - /// - public string[] GetOutputCols() => - (string[])Reference.Invoke("getOutputCols"); - - - /// - /// Gets stringOrderType value for - /// - /// - /// stringOrderType: How to order labels of string column. The first label after ordering is assigned an index of 0. Supported options: frequencyDesc, frequencyAsc, alphabetDesc, alphabetAsc. - /// - public string GetStringOrderType() => - (string)Reference.Invoke("getStringOrderType"); - - - /// - /// Loads the that was previously saved using Save(string). - /// - /// The path the previous was saved to - /// New object, loaded from path. - public static StringIndexerModel Load(string path) => WrapAsStringIndexerModel( - SparkEnvironment.JvmBridge.CallStaticJavaMethod(s_className, "load", path)); - - /// - /// Saves the object so that it can be loaded later using Load. Note that these objects - /// can be shared with Scala by Loading or Saving in Scala. - /// - /// The path to save the object to - public void Save(string path) => Reference.Invoke("save", path); - - /// a instance for this ML instance. - public JavaMLWriter Write() => - new JavaMLWriter((JvmObjectReference)Reference.Invoke("write")); - - /// - /// Get the corresponding JavaMLReader instance. - /// - /// an instance for this ML instance. - public JavaMLReader Read() => - new JavaMLReader((JvmObjectReference)Reference.Invoke("read")); - - private static StringIndexerModel WrapAsStringIndexerModel(object obj) => - new StringIndexerModel((JvmObjectReference)obj); - - - } -} diff --git a/core/src/main/dotnet/src/org/apache/spark/ml/recommendation/ALS.cs b/core/src/main/dotnet/src/org/apache/spark/ml/recommendation/ALS.cs deleted file mode 100644 index ab29a2e4f9..0000000000 --- a/core/src/main/dotnet/src/org/apache/spark/ml/recommendation/ALS.cs +++ /dev/null @@ -1,448 +0,0 @@ -// Copyright (C) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. See LICENSE in project root for information. - - -using System; -using System.Collections.Generic; -using System.Linq; -using System.Reflection; -using Microsoft.Spark.ML.Feature; -using Microsoft.Spark.ML.Feature.Param; -using Microsoft.Spark.Interop; -using Microsoft.Spark.Interop.Ipc; -using Microsoft.Spark.Interop.Internal.Java.Util; -using Microsoft.Spark.Sql; -using Microsoft.Spark.Sql.Types; -using Microsoft.Spark.Utils; -using SynapseML.Dotnet.Utils; -using Synapse.ML.LightGBM.Param; -using Microsoft.Spark.ML.Recommendation; - -namespace Microsoft.Spark.ML.Recommendation -{ - /// - /// implements ALS - /// - public class ALS : JavaEstimator, IJavaMLWritable, IJavaMLReadable - { - private static readonly string s_className = "org.apache.spark.ml.recommendation.ALS"; - - /// - /// Creates a without any parameters. - /// - public ALS() : base(s_className) - { - } - - /// - /// Creates a with a UID that is used to give the - /// a unique ID. - /// - /// An immutable unique ID for the object and its derivatives. - public ALS(string uid) : base(s_className, uid) - { - } - - internal ALS(JvmObjectReference jvmObject) : base(jvmObject) - { - } - - /// - /// Sets alpha value for - /// - /// - /// alpha for implicit preference - /// - /// New ALS object - public ALS SetAlpha(double value) => - WrapAsALS(Reference.Invoke("setAlpha", (object)value)); - - /// - /// Sets blockSize value for - /// - /// - /// block size for stacking input data in matrices. Data is stacked within partitions. If block size is more than remaining data in a partition then it is adjusted to the size of this data. - /// - /// New ALS object - public ALS SetBlockSize(int value) => - WrapAsALS(Reference.Invoke("setBlockSize", (object)value)); - - /// - /// Sets checkpointInterval value for - /// - /// - /// set checkpoint interval (>= 1) or disable checkpoint (-1). E.g. 10 means that the cache will get checkpointed every 10 iterations. Note: this setting will be ignored if the checkpoint directory is not set in the SparkContext - /// - /// New ALS object - public ALS SetCheckpointInterval(int value) => - WrapAsALS(Reference.Invoke("setCheckpointInterval", (object)value)); - - /// - /// Sets coldStartStrategy value for - /// - /// - /// strategy for dealing with unknown or new users/items at prediction time. This may be useful in cross-validation or production scenarios, for handling user/item ids the model has not seen in the training data. Supported values: nan,drop. - /// - /// New ALS object - public ALS SetColdStartStrategy(string value) => - WrapAsALS(Reference.Invoke("setColdStartStrategy", (object)value)); - - /// - /// Sets finalStorageLevel value for - /// - /// - /// StorageLevel for ALS model factors. - /// - /// New ALS object - public ALS SetFinalStorageLevel(string value) => - WrapAsALS(Reference.Invoke("setFinalStorageLevel", (object)value)); - - /// - /// Sets implicitPrefs value for - /// - /// - /// whether to use implicit preference - /// - /// New ALS object - public ALS SetImplicitPrefs(bool value) => - WrapAsALS(Reference.Invoke("setImplicitPrefs", (object)value)); - - /// - /// Sets intermediateStorageLevel value for - /// - /// - /// StorageLevel for intermediate datasets. Cannot be 'NONE'. - /// - /// New ALS object - public ALS SetIntermediateStorageLevel(string value) => - WrapAsALS(Reference.Invoke("setIntermediateStorageLevel", (object)value)); - - /// - /// Sets itemCol value for - /// - /// - /// column name for item ids. Ids must be within the integer value range. - /// - /// New ALS object - public ALS SetItemCol(string value) => - WrapAsALS(Reference.Invoke("setItemCol", (object)value)); - - /// - /// Sets maxIter value for - /// - /// - /// maximum number of iterations (>= 0) - /// - /// New ALS object - public ALS SetMaxIter(int value) => - WrapAsALS(Reference.Invoke("setMaxIter", (object)value)); - - /// - /// Sets nonnegative value for - /// - /// - /// whether to use nonnegative constraint for least squares - /// - /// New ALS object - public ALS SetNonnegative(bool value) => - WrapAsALS(Reference.Invoke("setNonnegative", (object)value)); - - /// - /// Sets numItemBlocks value for - /// - /// - /// number of item blocks - /// - /// New ALS object - public ALS SetNumItemBlocks(int value) => - WrapAsALS(Reference.Invoke("setNumItemBlocks", (object)value)); - - /// - /// Sets numUserBlocks value for - /// - /// - /// number of user blocks - /// - /// New ALS object - public ALS SetNumUserBlocks(int value) => - WrapAsALS(Reference.Invoke("setNumUserBlocks", (object)value)); - - /// - /// Sets predictionCol value for - /// - /// - /// prediction column name - /// - /// New ALS object - public ALS SetPredictionCol(string value) => - WrapAsALS(Reference.Invoke("setPredictionCol", (object)value)); - - /// - /// Sets rank value for - /// - /// - /// rank of the factorization - /// - /// New ALS object - public ALS SetRank(int value) => - WrapAsALS(Reference.Invoke("setRank", (object)value)); - - /// - /// Sets ratingCol value for - /// - /// - /// column name for ratings - /// - /// New ALS object - public ALS SetRatingCol(string value) => - WrapAsALS(Reference.Invoke("setRatingCol", (object)value)); - - /// - /// Sets regParam value for - /// - /// - /// regularization parameter (>= 0) - /// - /// New ALS object - public ALS SetRegParam(double value) => - WrapAsALS(Reference.Invoke("setRegParam", (object)value)); - - /// - /// Sets seed value for - /// - /// - /// random seed - /// - /// New ALS object - public ALS SetSeed(long value) => - WrapAsALS(Reference.Invoke("setSeed", (object)value)); - - /// - /// Sets userCol value for - /// - /// - /// column name for user ids. Ids must be within the integer value range. - /// - /// New ALS object - public ALS SetUserCol(string value) => - WrapAsALS(Reference.Invoke("setUserCol", (object)value)); - - - /// - /// Gets alpha value for - /// - /// - /// alpha: alpha for implicit preference - /// - public double GetAlpha() => - (double)Reference.Invoke("getAlpha"); - - - /// - /// Gets blockSize value for - /// - /// - /// blockSize: block size for stacking input data in matrices. Data is stacked within partitions. If block size is more than remaining data in a partition then it is adjusted to the size of this data. - /// - public int GetBlockSize() => - (int)Reference.Invoke("getBlockSize"); - - - /// - /// Gets checkpointInterval value for - /// - /// - /// checkpointInterval: set checkpoint interval (>= 1) or disable checkpoint (-1). E.g. 10 means that the cache will get checkpointed every 10 iterations. Note: this setting will be ignored if the checkpoint directory is not set in the SparkContext - /// - public int GetCheckpointInterval() => - (int)Reference.Invoke("getCheckpointInterval"); - - - /// - /// Gets coldStartStrategy value for - /// - /// - /// coldStartStrategy: strategy for dealing with unknown or new users/items at prediction time. This may be useful in cross-validation or production scenarios, for handling user/item ids the model has not seen in the training data. Supported values: nan,drop. - /// - public string GetColdStartStrategy() => - (string)Reference.Invoke("getColdStartStrategy"); - - - /// - /// Gets finalStorageLevel value for - /// - /// - /// finalStorageLevel: StorageLevel for ALS model factors. - /// - public string GetFinalStorageLevel() => - (string)Reference.Invoke("getFinalStorageLevel"); - - - /// - /// Gets implicitPrefs value for - /// - /// - /// implicitPrefs: whether to use implicit preference - /// - public bool GetImplicitPrefs() => - (bool)Reference.Invoke("getImplicitPrefs"); - - - /// - /// Gets intermediateStorageLevel value for - /// - /// - /// intermediateStorageLevel: StorageLevel for intermediate datasets. Cannot be 'NONE'. - /// - public string GetIntermediateStorageLevel() => - (string)Reference.Invoke("getIntermediateStorageLevel"); - - - /// - /// Gets itemCol value for - /// - /// - /// itemCol: column name for item ids. Ids must be within the integer value range. - /// - public string GetItemCol() => - (string)Reference.Invoke("getItemCol"); - - - /// - /// Gets maxIter value for - /// - /// - /// maxIter: maximum number of iterations (>= 0) - /// - public int GetMaxIter() => - (int)Reference.Invoke("getMaxIter"); - - - /// - /// Gets nonnegative value for - /// - /// - /// nonnegative: whether to use nonnegative constraint for least squares - /// - public bool GetNonnegative() => - (bool)Reference.Invoke("getNonnegative"); - - - /// - /// Gets numItemBlocks value for - /// - /// - /// numItemBlocks: number of item blocks - /// - public int GetNumItemBlocks() => - (int)Reference.Invoke("getNumItemBlocks"); - - - /// - /// Gets numUserBlocks value for - /// - /// - /// numUserBlocks: number of user blocks - /// - public int GetNumUserBlocks() => - (int)Reference.Invoke("getNumUserBlocks"); - - - /// - /// Gets predictionCol value for - /// - /// - /// predictionCol: prediction column name - /// - public string GetPredictionCol() => - (string)Reference.Invoke("getPredictionCol"); - - - /// - /// Gets rank value for - /// - /// - /// rank: rank of the factorization - /// - public int GetRank() => - (int)Reference.Invoke("getRank"); - - - /// - /// Gets ratingCol value for - /// - /// - /// ratingCol: column name for ratings - /// - public string GetRatingCol() => - (string)Reference.Invoke("getRatingCol"); - - - /// - /// Gets regParam value for - /// - /// - /// regParam: regularization parameter (>= 0) - /// - public double GetRegParam() => - (double)Reference.Invoke("getRegParam"); - - - /// - /// Gets seed value for - /// - /// - /// seed: random seed - /// - public long GetSeed() => - (long)Reference.Invoke("getSeed"); - - - /// - /// Gets userCol value for - /// - /// - /// userCol: column name for user ids. Ids must be within the integer value range. - /// - public string GetUserCol() => - (string)Reference.Invoke("getUserCol"); - - /// Fits a model to the input data. - /// The to fit the model to. - /// - override public ALSModel Fit(DataFrame dataset) => - new ALSModel( - (JvmObjectReference)Reference.Invoke("fit", dataset)); - - /// - /// Loads the that was previously saved using Save(string). - /// - /// The path the previous was saved to - /// New object, loaded from path. - public static ALS Load(string path) => WrapAsALS( - SparkEnvironment.JvmBridge.CallStaticJavaMethod(s_className, "load", path)); - - /// - /// Saves the object so that it can be loaded later using Load. Note that these objects - /// can be shared with Scala by Loading or Saving in Scala. - /// - /// The path to save the object to - public void Save(string path) => Reference.Invoke("save", path); - - /// a instance for this ML instance. - public JavaMLWriter Write() => - new JavaMLWriter((JvmObjectReference)Reference.Invoke("write")); - - /// - /// Get the corresponding JavaMLReader instance. - /// - /// an instance for this ML instance. - public JavaMLReader Read() => - new JavaMLReader((JvmObjectReference)Reference.Invoke("read")); - - private static ALS WrapAsALS(object obj) => - new ALS((JvmObjectReference)obj); - - - } -} diff --git a/core/src/main/dotnet/src/org/apache/spark/ml/recommendation/ALSModel.cs b/core/src/main/dotnet/src/org/apache/spark/ml/recommendation/ALSModel.cs deleted file mode 100644 index 186f65cb33..0000000000 --- a/core/src/main/dotnet/src/org/apache/spark/ml/recommendation/ALSModel.cs +++ /dev/null @@ -1,179 +0,0 @@ -// Copyright (C) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. See LICENSE in project root for information. - - -using System; -using System.Collections.Generic; -using System.Linq; -using System.Reflection; -using Microsoft.Spark.ML.Feature; -using Microsoft.Spark.ML.Feature.Param; -using Microsoft.Spark.Interop; -using Microsoft.Spark.Interop.Ipc; -using Microsoft.Spark.Interop.Internal.Java.Util; -using Microsoft.Spark.Sql; -using Microsoft.Spark.Sql.Types; -using Microsoft.Spark.Utils; -using SynapseML.Dotnet.Utils; -using Synapse.ML.LightGBM.Param; - - -namespace Microsoft.Spark.ML.Recommendation -{ - /// - /// implements ALSModel - /// - public class ALSModel : JavaModel, IJavaMLWritable, IJavaMLReadable - { - private static readonly string s_className = "org.apache.spark.ml.recommendation.ALSModel"; - - /// - /// Creates a with a UID that is used to give the - /// a unique ID. - /// - /// An immutable unique ID for the object and its derivatives. - /// rank of the matrix factorization model. - /// a DataFrame that stores user factors in two columns: id and features. - /// a DataFrame that stores item factors in two columns: id and features. - public ALSModel(string uid, int rank, DataFrame userFactors, DataFrame itemFactors) - : this(SparkEnvironment.JvmBridge.CallConstructor(s_className, uid, rank, userFactors, itemFactors)) - { - } - - internal ALSModel(JvmObjectReference jvmObject) : base(jvmObject) - { - } - - /// - /// Sets blockSize value for - /// - /// - /// block size for stacking input data in matrices. Data is stacked within partitions. If block size is more than remaining data in a partition then it is adjusted to the size of this data. - /// - /// New ALSModel object - public ALSModel SetBlockSize(int value) => - WrapAsALSModel(Reference.Invoke("setBlockSize", (object)value)); - - /// - /// Sets coldStartStrategy value for - /// - /// - /// strategy for dealing with unknown or new users/items at prediction time. This may be useful in cross-validation or production scenarios, for handling user/item ids the model has not seen in the training data. Supported values: nan,drop. - /// - /// New ALSModel object - public ALSModel SetColdStartStrategy(string value) => - WrapAsALSModel(Reference.Invoke("setColdStartStrategy", (object)value)); - - /// - /// Sets itemCol value for - /// - /// - /// column name for item ids. Ids must be within the integer value range. - /// - /// New ALSModel object - public ALSModel SetItemCol(string value) => - WrapAsALSModel(Reference.Invoke("setItemCol", (object)value)); - - /// - /// Sets predictionCol value for - /// - /// - /// prediction column name - /// - /// New ALSModel object - public ALSModel SetPredictionCol(string value) => - WrapAsALSModel(Reference.Invoke("setPredictionCol", (object)value)); - - /// - /// Sets userCol value for - /// - /// - /// column name for user ids. Ids must be within the integer value range. - /// - /// New ALSModel object - public ALSModel SetUserCol(string value) => - WrapAsALSModel(Reference.Invoke("setUserCol", (object)value)); - - - /// - /// Gets blockSize value for - /// - /// - /// blockSize: block size for stacking input data in matrices. Data is stacked within partitions. If block size is more than remaining data in a partition then it is adjusted to the size of this data. - /// - public int GetBlockSize() => - (int)Reference.Invoke("getBlockSize"); - - - /// - /// Gets coldStartStrategy value for - /// - /// - /// coldStartStrategy: strategy for dealing with unknown or new users/items at prediction time. This may be useful in cross-validation or production scenarios, for handling user/item ids the model has not seen in the training data. Supported values: nan,drop. - /// - public string GetColdStartStrategy() => - (string)Reference.Invoke("getColdStartStrategy"); - - - /// - /// Gets itemCol value for - /// - /// - /// itemCol: column name for item ids. Ids must be within the integer value range. - /// - public string GetItemCol() => - (string)Reference.Invoke("getItemCol"); - - - /// - /// Gets predictionCol value for - /// - /// - /// predictionCol: prediction column name - /// - public string GetPredictionCol() => - (string)Reference.Invoke("getPredictionCol"); - - - /// - /// Gets userCol value for - /// - /// - /// userCol: column name for user ids. Ids must be within the integer value range. - /// - public string GetUserCol() => - (string)Reference.Invoke("getUserCol"); - - - /// - /// Loads the that was previously saved using Save(string). - /// - /// The path the previous was saved to - /// New object, loaded from path. - public static ALSModel Load(string path) => WrapAsALSModel( - SparkEnvironment.JvmBridge.CallStaticJavaMethod(s_className, "load", path)); - - /// - /// Saves the object so that it can be loaded later using Load. Note that these objects - /// can be shared with Scala by Loading or Saving in Scala. - /// - /// The path to save the object to - public void Save(string path) => Reference.Invoke("save", path); - - /// a instance for this ML instance. - public JavaMLWriter Write() => - new JavaMLWriter((JvmObjectReference)Reference.Invoke("write")); - - /// - /// Get the corresponding JavaMLReader instance. - /// - /// an instance for this ML instance. - public JavaMLReader Read() => - new JavaMLReader((JvmObjectReference)Reference.Invoke("read")); - - private static ALSModel WrapAsALSModel(object obj) => - new ALSModel((JvmObjectReference)obj); - - - } -} diff --git a/core/src/main/dotnet/src/org/apache/spark/ml/regression/LinearRegression.cs b/core/src/main/dotnet/src/org/apache/spark/ml/regression/LinearRegression.cs deleted file mode 100644 index a7ca1aac06..0000000000 --- a/core/src/main/dotnet/src/org/apache/spark/ml/regression/LinearRegression.cs +++ /dev/null @@ -1,388 +0,0 @@ -// Copyright (C) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. See LICENSE in project root for information. - - -using System; -using System.Collections.Generic; -using System.Linq; -using System.Reflection; -using Microsoft.Spark.ML.Feature; -using Microsoft.Spark.ML.Feature.Param; -using Microsoft.Spark.Interop; -using Microsoft.Spark.Interop.Ipc; -using Microsoft.Spark.Interop.Internal.Java.Util; -using Microsoft.Spark.Sql; -using Microsoft.Spark.Sql.Types; -using Microsoft.Spark.Utils; -using SynapseML.Dotnet.Utils; -using Synapse.ML.LightGBM.Param; -using Microsoft.Spark.ML.Regression; - -namespace Microsoft.Spark.ML.Regression -{ - /// - /// implements LinearRegression - /// - public class LinearRegression : JavaEstimator, IJavaMLWritable, IJavaMLReadable - { - private static readonly string s_className = "org.apache.spark.ml.regression.LinearRegression"; - - /// - /// Creates a without any parameters. - /// - public LinearRegression() : base(s_className) - { - } - - /// - /// Creates a with a UID that is used to give the - /// a unique ID. - /// - /// An immutable unique ID for the object and its derivatives. - public LinearRegression(string uid) : base(s_className, uid) - { - } - - internal LinearRegression(JvmObjectReference jvmObject) : base(jvmObject) - { - } - - /// - /// Sets aggregationDepth value for - /// - /// - /// suggested depth for treeAggregate (>= 2) - /// - /// New LinearRegression object - public LinearRegression SetAggregationDepth(int value) => - WrapAsLinearRegression(Reference.Invoke("setAggregationDepth", (object)value)); - - /// - /// Sets elasticNetParam value for - /// - /// - /// the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty - /// - /// New LinearRegression object - public LinearRegression SetElasticNetParam(double value) => - WrapAsLinearRegression(Reference.Invoke("setElasticNetParam", (object)value)); - - /// - /// Sets epsilon value for - /// - /// - /// The shape parameter to control the amount of robustness. Must be > 1.0. - /// - /// New LinearRegression object - public LinearRegression SetEpsilon(double value) => - WrapAsLinearRegression(Reference.Invoke("setEpsilon", (object)value)); - - /// - /// Sets featuresCol value for - /// - /// - /// features column name - /// - /// New LinearRegression object - public LinearRegression SetFeaturesCol(string value) => - WrapAsLinearRegression(Reference.Invoke("setFeaturesCol", (object)value)); - - /// - /// Sets fitIntercept value for - /// - /// - /// whether to fit an intercept term - /// - /// New LinearRegression object - public LinearRegression SetFitIntercept(bool value) => - WrapAsLinearRegression(Reference.Invoke("setFitIntercept", (object)value)); - - /// - /// Sets labelCol value for - /// - /// - /// label column name - /// - /// New LinearRegression object - public LinearRegression SetLabelCol(string value) => - WrapAsLinearRegression(Reference.Invoke("setLabelCol", (object)value)); - - /// - /// Sets loss value for - /// - /// - /// The loss function to be optimized. Supported options: squaredError, huber. (Default squaredError) - /// - /// New LinearRegression object - public LinearRegression SetLoss(string value) => - WrapAsLinearRegression(Reference.Invoke("setLoss", (object)value)); - - /// - /// Sets maxBlockSizeInMB value for - /// - /// - /// Maximum memory in MB for stacking input data into blocks. Data is stacked within partitions. If more than remaining data size in a partition then it is adjusted to the data size. Default 0.0 represents choosing optimal value, depends on specific algorithm. Must be >= 0. - /// - /// New LinearRegression object - public LinearRegression SetMaxBlockSizeInMB(double value) => - WrapAsLinearRegression(Reference.Invoke("setMaxBlockSizeInMB", (object)value)); - - /// - /// Sets maxIter value for - /// - /// - /// maximum number of iterations (>= 0) - /// - /// New LinearRegression object - public LinearRegression SetMaxIter(int value) => - WrapAsLinearRegression(Reference.Invoke("setMaxIter", (object)value)); - - /// - /// Sets predictionCol value for - /// - /// - /// prediction column name - /// - /// New LinearRegression object - public LinearRegression SetPredictionCol(string value) => - WrapAsLinearRegression(Reference.Invoke("setPredictionCol", (object)value)); - - /// - /// Sets regParam value for - /// - /// - /// regularization parameter (>= 0) - /// - /// New LinearRegression object - public LinearRegression SetRegParam(double value) => - WrapAsLinearRegression(Reference.Invoke("setRegParam", (object)value)); - - /// - /// Sets solver value for - /// - /// - /// The solver algorithm for optimization. Supported options: auto, normal, l-bfgs. (Default auto) - /// - /// New LinearRegression object - public LinearRegression SetSolver(string value) => - WrapAsLinearRegression(Reference.Invoke("setSolver", (object)value)); - - /// - /// Sets standardization value for - /// - /// - /// whether to standardize the training features before fitting the model - /// - /// New LinearRegression object - public LinearRegression SetStandardization(bool value) => - WrapAsLinearRegression(Reference.Invoke("setStandardization", (object)value)); - - /// - /// Sets tol value for - /// - /// - /// the convergence tolerance for iterative algorithms (>= 0) - /// - /// New LinearRegression object - public LinearRegression SetTol(double value) => - WrapAsLinearRegression(Reference.Invoke("setTol", (object)value)); - - /// - /// Sets weightCol value for - /// - /// - /// weight column name. If this is not set or empty, we treat all instance weights as 1.0 - /// - /// New LinearRegression object - public LinearRegression SetWeightCol(string value) => - WrapAsLinearRegression(Reference.Invoke("setWeightCol", (object)value)); - - - /// - /// Gets aggregationDepth value for - /// - /// - /// aggregationDepth: suggested depth for treeAggregate (>= 2) - /// - public int GetAggregationDepth() => - (int)Reference.Invoke("getAggregationDepth"); - - - /// - /// Gets elasticNetParam value for - /// - /// - /// elasticNetParam: the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty - /// - public double GetElasticNetParam() => - (double)Reference.Invoke("getElasticNetParam"); - - - /// - /// Gets epsilon value for - /// - /// - /// epsilon: The shape parameter to control the amount of robustness. Must be > 1.0. - /// - public double GetEpsilon() => - (double)Reference.Invoke("getEpsilon"); - - - /// - /// Gets featuresCol value for - /// - /// - /// featuresCol: features column name - /// - public string GetFeaturesCol() => - (string)Reference.Invoke("getFeaturesCol"); - - - /// - /// Gets fitIntercept value for - /// - /// - /// fitIntercept: whether to fit an intercept term - /// - public bool GetFitIntercept() => - (bool)Reference.Invoke("getFitIntercept"); - - - /// - /// Gets labelCol value for - /// - /// - /// labelCol: label column name - /// - public string GetLabelCol() => - (string)Reference.Invoke("getLabelCol"); - - - /// - /// Gets loss value for - /// - /// - /// loss: The loss function to be optimized. Supported options: squaredError, huber. (Default squaredError) - /// - public string GetLoss() => - (string)Reference.Invoke("getLoss"); - - - /// - /// Gets maxBlockSizeInMB value for - /// - /// - /// maxBlockSizeInMB: Maximum memory in MB for stacking input data into blocks. Data is stacked within partitions. If more than remaining data size in a partition then it is adjusted to the data size. Default 0.0 represents choosing optimal value, depends on specific algorithm. Must be >= 0. - /// - public double GetMaxBlockSizeInMB() => - (double)Reference.Invoke("getMaxBlockSizeInMB"); - - - /// - /// Gets maxIter value for - /// - /// - /// maxIter: maximum number of iterations (>= 0) - /// - public int GetMaxIter() => - (int)Reference.Invoke("getMaxIter"); - - - /// - /// Gets predictionCol value for - /// - /// - /// predictionCol: prediction column name - /// - public string GetPredictionCol() => - (string)Reference.Invoke("getPredictionCol"); - - - /// - /// Gets regParam value for - /// - /// - /// regParam: regularization parameter (>= 0) - /// - public double GetRegParam() => - (double)Reference.Invoke("getRegParam"); - - - /// - /// Gets solver value for - /// - /// - /// solver: The solver algorithm for optimization. Supported options: auto, normal, l-bfgs. (Default auto) - /// - public string GetSolver() => - (string)Reference.Invoke("getSolver"); - - - /// - /// Gets standardization value for - /// - /// - /// standardization: whether to standardize the training features before fitting the model - /// - public bool GetStandardization() => - (bool)Reference.Invoke("getStandardization"); - - - /// - /// Gets tol value for - /// - /// - /// tol: the convergence tolerance for iterative algorithms (>= 0) - /// - public double GetTol() => - (double)Reference.Invoke("getTol"); - - - /// - /// Gets weightCol value for - /// - /// - /// weightCol: weight column name. If this is not set or empty, we treat all instance weights as 1.0 - /// - public string GetWeightCol() => - (string)Reference.Invoke("getWeightCol"); - - /// Fits a model to the input data. - /// The to fit the model to. - /// - override public LinearRegressionModel Fit(DataFrame dataset) => - new LinearRegressionModel( - (JvmObjectReference)Reference.Invoke("fit", dataset)); - - /// - /// Loads the that was previously saved using Save(string). - /// - /// The path the previous was saved to - /// New object, loaded from path. - public static LinearRegression Load(string path) => WrapAsLinearRegression( - SparkEnvironment.JvmBridge.CallStaticJavaMethod(s_className, "load", path)); - - /// - /// Saves the object so that it can be loaded later using Load. Note that these objects - /// can be shared with Scala by Loading or Saving in Scala. - /// - /// The path to save the object to - public void Save(string path) => Reference.Invoke("save", path); - - /// a instance for this ML instance. - public JavaMLWriter Write() => - new JavaMLWriter((JvmObjectReference)Reference.Invoke("write")); - - /// - /// Get the corresponding JavaMLReader instance. - /// - /// an instance for this ML instance. - public JavaMLReader Read() => - new JavaMLReader((JvmObjectReference)Reference.Invoke("read")); - - private static LinearRegression WrapAsLinearRegression(object obj) => - new LinearRegression((JvmObjectReference)obj); - - - } -} diff --git a/core/src/main/dotnet/src/org/apache/spark/ml/regression/LinearRegressionModel.cs b/core/src/main/dotnet/src/org/apache/spark/ml/regression/LinearRegressionModel.cs deleted file mode 100644 index bf8e8a46d5..0000000000 --- a/core/src/main/dotnet/src/org/apache/spark/ml/regression/LinearRegressionModel.cs +++ /dev/null @@ -1,377 +0,0 @@ -// Copyright (C) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. See LICENSE in project root for information. - - -using System; -using System.Collections.Generic; -using System.Linq; -using System.Reflection; -using Microsoft.Spark.ML.Feature; -using Microsoft.Spark.ML.Feature.Param; -using Microsoft.Spark.Interop; -using Microsoft.Spark.Interop.Ipc; -using Microsoft.Spark.Interop.Internal.Java.Util; -using Microsoft.Spark.Sql; -using Microsoft.Spark.Sql.Types; -using Microsoft.Spark.Utils; -using SynapseML.Dotnet.Utils; -using Synapse.ML.LightGBM.Param; - - -namespace Microsoft.Spark.ML.Regression -{ - /// - /// implements LinearRegressionModel - /// - public class LinearRegressionModel : JavaModel, IJavaMLWritable, IJavaMLReadable - { - private static readonly string s_className = "org.apache.spark.ml.regression.LinearRegressionModel"; - - // TODO: support this after constructing Vector class in .NET - // /// - // /// Creates a with a UID that is used to give the - // /// a unique ID. - // /// - // /// An immutable unique ID for the object and its derivatives. - // public LinearRegressionModel(string uid, Vector coefficients, double intercept, double scale) - // : this(SparkEnvironment.JvmBridge.CallConstructor(s_className, uid, coefficients, intercept, scale)) - // { - // } - - internal LinearRegressionModel(JvmObjectReference jvmObject) : base(jvmObject) - { - } - - /// - /// Sets aggregationDepth value for - /// - /// - /// suggested depth for treeAggregate (>= 2) - /// - /// New LinearRegressionModel object - public LinearRegressionModel SetAggregationDepth(int value) => - WrapAsLinearRegressionModel(Reference.Invoke("setAggregationDepth", (object)value)); - - /// - /// Sets elasticNetParam value for - /// - /// - /// the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty - /// - /// New LinearRegressionModel object - public LinearRegressionModel SetElasticNetParam(double value) => - WrapAsLinearRegressionModel(Reference.Invoke("setElasticNetParam", (object)value)); - - /// - /// Sets epsilon value for - /// - /// - /// The shape parameter to control the amount of robustness. Must be > 1.0. - /// - /// New LinearRegressionModel object - public LinearRegressionModel SetEpsilon(double value) => - WrapAsLinearRegressionModel(Reference.Invoke("setEpsilon", (object)value)); - - /// - /// Sets featuresCol value for - /// - /// - /// features column name - /// - /// New LinearRegressionModel object - public LinearRegressionModel SetFeaturesCol(string value) => - WrapAsLinearRegressionModel(Reference.Invoke("setFeaturesCol", (object)value)); - - /// - /// Sets fitIntercept value for - /// - /// - /// whether to fit an intercept term - /// - /// New LinearRegressionModel object - public LinearRegressionModel SetFitIntercept(bool value) => - WrapAsLinearRegressionModel(Reference.Invoke("setFitIntercept", (object)value)); - - /// - /// Sets labelCol value for - /// - /// - /// label column name - /// - /// New LinearRegressionModel object - public LinearRegressionModel SetLabelCol(string value) => - WrapAsLinearRegressionModel(Reference.Invoke("setLabelCol", (object)value)); - - /// - /// Sets loss value for - /// - /// - /// The loss function to be optimized. Supported options: squaredError, huber. (Default squaredError) - /// - /// New LinearRegressionModel object - public LinearRegressionModel SetLoss(string value) => - WrapAsLinearRegressionModel(Reference.Invoke("setLoss", (object)value)); - - /// - /// Sets maxBlockSizeInMB value for - /// - /// - /// Maximum memory in MB for stacking input data into blocks. Data is stacked within partitions. If more than remaining data size in a partition then it is adjusted to the data size. Default 0.0 represents choosing optimal value, depends on specific algorithm. Must be >= 0. - /// - /// New LinearRegressionModel object - public LinearRegressionModel SetMaxBlockSizeInMB(double value) => - WrapAsLinearRegressionModel(Reference.Invoke("setMaxBlockSizeInMB", (object)value)); - - /// - /// Sets maxIter value for - /// - /// - /// maximum number of iterations (>= 0) - /// - /// New LinearRegressionModel object - public LinearRegressionModel SetMaxIter(int value) => - WrapAsLinearRegressionModel(Reference.Invoke("setMaxIter", (object)value)); - - /// - /// Sets predictionCol value for - /// - /// - /// prediction column name - /// - /// New LinearRegressionModel object - public LinearRegressionModel SetPredictionCol(string value) => - WrapAsLinearRegressionModel(Reference.Invoke("setPredictionCol", (object)value)); - - /// - /// Sets regParam value for - /// - /// - /// regularization parameter (>= 0) - /// - /// New LinearRegressionModel object - public LinearRegressionModel SetRegParam(double value) => - WrapAsLinearRegressionModel(Reference.Invoke("setRegParam", (object)value)); - - /// - /// Sets solver value for - /// - /// - /// The solver algorithm for optimization. Supported options: auto, normal, l-bfgs. (Default auto) - /// - /// New LinearRegressionModel object - public LinearRegressionModel SetSolver(string value) => - WrapAsLinearRegressionModel(Reference.Invoke("setSolver", (object)value)); - - /// - /// Sets standardization value for - /// - /// - /// whether to standardize the training features before fitting the model - /// - /// New LinearRegressionModel object - public LinearRegressionModel SetStandardization(bool value) => - WrapAsLinearRegressionModel(Reference.Invoke("setStandardization", (object)value)); - - /// - /// Sets tol value for - /// - /// - /// the convergence tolerance for iterative algorithms (>= 0) - /// - /// New LinearRegressionModel object - public LinearRegressionModel SetTol(double value) => - WrapAsLinearRegressionModel(Reference.Invoke("setTol", (object)value)); - - /// - /// Sets weightCol value for - /// - /// - /// weight column name. If this is not set or empty, we treat all instance weights as 1.0 - /// - /// New LinearRegressionModel object - public LinearRegressionModel SetWeightCol(string value) => - WrapAsLinearRegressionModel(Reference.Invoke("setWeightCol", (object)value)); - - - /// - /// Gets aggregationDepth value for - /// - /// - /// aggregationDepth: suggested depth for treeAggregate (>= 2) - /// - public int GetAggregationDepth() => - (int)Reference.Invoke("getAggregationDepth"); - - - /// - /// Gets elasticNetParam value for - /// - /// - /// elasticNetParam: the ElasticNet mixing parameter, in range [0, 1]. For alpha = 0, the penalty is an L2 penalty. For alpha = 1, it is an L1 penalty - /// - public double GetElasticNetParam() => - (double)Reference.Invoke("getElasticNetParam"); - - - /// - /// Gets epsilon value for - /// - /// - /// epsilon: The shape parameter to control the amount of robustness. Must be > 1.0. - /// - public double GetEpsilon() => - (double)Reference.Invoke("getEpsilon"); - - - /// - /// Gets featuresCol value for - /// - /// - /// featuresCol: features column name - /// - public string GetFeaturesCol() => - (string)Reference.Invoke("getFeaturesCol"); - - - /// - /// Gets fitIntercept value for - /// - /// - /// fitIntercept: whether to fit an intercept term - /// - public bool GetFitIntercept() => - (bool)Reference.Invoke("getFitIntercept"); - - - /// - /// Gets labelCol value for - /// - /// - /// labelCol: label column name - /// - public string GetLabelCol() => - (string)Reference.Invoke("getLabelCol"); - - - /// - /// Gets loss value for - /// - /// - /// loss: The loss function to be optimized. Supported options: squaredError, huber. (Default squaredError) - /// - public string GetLoss() => - (string)Reference.Invoke("getLoss"); - - - /// - /// Gets maxBlockSizeInMB value for - /// - /// - /// maxBlockSizeInMB: Maximum memory in MB for stacking input data into blocks. Data is stacked within partitions. If more than remaining data size in a partition then it is adjusted to the data size. Default 0.0 represents choosing optimal value, depends on specific algorithm. Must be >= 0. - /// - public double GetMaxBlockSizeInMB() => - (double)Reference.Invoke("getMaxBlockSizeInMB"); - - - /// - /// Gets maxIter value for - /// - /// - /// maxIter: maximum number of iterations (>= 0) - /// - public int GetMaxIter() => - (int)Reference.Invoke("getMaxIter"); - - - /// - /// Gets predictionCol value for - /// - /// - /// predictionCol: prediction column name - /// - public string GetPredictionCol() => - (string)Reference.Invoke("getPredictionCol"); - - - /// - /// Gets regParam value for - /// - /// - /// regParam: regularization parameter (>= 0) - /// - public double GetRegParam() => - (double)Reference.Invoke("getRegParam"); - - - /// - /// Gets solver value for - /// - /// - /// solver: The solver algorithm for optimization. Supported options: auto, normal, l-bfgs. (Default auto) - /// - public string GetSolver() => - (string)Reference.Invoke("getSolver"); - - - /// - /// Gets standardization value for - /// - /// - /// standardization: whether to standardize the training features before fitting the model - /// - public bool GetStandardization() => - (bool)Reference.Invoke("getStandardization"); - - - /// - /// Gets tol value for - /// - /// - /// tol: the convergence tolerance for iterative algorithms (>= 0) - /// - public double GetTol() => - (double)Reference.Invoke("getTol"); - - - /// - /// Gets weightCol value for - /// - /// - /// weightCol: weight column name. If this is not set or empty, we treat all instance weights as 1.0 - /// - public string GetWeightCol() => - (string)Reference.Invoke("getWeightCol"); - - - /// - /// Loads the that was previously saved using Save(string). - /// - /// The path the previous was saved to - /// New object, loaded from path. - public static LinearRegressionModel Load(string path) => WrapAsLinearRegressionModel( - SparkEnvironment.JvmBridge.CallStaticJavaMethod(s_className, "load", path)); - - /// - /// Saves the object so that it can be loaded later using Load. Note that these objects - /// can be shared with Scala by Loading or Saving in Scala. - /// - /// The path to save the object to - public void Save(string path) => Reference.Invoke("save", path); - - /// a instance for this ML instance. - public JavaMLWriter Write() => - new JavaMLWriter((JvmObjectReference)Reference.Invoke("write")); - - /// - /// Get the corresponding JavaMLReader instance. - /// - /// an instance for this ML instance. - public JavaMLReader Read() => - new JavaMLReader((JvmObjectReference)Reference.Invoke("read")); - - private static LinearRegressionModel WrapAsLinearRegressionModel(object obj) => - new LinearRegressionModel((JvmObjectReference)obj); - - - } -} diff --git a/core/src/main/dotnet/test/E2ETestUtils.cs b/core/src/main/dotnet/test/E2ETestUtils.cs deleted file mode 100644 index b18ba45e4b..0000000000 --- a/core/src/main/dotnet/test/E2ETestUtils.cs +++ /dev/null @@ -1,363 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. -// See the LICENSE file in the project root for more information. - -using System; -using System.Diagnostics; -using System.IO; -using System.Reflection; -using System.Linq; -using Xunit.Sdk; -using System.Runtime.InteropServices; -using System.Text; -using Microsoft.Spark.Interop.Ipc; -using Microsoft.Spark.Sql; -using Microsoft.Spark.Services; -using Xunit; - -namespace SynapseMLtest.Utils -{ - /// - /// Creates a temporary folder that is automatically cleaned up when disposed. - /// - public class TemporaryDirectory : IDisposable - { - private bool _disposed = false; - - /// - /// Path to temporary folder. - /// - public string Path { get; } - - public TemporaryDirectory() - { - Path = System.IO.Path.Combine(System.IO.Path.GetTempPath(), Guid.NewGuid().ToString()); - Cleanup(); - Directory.CreateDirectory(Path); - Path = $"{Path}{System.IO.Path.DirectorySeparatorChar}"; - } - - public void Dispose() - { - Dispose(true); - GC.SuppressFinalize(this); - } - - private void Cleanup() - { - if (File.Exists(Path)) - { - File.Delete(Path); - } - else if (Directory.Exists(Path)) - { - Directory.Delete(Path, true); - } - } - - private void Dispose(bool disposing) - { - if (_disposed) - { - return; - } - - if (disposing) - { - Cleanup(); - } - - _disposed = true; - } - } - - internal static class SparkSettings - { - internal static Version Version { get; private set; } - internal static string SparkHome { get; private set; } - - static SparkSettings() - { - InitSparkHome(); - InitVersion(); - } - - private static void InitSparkHome() - { - SparkHome = Environment.GetEnvironmentVariable("SPARK_HOME"); - if (SparkHome == null) - { - throw new NullException("SPARK_HOME environment variable is not set."); - } - } - - private static void InitVersion() - { - // First line of the RELEASE file under SPARK_HOME will be something similar to: - // Spark 2.4.0 built for Hadoop 2.7.3 - string firstLine = - File.ReadLines($"{SparkHome}{Path.DirectorySeparatorChar}RELEASE").First(); - - // Grab "2.4.0" from "Spark 2.4.0 built for Hadoop 2.7.3" - string versionStr = firstLine.Split(' ')[1]; - - // Strip anything below version number. - // For example, "3.0.0-preview" should become "3.0.0". - Version = new Version(versionStr.Split('-')[0]); - } - } - - internal static class TestEnvironment - { - private static string s_resourceDirectory; - internal static string ResourceDirectory - { - get - { - if (s_resourceDirectory is null) - { - s_resourceDirectory = - AppDomain.CurrentDomain.BaseDirectory + - Path.DirectorySeparatorChar + - "Resources" + - Path.DirectorySeparatorChar; - } - - return s_resourceDirectory; - } - } - } - - /// - /// SparkFixture acts as a global fixture to start Spark application in a debug - /// mode through the spark-submit. It also provides a default SparkSession - /// object that any tests can use. - /// - public sealed class SparkFixture : IDisposable - { - /// - /// The names of environment variables used by the SparkFixture. - /// - public class EnvironmentVariableNames - { - /// - /// This environment variable specifies extra args passed to spark-submit. - /// - public const string ExtraSparkSubmitArgs = - "DOTNET_SPARKFIXTURE_EXTRA_SPARK_SUBMIT_ARGS"; - - /// - /// This environment variable specifies the path where the DotNet worker is installed. - /// - public const string WorkerDir = ConfigurationService.DefaultWorkerDirEnvVarName; - } - - private readonly Process _process = new Process(); - private readonly TemporaryDirectory _tempDirectory = new TemporaryDirectory(); - - public const string DefaultLogLevel = "ERROR"; - - public SparkSession Spark { get; } - - public IJvmBridge Jvm { get; } - - public SparkFixture() - { - // The worker directory must be set for the Microsoft.Spark.Worker executable. - if (string.IsNullOrEmpty( - Environment.GetEnvironmentVariable(EnvironmentVariableNames.WorkerDir))) - { - throw new Exception( - $"Environment variable '{EnvironmentVariableNames.WorkerDir}' must be set."); - } - - BuildSparkCmd(out var filename, out var args); - - // Configure the process using the StartInfo properties. - _process.StartInfo.FileName = filename; - _process.StartInfo.Arguments = args; - // UseShellExecute defaults to true in .NET Framework, - // but defaults to false in .NET Core. To support both, set it - // to false which is required for stream redirection. - _process.StartInfo.UseShellExecute = false; - _process.StartInfo.RedirectStandardInput = true; - _process.StartInfo.RedirectStandardOutput = true; - // _process.StartInfo.RedirectStandardError = true; - - bool isSparkReady = false; - _process.OutputDataReceived += (sender, arguments) => - { - // Scala-side driver for .NET emits the following message after it is - // launched and ready to accept connections. - if (!isSparkReady && - arguments.Data.Contains("Backend running debug mode")) - { - isSparkReady = true; - } - }; - - _process.Start(); - _process.BeginOutputReadLine(); - // _process.BeginErrorReadLine(); - - bool processExited = false; - while (!isSparkReady && !processExited) - { - processExited = _process.WaitForExit(500); - } - - if (processExited) - { - _process.Dispose(); - - // The process should not have been exited. - throw new Exception( - $"Process exited prematurely with '{filename} {args}'."); - } - - Spark = SparkSession - .Builder() - // Lower the shuffle partitions to speed up groupBy() operations. - .Config("spark.sql.shuffle.partitions", "3") - .Config("spark.ui.enabled", true) - .Config("spark.ui.showConsoleProgress", true) - .AppName("SynapseML dotnet E2E Test") - .GetOrCreate(); - - Spark.SparkContext.SetLogLevel(DefaultLogLevel); - - Jvm = Spark.Reference.Jvm; - } - - public string AddPackages(string args) - { - string packagesOption = "--packages "; - string[] splits = args.Split(packagesOption, 2); - - StringBuilder newArgs = new StringBuilder(splits[0]) - .Append(packagesOption) - .Append(GetAvroPackage()) - .Append(",") - .Append(GetSynapseMLPackage()); - if (splits.Length > 1) - { - newArgs.Append(",").Append(splits[1]); - } - - return newArgs.ToString(); - } - - public string GetAvroPackage() - { - Version sparkVersion = SparkSettings.Version; - string avroVersion = sparkVersion.Major switch - { - 2 => $"spark-avro_2.11:{sparkVersion}", - 3 => $"spark-avro_2.12:{sparkVersion}", - _ => throw new NotSupportedException($"Spark {sparkVersion} not supported.") - }; - - return $"org.apache.spark:{avroVersion}"; - } - - public string GetSynapseMLPackage() => Helper.GetSynapseMLPackage(); - - public string AddSynapseMLRepo() - { - bool addSynapseMLRepo = true; - if (addSynapseMLRepo) - { - return "--repositories https://mmlspark.azureedge.net/maven"; - } - return ""; - } - - public string ExclusionsForSynapseML() - { - bool excludePackages = true; - if (excludePackages) - { - return "--exclude-packages org.json4s:json4s-ast_2.12,io.netty:netty-tcnative-boringssl-static"; - } - return ""; - } - - public void Dispose() - { - Spark.Dispose(); - - // CSparkRunner will exit upon receiving newline from - // the standard input stream. - _process.StandardInput.WriteLine("done"); - _process.StandardInput.Flush(); - _process.WaitForExit(); - - _tempDirectory.Dispose(); - } - - private void BuildSparkCmd(out string filename, out string args) - { - string sparkHome = SparkSettings.SparkHome; - - // Build the executable name. - filename = Path.Combine(sparkHome, "bin", "spark-submit"); - if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) - { - filename += ".cmd"; - } - - if (!File.Exists(filename)) - { - throw new FileNotFoundException($"{filename} does not exist."); - } - - // Build the arguments for the spark-submit. - string classArg = "--class org.apache.spark.deploy.dotnet.DotnetRunner"; - string curDir = AppDomain.CurrentDomain.BaseDirectory; - string jarPrefix = GetJarPrefix(); - string jarDir = curDir; - string assemblyVersion = "2.1.1"; - string scalaVersion = (SparkSettings.Version.Major == 3) ? "2.12" : "2.11"; - string jar = Path.Combine(jarDir, $"{jarPrefix}_{scalaVersion}-{assemblyVersion}.jar"); - - if (!File.Exists(jar)) - { - throw new FileNotFoundException($"{jar} does not exist."); - } - - string warehouseUri = new Uri( - Path.Combine(_tempDirectory.Path, "spark-warehouse")).AbsoluteUri; - string warehouseDir = $"--conf spark.sql.warehouse.dir={warehouseUri}"; - - string extraArgs = Environment.GetEnvironmentVariable( - EnvironmentVariableNames.ExtraSparkSubmitArgs) ?? ""; - - // If there exists log4j.properties in SPARK_HOME/conf directory, Spark from 2.3.* - // to 2.4.0 hang in E2E test. The reverse behavior is true for Spark 2.4.1; if - // there does not exist log4j.properties, the tests hang. - // Note that the hang happens in JVM when it tries to append a console logger (log4j). - // The solution is to use custom log configuration that appends NullLogger, which - // works across all Spark versions. - string resourceUri = new Uri(TestEnvironment.ResourceDirectory).AbsoluteUri; - string logOption = "--conf spark.driver.extraJavaOptions=-Dlog4j.configuration=" + - $"{resourceUri}/log4j.properties"; - - args = $"{logOption} {warehouseDir} {AddPackages(extraArgs)} {AddSynapseMLRepo()} {ExclusionsForSynapseML()} {classArg} --master local {jar} debug"; - } - - private string GetJarPrefix() - { - Version sparkVersion = SparkSettings.Version; - return $"microsoft-spark-{sparkVersion.Major}-{sparkVersion.Minor}"; - } - } - - // [CollectionDefinition("Spark E2E Tests")] - // public class SparkCollection : ICollectionFixture - // { - // // This class has no code, and is never created. Its purpose is simply - // // to be the place to apply [CollectionDefinition] and all the - // // ICollectionFixture<> interfaces. - // } - -} diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/codegen/CodeGen.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/codegen/CodeGen.scala index b423b24f97..cdc37483d4 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/codegen/CodeGen.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/codegen/CodeGen.scala @@ -5,7 +5,6 @@ package com.microsoft.azure.synapse.ml.codegen import com.microsoft.azure.synapse.ml.codegen.CodegenConfigProtocol._ import com.microsoft.azure.synapse.ml.codegen.PyCodegen.pyGen -import com.microsoft.azure.synapse.ml.codegen.DotnetCodegen.dotnetGen import com.microsoft.azure.synapse.ml.codegen.RCodegen.rGen import org.apache.commons.io.FileUtils import spray.json._ @@ -27,6 +26,5 @@ object CodeGen { clean(conf.packageDir) rGen(conf) pyGen(conf) - dotnetGen(conf) } } diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/codegen/CodegenConfig.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/codegen/CodegenConfig.scala index 790e25f11e..7b98b40817 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/codegen/CodegenConfig.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/codegen/CodegenConfig.scala @@ -15,7 +15,6 @@ case class CodegenConfig(name: String, version: String, pythonizedVersion: String, rVersion: String, - dotnetVersion: String, packageName: String) { def generatedDir: File = new File(targetDir, "generated") @@ -51,21 +50,6 @@ case class CodegenConfig(name: String, def rSrcOverrideDir: File = new File(topDir, "src/main/R") - //Dotnet Codegen Constants - def dotnetSrcDir: File = new File(srcDir, "dotnet") - - def dotnetPackageDir: File = new File(packageDir, "dotnet") - - def dotnetTestDir: File = new File(testDir, "dotnet") - - def dotnetTestDataDir: File = new File(testDataDir, "dotnet") - - def dotnetSrcOverrideDir: File = new File(topDir, "src/main/dotnet") - - def dotnetSrcHelperDir: File = new File(dotnetSrcDir, "helper") - - def dotnetTestOverrideDir: File = new File(topDir, "src/test/dotnet") - //val rPackageFile = new File(rPackageDir, s"mmlspark-$mmlVer.zip") def internalPrefix: String = "_" @@ -105,5 +89,5 @@ case class CodegenConfig(name: String, } object CodegenConfigProtocol extends DefaultJsonProtocol { - implicit val CCFormat: RootJsonFormat[CodegenConfig] = jsonFormat9(CodegenConfig.apply) + implicit val CCFormat: RootJsonFormat[CodegenConfig] = jsonFormat8(CodegenConfig.apply) } diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/codegen/DefaultParamInfo.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/codegen/DefaultParamInfo.scala index ec3085f3ce..0946bfec88 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/codegen/DefaultParamInfo.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/codegen/DefaultParamInfo.scala @@ -12,15 +12,13 @@ import scala.reflect.ClassTag case class ParamInfo[T <: Param[_]: ClassTag](pyType: String, pyTypeConverter: Option[String], rTypeConverter: Option[String], - dotnetType: String, example: Any) { - - def this(pyType: String, typeConverterArg: String, rTypeConverterArg: String, dotnetType: String, example: Any) = { - this(pyType, Some(typeConverterArg), Some(rTypeConverterArg), dotnetType, example) + def this(pyType: String, typeConverterArg: String, rTypeConverterArg: String, example: Any) = { + this(pyType, Some(typeConverterArg), Some(rTypeConverterArg), example) } - def this(pyType: String, dotnetType: String, example: Any) = { - this(pyType, None, None, dotnetType, example) + def this(pyType: String, example: Any) = { + this(pyType, None, None, example) } } @@ -28,41 +26,41 @@ case class ParamInfo[T <: Param[_]: ClassTag](pyType: String, object DefaultParamInfo extends Logging { val BooleanInfo = new ParamInfo[BooleanParam]( - "bool", "TypeConverters.toBoolean", "as.logical", "bool", true) + "bool", "TypeConverters.toBoolean", "as.logical", true) val IntInfo = new ParamInfo[IntParam]( - "int", "TypeConverters.toInt", "as.integer", "int", 1) + "int", "TypeConverters.toInt", "as.integer", 1) val LongInfo = new ParamInfo[LongParam]( - "long", None, Some("as.integer"), "long", 1L) + "long", None, Some("as.integer"), 1L) val FloatInfo = new ParamInfo[FloatParam]( - "float", "TypeConverters.toFloat", "as.double", "float", 1.0) + "float", "TypeConverters.toFloat", "as.double", 1.0) val DoubleInfo = new ParamInfo[DoubleParam]( - "float", "TypeConverters.toFloat", "as.double", "double", 1.0) + "float", "TypeConverters.toFloat", "as.double", 1.0) val StringInfo = new ParamInfo[Param[String]]( - "str", Some("TypeConverters.toString"), None, "string", "foo") + "str", Some("TypeConverters.toString"), None, "foo") val StringArrayInfo = new ParamInfo[StringArrayParam]( - "list", "TypeConverters.toListString", "as.array", "string[]", Array("foo", "bar")) + "list", "TypeConverters.toListString", "as.array", Array("foo", "bar")) val DoubleArrayInfo = new ParamInfo[DoubleArrayParam]( - "list", "TypeConverters.toListFloat", "as.array", "double[]", Array(1.0, 2.0)) + "list", "TypeConverters.toListFloat", "as.array", Array(1.0, 2.0)) val IntArrayInfo = new ParamInfo[IntArrayParam]( - "list", "TypeConverters.toListInt", "as.array", "int[]", Array(1, 2)) + "list", "TypeConverters.toListInt", "as.array", Array(1, 2)) val ByteArrayInfo = new ParamInfo[ByteArrayParam]( - "list", "byte[]", Array(1.toByte, 0.toByte)) + "list", Array(1.toByte, 0.toByte)) val DoubleArrayArrayInfo = new ParamInfo[DoubleArrayArrayParam]( - "object", "double[][]", Array(Array(1.0, 2.0))) + "object", Array(Array(1.0, 2.0))) val StringStringMapInfo = new ParamInfo[StringStringMapParam]( - "dict", "Dictionary", Map("foo" -> "bar")) + "dict", Map("foo" -> "bar")) val StringIntMapInfo = new ParamInfo[StringIntMapParam]( - "dict", "Dictionary", Map("foo" -> 1)) + "dict", Map("foo" -> 1)) val ArrayMapInfo = new ParamInfo[ArrayMapParam]( - "object", "Dictionary[]", Array(Map("foo" -> 1))) + "object", Array(Map("foo" -> 1))) val TypedIntArrayInfo = new ParamInfo[TypedIntArrayParam]( - "object", "int[]", Array(1, 2)) + "object", Array(1, 2)) val TypedDoubleArrayInfo = new ParamInfo[TypedDoubleArrayParam]( - "object", "double[]", Array(1.0, 2.0)) + "object", Array(1.0, 2.0)) val UntypedArrayInfo = new ParamInfo[UntypedArrayParam]( - "object", "object[]", Array(1.0, 2.0)) + "object", Array(1.0, 2.0)) val UnknownInfo = new ParamInfo[Param[_]]( - "object", "object", null) //scalastyle:ignore null + "object", null) //scalastyle:ignore null //scalastyle:off cyclomatic.complexity def getGeneralParamInfo(dataType: Param[_]): ParamInfo[_] = { diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/codegen/DotnetCodegen.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/codegen/DotnetCodegen.scala deleted file mode 100644 index 1f85c6f1f2..0000000000 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/codegen/DotnetCodegen.scala +++ /dev/null @@ -1,93 +0,0 @@ -// Copyright (C) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. See LICENSE in project root for information. - -package com.microsoft.azure.synapse.ml.codegen - -import com.microsoft.azure.synapse.ml.codegen.CodegenConfigProtocol._ -import com.microsoft.azure.synapse.ml.core.env.FileUtilities._ -import com.microsoft.azure.synapse.ml.core.utils.JarLoadingUtils.instantiateServices -import org.apache.commons.io.FileUtils -import spray.json._ - -import java.io.File - - -object DotnetCodegen { - - import CodeGenUtils._ - - def generateDotnetClasses(conf: CodegenConfig): Unit = { - val instantiatedClasses = instantiateServices[DotnetWrappable](conf.jarName) - instantiatedClasses.foreach { w => - println(w.getClass.getName) - w.makeDotnetFile(conf) - } - } - - //noinspection ScalaStyle - def generateDotnetProjFile(conf: CodegenConfig): Unit = { - if (!conf.dotnetSrcDir.exists()) { - conf.dotnetSrcDir.mkdir() - } - val curProject = conf.name.split("-").drop(1).map(s => s.capitalize).mkString("") - val projectDir = join(conf.dotnetSrcDir, "synapse", "ml") - if (!projectDir.exists()){ - projectDir.mkdirs() - } - val newtonsoftDep = if(curProject == "DeepLearning") { - s"""""".stripMargin - } else "" - // TODO: update SynapseML.DotnetBase version whenever we upload a new one - writeFile(new File(projectDir, s"${curProject}ProjectSetup.csproj"), - s""" - | - | - | netstandard2.1 - | 9.0 - | SynapseML.$curProject - | true - | true - | .NET for SynapseML.$curProject - | ${conf.dotnetVersion} - | - | - | - | - | - | - | $newtonsoftDep - | - | - | - | - | - | - | - | Microsoft.Spark;SynapseML.DotnetBase - | - | - | - | false - | - | - | - | - |""".stripMargin) - } - - def dotnetGen(conf: CodegenConfig): Unit = { - println(s"Generating dotnet for ${conf.jarName}") - clean(conf.dotnetSrcDir) - generateDotnetClasses(conf) - if (conf.dotnetSrcOverrideDir.exists()) - FileUtils.copyDirectoryToDirectory(toDir(conf.dotnetSrcOverrideDir), toDir(conf.dotnetSrcHelperDir)) - generateDotnetProjFile(conf) - } - - def main(args: Array[String]): Unit = { - val conf = args.head.parseJson.convertTo[CodegenConfig] - clean(conf.dotnetPackageDir) - dotnetGen(conf) - } - -} diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/codegen/DotnetWrappable.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/codegen/DotnetWrappable.scala deleted file mode 100644 index 5203120003..0000000000 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/codegen/DotnetWrappable.scala +++ /dev/null @@ -1,286 +0,0 @@ -// Copyright (C) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. See LICENSE in project root for information. - -package com.microsoft.azure.synapse.ml.codegen - -import com.microsoft.azure.synapse.ml.core.env.FileUtilities -import com.microsoft.azure.synapse.ml.param.{DotnetWrappableParam, ServiceParam, WrappableParam} -import org.apache.commons.lang.StringUtils.capitalize -import org.apache.spark.ml._ -import org.apache.spark.ml.evaluation.Evaluator -import org.apache.spark.ml.param._ - -import java.nio.charset.StandardCharsets -import java.nio.file.Files -import scala.collection.JavaConverters._ - -// TODO: delete as moved this to dotnet/spark repo -object DotnetHelper { - - def setPipelineStages(pipeline: Pipeline, value: java.util.ArrayList[_ <: PipelineStage]): Pipeline = - pipeline.setStages(value.asScala.toArray) - - def convertToJavaMap(value: Map[_, _]): java.util.Map[_, _] = value.asJava - - // TODO: support more types for UntypedArrayParam - def mapScalaToJava(value: java.lang.Object): Any = { - value match { - case i: java.lang.Integer => i.toInt - case d: java.lang.Double => d.toDouble - case f: java.lang.Float => f.toFloat - case b: java.lang.Boolean => b.booleanValue() - case l: java.lang.Long => l.toLong - case s: java.lang.Short => s.toShort - case by: java.lang.Byte => by.toByte - case c: java.lang.Character => c.toChar - case _ => value - } - } -} - -trait DotnetWrappable extends BaseWrappable { - - import GenerationUtils._ - - protected lazy val dotnetCopyrightLines: String = - s"""|// Copyright (C) Microsoft Corporation. All rights reserved. - |// Licensed under the MIT License. See LICENSE in project root for information. - |""".stripMargin - - protected lazy val dotnetNamespace: String = - thisStage.getClass.getName - .replace("com.microsoft.azure.synapse.ml", "Synapse.ML") - .replace("org.apache.spark.ml", "Microsoft.Spark.ML") - .split(".".toCharArray).map(capitalize).dropRight(1).mkString(".") - - protected lazy val dotnetInternalWrapper = false - - protected lazy val dotnetClassName: String = { - if (dotnetInternalWrapper) { - "_" + classNameHelper - } else { - "" + classNameHelper - } - } - - protected lazy val dotnetClassNameString: String = "s_className" - - protected lazy val dotnetClassWrapperName: String = "WrapAs" + dotnetClassName - - protected lazy val dotnetObjectBaseClass: String = { - thisStage match { - case _: Estimator[_] => s"JavaEstimator<${companionModelClassName.split(".".toCharArray).last}>" - case _: Model[_] => s"JavaModel<$dotnetClassName>" - case _: Transformer => s"JavaTransformer" - case _: Evaluator => s"JavaEvaluator" - } - } - - protected def dotnetMLReadWriteMethods: String = { - s"""|/// - |/// Loads the that was previously saved using Save(string). - |/// - |/// The path the previous was saved to - |/// New object, loaded from path. - |public static $dotnetClassName Load(string path) => $dotnetClassWrapperName( - | SparkEnvironment.JvmBridge.CallStaticJavaMethod($dotnetClassNameString, "load", path)); - | - |/// - |/// Saves the object so that it can be loaded later using Load. Note that these objects - |/// can be shared with Scala by Loading or Saving in Scala. - |/// - |/// The path to save the object to - |public void Save(string path) => Reference.Invoke("save", path); - | - |/// a instance for this ML instance. - |public JavaMLWriter Write() => - | new JavaMLWriter((JvmObjectReference)Reference.Invoke("write")); - | - |/// - |/// Get the corresponding JavaMLReader instance. - |/// - |/// an instance for this ML instance. - |public JavaMLReader<$dotnetClassName> Read() => - | new JavaMLReader<$dotnetClassName>((JvmObjectReference)Reference.Invoke("read")); - |""".stripMargin - } - - protected def dotnetWrapAsTypeMethod: String = { - s"""|private static $dotnetClassName $dotnetClassWrapperName(object obj) => - | new $dotnetClassName((JvmObjectReference)obj); - |""".stripMargin - } - - def dotnetAdditionalMethods: String = { - "" - } - - //noinspection ScalaStyle - protected def dotnetParamSetter(p: Param[_]): String = { - val capName = p.name.capitalize - val docString = - s"""|/// - |/// Sets value for ${p.name} - |/// - |/// - |/// ${p.doc} - |/// - |/// New $dotnetClassName object """.stripMargin - p match { - // TODO: Fix UDF & UDPyF confusion; ParamSpaceParam, BallTreeParam, ConditionalBallTreeParam type - case sp: ServiceParam[_] => - s"""|$docString - |${sp.dotnetSetter(dotnetClassName, capName, dotnetClassWrapperName)} - | - |${docString.replaceFirst(sp.name, s"${sp.name} column")} - |${sp.dotnetSetterForSrvParamCol(dotnetClassName, capName, dotnetClassWrapperName)} - |""".stripMargin - case wp: DotnetWrappableParam[_] => - s"""|$docString - |${wp.dotnetSetter(dotnetClassName, capName, dotnetClassWrapperName)} - |""".stripMargin - case _ => - s"""|$docString - |public $dotnetClassName Set$capName(${getParamInfo(p).dotnetType} value) => - | $dotnetClassWrapperName(Reference.Invoke(\"set$capName\", (object)value)); - |""".stripMargin - } - } - - protected def dotnetParamSetters: String = - thisStage.params.map(dotnetParamSetter).mkString("\n") - - //noinspection ScalaStyle - protected def dotnetParamGetter(p: Param[_]): String = { - val capName = p.name.capitalize - val docString = - s"""|/// - |/// Gets ${p.name} value - |/// - |/// - |/// ${p.name}: ${p.doc} - |/// """.stripMargin - p match { - case wp: DotnetWrappableParam[_] => - s"""|$docString - |${wp.dotnetGetter(capName)} - |""".stripMargin - case _ => - s"""|$docString - |public ${getParamInfo(p).dotnetType} Get$capName() => - | (${getParamInfo(p).dotnetType})Reference.Invoke(\"get$capName\"); - |""".stripMargin - } - } - - protected def dotnetParamGetters: String = - thisStage.params.map(dotnetParamGetter).mkString("\n") - - //noinspection ScalaStyle - protected def dotnetExtraMethods: String = { - thisStage match { - case _: Estimator[_] => - s"""|/// Fits a model to the input data. - |/// The to fit the model to. - |/// - |override public ${companionModelClassName.split(".".toCharArray).last} Fit(DataFrame dataset) => - | new ${companionModelClassName.split(".".toCharArray).last}( - | (JvmObjectReference)Reference.Invoke("fit", dataset)); - |""".stripMargin - case _ => - "" - } - } - - protected def dotnetExtraEstimatorImports: String = { - thisStage match { - case _: Estimator[_] => - val companionModelImport = companionModelClassName - .replaceAllLiterally("com.microsoft.azure.synapse.ml", "Synapse.ML") - .replaceAllLiterally("org.apache.spark.ml", "Microsoft.Spark.ML") - .replaceAllLiterally("org.apache.spark", "Microsoft.Spark") - .split(".".toCharArray) - .map(capitalize) - .dropRight(1) - .mkString(".") - s"using $companionModelImport;" - case _ => - "" - } - } - - //noinspection ScalaStyle - protected def dotnetClass(): String = { - s"""|$dotnetCopyrightLines - | - |using System; - |using System.Collections.Generic; - |using System.Linq; - |using System.Reflection; - |using Microsoft.Spark.ML.Feature; - |using Microsoft.Spark.ML.Feature.Param; - |using Microsoft.Spark.Interop; - |using Microsoft.Spark.Interop.Ipc; - |using Microsoft.Spark.Interop.Internal.Java.Util; - |using Microsoft.Spark.Sql; - |using Microsoft.Spark.Sql.Types; - |using Microsoft.Spark.Utils; - |using SynapseML.Dotnet.Utils; - |using Synapse.ML.LightGBM.Param; - |$dotnetExtraEstimatorImports - | - |namespace $dotnetNamespace - |{ - | /// - | /// implements $dotnetClassName - | /// - | public class $dotnetClassName : $dotnetObjectBaseClass, IJavaMLWritable, IJavaMLReadable<$dotnetClassName> - | { - | private static readonly string $dotnetClassNameString = \"${thisStage.getClass.getName}\"; - | - | /// - | /// Creates a without any parameters. - | /// - | public $dotnetClassName() : base($dotnetClassNameString) - | { - | } - | - | /// - | /// Creates a with a UID that is used to give the - | /// a unique ID. - | /// - | /// An immutable unique ID for the object and its derivatives. - | public $dotnetClassName(string uid) : base($dotnetClassNameString, uid) - | { - | } - | - | internal $dotnetClassName(JvmObjectReference jvmObject) : base(jvmObject) - | { - | } - | - |${indent(dotnetParamSetters, 2)} - |${indent(dotnetParamGetters, 2)} - |${indent(dotnetExtraMethods, 2)} - |${indent(dotnetMLReadWriteMethods, 2)} - |${indent(dotnetWrapAsTypeMethod, 2)} - |${indent(dotnetAdditionalMethods, 2)} - | } - |} - | - """.stripMargin - } - - def makeDotnetFile(conf: CodegenConfig): Unit = { - val importPath = thisStage.getClass.getName.split(".".toCharArray).dropRight(1) - val srcFolders = importPath.mkString(".") - .replaceAllLiterally("com.microsoft.azure.synapse.ml", "synapse.ml").split(".".toCharArray) - val srcDir = FileUtilities.join((Seq(conf.dotnetSrcDir.toString) ++ srcFolders.toSeq): _*) - if (!srcDir.exists()) { - srcDir.mkdirs() - } - Files.write( - FileUtilities.join(srcDir, dotnetClassName + ".cs").toPath, - dotnetClass().getBytes(StandardCharsets.UTF_8)) - } - -} diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/codegen/GenerationUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/codegen/GenerationUtils.scala index 818194f6ed..82a8b86fec 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/codegen/GenerationUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/codegen/GenerationUtils.scala @@ -3,7 +3,7 @@ package com.microsoft.azure.synapse.ml.codegen -import com.microsoft.azure.synapse.ml.param.{DotnetWrappableParam, PipelineStageWrappable, PythonWrappableParam} +import com.microsoft.azure.synapse.ml.param.{PipelineStageWrappable, PythonWrappableParam} import com.microsoft.azure.synapse.ml.param.RWrappableParam import org.apache.spark.ml.param._ @@ -48,26 +48,6 @@ object GenerationUtils { } } - def dotnetRenderParam[T](pp: ParamPair[T]): String = { - dotnetRenderParam(pp.param, pp.value) - } - - //noinspection ScalaStyle - def dotnetRenderParam[T](p: Param[T], v: T): String = { - import DefaultParamInfo._ - - p match { - case pwp: DotnetWrappableParam[T] => - "." + pwp.dotnetTestSetterLine(v) - case _: StringArrayParam | _: DoubleArrayParam | _: IntArrayParam | - _: DoubleArrayArrayParam => - s""".Set${p.name.capitalize}(new ${getGeneralParamInfo(p).dotnetType} - | ${DotnetWrappableParam.dotnetDefaultRender(v, p)})""".stripMargin - case _ => - s""".Set${p.name.capitalize}(${DotnetWrappableParam.dotnetDefaultRender(v, p)})""" - } - } - def rRenderParam[T](pp: ParamPair[T]): String = { rRenderParam(pp.param, pp.value) } diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/codegen/Wrappable.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/codegen/Wrappable.scala index 949acab389..7fb604a55d 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/codegen/Wrappable.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/codegen/Wrappable.scala @@ -490,4 +490,4 @@ trait RWrappable extends BaseWrappable { } -trait Wrappable extends PythonWrappable with RWrappable with DotnetWrappable +trait Wrappable extends PythonWrappable with RWrappable diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/core/env/PackageUtils.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/core/env/PackageUtils.scala index 6041b9b307..2605f3b6bf 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/core/env/PackageUtils.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/core/env/PackageUtils.scala @@ -28,7 +28,7 @@ object PackageUtils { private val OnnxProtobufMavenCoordinate = s"$PackageGroup:$OnnxProtobufPackageName:$OnnxProtobufVersion" private val OnnxProtobufRepository: String = SparkMLRepository - // Note: this is also hardwired in core/src/main/dotnet/test/E2ETestUtils.cs AND website/doctest.py + // Note: this is also hardwired in website/doctest.py // val SparkMavenPackageList = s"$PackageMavenCoordinate" val SparkMavenPackageList = Array(PackageMavenCoordinate, AvroCoordinate).mkString(",") val SparkMavenRepositoryList = s"$PackageRepository" diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/core/serialize/ComplexParam.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/core/serialize/ComplexParam.scala index 81611d707b..58168b9e9e 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/core/serialize/ComplexParam.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/core/serialize/ComplexParam.scala @@ -32,20 +32,4 @@ abstract class ComplexParam[T: TypeTag](parent: Params, name: String, doc: Strin throw new NotImplementedError("The parameter is a ComplexParam and cannot be JSON decoded.") } - override private[ml] def dotnetType: String = "object" - - override private[ml] def dotnetGetter(capName: String): String = { - dotnetType match { - case "object" => - s"""public object Get$capName() => Reference.Invoke(\"get$capName\");""".stripMargin - case _ => - s"""|public $dotnetReturnType Get$capName() => - | new $dotnetReturnType((JvmObjectReference)Reference.Invoke(\"get$capName\")); - |""".stripMargin - } - } - - private[ml] def dotnetTestValue(v: T): String = - throw new NotImplementedError("No translation found for complex parameter") - } diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala index f7df32907a..9f89a2be75 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/explainers/ICEExplainer.scala @@ -25,17 +25,13 @@ class ICENumericFeaturesParam(parent: Params, name: String, doc: String, isValid: Seq[ICENumericFeature] => Boolean = _.forall(_.validate)) extends - TypedArrayParam[ICENumericFeature](parent, name, doc, isValid) { - override private[ml] def dotnetType = "ICENumericFeature[]" -} + TypedArrayParam[ICENumericFeature](parent, name, doc, isValid) class ICECategoricalFeaturesParam(parent: Params, name: String, doc: String, isValid: Seq[ICECategoricalFeature] => Boolean = _.forall(_.validate)) extends - TypedArrayParam[ICECategoricalFeature](parent, name, doc, isValid) { - override private[ml] def dotnetType = "ICECategoricalFeature[]" -} + TypedArrayParam[ICECategoricalFeature](parent, name, doc, isValid) trait ICEFeatureParams extends Params with HasNumSamples { diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/param/ArrayMapParam.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/param/ArrayMapParam.scala index 46403dd8fd..574dd41bc3 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/param/ArrayMapParam.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/param/ArrayMapParam.scala @@ -70,51 +70,6 @@ class ArrayMapParam(parent: String, name: String, doc: String, isValid: Array[Ma jsonValue.convertTo[Seq[Map[String, Any]]].toArray } - private[ml] def dotnetType: String = "Dictionary[]" - - override private[ml] def dotnetSetter(dotnetClassName: String, - capName: String, - dotnetClassWrapperName: String): String = { - s"""|public $dotnetClassName Set$capName($dotnetType value) - | => $dotnetClassWrapperName(Reference.Invoke(\"set$capName\", - | (object)value.Select(_ => _.ToJavaHashMap()).ToArray().ToJavaArrayList())); - |""".stripMargin - } - - override private[ml] def dotnetGetter(capName: String): String = { - s"""|public $dotnetReturnType Get$capName() - |{ - | var jvmObjects = (JvmObjectReference[])Reference.Invoke(\"get$capName\"); - | var result = new Dictionary[jvmObjects.Length]; - | JvmObjectReference hashMap; - | JvmObjectReference[] keySet; - | Dictionary dic; - | object value; - | for (int i = 0; i < result.Length; i++) - | { - | hashMap = (JvmObjectReference)SparkEnvironment.JvmBridge.CallStaticJavaMethod( - | "org.apache.spark.api.dotnet.DotnetUtils", "convertToJavaMap", jvmObjects[i]); - | keySet = (JvmObjectReference[])( - | (JvmObjectReference)hashMap.Invoke("keySet")).Invoke("toArray"); - | dic = new Dictionary(); - | foreach (var k in keySet) - | { - | value = SparkEnvironment.JvmBridge.CallStaticJavaMethod( - | "org.apache.spark.api.dotnet.DotnetUtils", - | "mapScalaToJava", hashMap.Invoke("get", k)); - | dic.Add((string)k.Invoke("toString"), value); - | } - | result[i] = dic; - | } - | return result; - |} - |""".stripMargin - } - - private[ml] def dotnetTestValue(v: Array[Map[String, Any]]): String = - s"""new $dotnetType - | ${DotnetWrappableParam.dotnetDefaultRender(v, this)}""".stripMargin - override def rValue(v: Array[Map[String, Any]]): String = { implicit val defaultFormat = seqFormat[Map[String, Any]] RWrappableParam.rDefaultRender(v) diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/param/ArrayParamMapParam.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/param/ArrayParamMapParam.scala index 8d9143f3ab..2ae3fee04a 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/param/ArrayParamMapParam.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/param/ArrayParamMapParam.scala @@ -16,8 +16,7 @@ import scala.reflect.runtime.universe.typeTag * types but not Array of ParamMaps. */ class ArrayParamMapParam(parent: Params, name: String, doc: String, isValid: Array[ParamMap] => Boolean) - extends ComplexParam[Array[ParamMap]](parent, name, doc, isValid) with ParamEquality[Array[ParamMap]] - with ExternalDotnetWrappableParam[Array[ParamMap]] { + extends ComplexParam[Array[ParamMap]](parent, name, doc, isValid) with ParamEquality[Array[ParamMap]] { def this(parent: Params, name: String, doc: String) = this(parent, name, doc, (_: Array[ParamMap]) => true) @@ -33,47 +32,6 @@ class ArrayParamMapParam(parent: Params, name: String, doc: String, isValid: Arr } } - override private[ml] def dotnetType: String = "ParamMap[]" - - override private[ml] def dotnetSetter(dotnetClassName: String, - capName: String, - dotnetClassWrapperName: String): String = { - s"""|public $dotnetClassName Set$capName($dotnetType value) - | => $dotnetClassWrapperName(Reference.Invoke(\"set$capName\", (object)value.ToJavaArrayList())); - |""".stripMargin - } - - override private[ml] def dotnetGetter(capName: String): String = { - s"""|public $dotnetReturnType Get$capName() - |{ - | var jvmObjects = (JvmObjectReference[])Reference.Invoke(\"get$capName\"); - | var result = new ParamMap[jvmObjects.Length]; - | for (int i=0; i < jvmObjects.Length; i++) - | { - | result[i] = new ParamMap(jvmObjects[i]); - | } - | return result; - |} - |""".stripMargin - } - - override private[ml] def dotnetTestValue(v: Array[ParamMap]): String = { - s"""${name}Param""" - } - - override private[ml] def dotnetLoadLine(modelNum: Int): String = { - s"""var ${name}ParamLoaded = (JvmObjectReference[])_jvm.CallStaticJavaMethod( - | "com.microsoft.azure.synapse.ml.param.ArrayParamMapParam", - | "loadForTest", - | _spark, - | Path.Combine(TestDataDir, "model-$modelNum.model", "complexParams", "$name")); - |var ${name}Param = new ParamMap[${name}ParamLoaded.Length]; - |for (int i = 0; i < ${name}ParamLoaded.Length; i++) - |{ - | ${name}Param[i] = new ParamMap(${name}ParamLoaded[i]); - |}""".stripMargin - } - } // For auto test generation usage only, in production we diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/param/ByteArrayParam.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/param/ByteArrayParam.scala index df94bc3cf4..c4e4218301 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/param/ByteArrayParam.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/param/ByteArrayParam.scala @@ -15,8 +15,4 @@ class ByteArrayParam(parent: Params, name: String, doc: String, isValid: Array[B def this(parent: Params, name: String, doc: String) = this(parent, name, doc, (_: Array[Byte]) => true) - override private[ml] def dotnetTestValue(v: Array[Byte]): String = - s"""new byte[] - | ${DotnetWrappableParam.dotnetDefaultRender(v, this)}""".stripMargin - } diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/param/DataFrameParam.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/param/DataFrameParam.scala index eb867b0d4c..5df0771b33 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/param/DataFrameParam.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/param/DataFrameParam.scala @@ -142,17 +142,6 @@ class DataFrameParam(parent: Params, name: String, doc: String, isValid: DataFra """.stripMargin } - override private[ml] def dotnetTestValue(v: DataFrame): String = { - s"""${name}DF""" - } - - override private[ml] def dotnetLoadLine(modelNum: Int): String = { - s"""var ${name}DF = _spark.Read().Parquet( - | Path.Combine(TestDataDir, "model-$modelNum.model", "complexParams", "$name"));""".stripMargin - } - - override private[ml] def dotnetType: String = "DataFrame" - override def assertEquality(v1: Any, v2: Any): Unit = { (v1, v2) match { case (df1: Dataset[_], df2: Dataset[_]) => diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/param/DataTypeParam.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/param/DataTypeParam.scala index 42310caea6..f0150042f8 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/param/DataTypeParam.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/param/DataTypeParam.scala @@ -14,47 +14,4 @@ class DataTypeParam(parent: Params, name: String, doc: String, isValid: DataType def this(parent: Params, name: String, doc: String) = this(parent, name, doc, (_: DataType) => true) - override private[ml] def dotnetType: String = "DataType" - - override private[ml] def dotnetSetter(dotnetClassName: String, - capName: String, - dotnetClassWrapperName: String): String = { - s"""|public $dotnetClassName Set$capName($dotnetType value) => - | $dotnetClassWrapperName(Reference.Invoke(\"set$capName\", - | DataType.FromJson(Reference.Jvm, value.Json))); - |""".stripMargin - } - - override private[ml] def dotnetGetter(capName: String): String = { - s"""|public $dotnetReturnType Get$capName() - |{ - | var jvmObject = (JvmObjectReference)Reference.Invoke(\"get$capName\"); - | var json = (string)jvmObject.Invoke(\"json\"); - | return DataType.ParseDataType(json); - |} - |""".stripMargin - } - - override private[ml] def dotnetTestValue(v: DataType): String = { - v match { - case st: StructType => - st.fields.map( - x => s"""new StructField("${x.name}", new ${x.dataType}())""".stripMargin).mkString(",") - case _ => - s"""new $v()""".stripMargin - } - } - - override private[ml] def dotnetTestSetterLine(v: DataType): String = { - v match { - case _: StructType => - s"""Set${dotnetName(v).capitalize}( - | new StructType(new List - | {${dotnetTestValue(v)}}))""".stripMargin - case _ => - s"""Set${dotnetName(v).capitalize}( - | ${dotnetTestValue(v)})""".stripMargin - } - } - } diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/param/EstimatorArrayParam.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/param/EstimatorArrayParam.scala index e7826713ee..c0a5cd7018 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/param/EstimatorArrayParam.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/param/EstimatorArrayParam.scala @@ -19,39 +19,4 @@ class EstimatorArrayParam(parent: Params, name: String, doc: String, isValid: Ar /** Creates a param pair with the given value (for Java). */ def w(value: java.util.List[Estimator[_]]): ParamPair[Array[Estimator[_]]] = w(value.asScala.toArray) - override private[ml] def dotnetType: String = "IEstimator[]" - - override private[ml] def dotnetSetter(dotnetClassName: String, - capName: String, - dotnetClassWrapperName: String): String = { - s"""|public $dotnetClassName Set$capName($dotnetType value) - | => $dotnetClassWrapperName(Reference.Invoke(\"set$capName\", (object)value.ToJavaArrayList())); - |""".stripMargin - } - - override private[ml] def dotnetGetter(capName: String): String = { - val dType = "IEstimator" - val parentType = "JavaPipelineStage" - s"""|public $dotnetReturnType Get$capName() - |{ - | var jvmObjects = (JvmObjectReference[])Reference.Invoke(\"get$capName\"); - | var result = new $dType[jvmObjects.Length]; - | Dictionary classMapping = JvmObjectUtils.ConstructJavaClassMapping( - | typeof($parentType), - | "s_className"); - | for (int i=0; i < jvmObjects.Length; i++) - | { - | if (JvmObjectUtils.TryConstructInstanceFromJvmObject( - | jvmObjects[i], - | classMapping, - | out $dType instance)) - | { - | result[i] = instance; - | } - | } - | return result; - |} - |""".stripMargin - } - } diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/param/EstimatorParam.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/param/EstimatorParam.scala index 8f58db39d9..1d308a1302 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/param/EstimatorParam.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/param/EstimatorParam.scala @@ -36,24 +36,6 @@ trait PipelineStageWrappable[T <: PipelineStage] """.stripMargin } - override private[ml] def dotnetTestValue(v: T): String = { - s"""${name}Model""" - } - - override private[ml] def dotnetLoadLine(modelNum: Int): String = - throw new NotImplementedError("Implement dotnetLoadLine(modelNum: Int, testDataDir: String) method instead") - - private[ml] def dotnetLoadLine(modelNum: Int, testDataDir: String): String = { - val underlyingType = Pipeline.load(s"$testDataDir/model-$modelNum.model/complexParams/$name") - .getStages.head.getClass.getTypeName.split(".".toCharArray).last - - s""" - |var ${name}Loaded = Pipeline.Load( - | Path.Combine(TestDataDir, "model-$modelNum.model", "complexParams", "$name")); - |var ${name}Model = ($underlyingType)${name}Loaded.GetStages()[0]; - |""".stripMargin - } - override def assertEquality(v1: Any, v2: Any): Unit = { (v1, v2) match { case (e1: PipelineStage, e2: PipelineStage) => @@ -75,21 +57,6 @@ class EstimatorParam(parent: Params, name: String, doc: String, isValid: Estimat def this(parent: Params, name: String, doc: String) = this(parent, name, doc, (_: Estimator[_ <: Model[_]]) => true) - override private[ml] def dotnetType: String = "JavaEstimator" - - override private[ml] def dotnetReturnType: String = "IEstimator" - - override private[ml] def dotnetSetter(dotnetClassName: String, - capName: String, - dotnetClassWrapperName: String): String = { - s"""|public $dotnetClassName Set$capName($dotnetType value) where M : JavaModel => - | $dotnetClassWrapperName(Reference.Invoke(\"set$capName\", (object)value)); - |""".stripMargin - } - - override private[ml] def dotnetGetter(capName: String): String = - dotnetGetterHelper(dotnetReturnType, "JavaPipelineStage", capName) - def rValue(v: Estimator[_]): String = { s"""${name}Model""" } diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/param/EvaluatorParam.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/param/EvaluatorParam.scala index 420da74b71..edc61cdb36 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/param/EvaluatorParam.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/param/EvaluatorParam.scala @@ -18,7 +18,7 @@ import scala.reflect.runtime.universe.typeTag */ class EvaluatorParam(parent: Params, name: String, doc: String, isValid: Evaluator => Boolean) extends ComplexParam[Evaluator](parent, name, doc, isValid) - with ParamEquality[Evaluator] with ExternalDotnetWrappableParam[Evaluator] { + with ParamEquality[Evaluator] { def this(parent: Params, name: String, doc: String) = this(parent, name, doc, (_: Evaluator) => true) @@ -32,32 +32,6 @@ class EvaluatorParam(parent: Params, name: String, doc: String, isValid: Evaluat } } - override private[ml] def dotnetType: String = "JavaEvaluator" - - override private[ml] def dotnetGetter(capName: String): String = - dotnetGetterHelper(dotnetReturnType, dotnetReturnType, capName) - - override private[ml] def dotnetTestValue(v: Evaluator): String = { - s"""${name}Param""" - } - - override private[ml] def dotnetLoadLine(modelNum: Int): String = - throw new NotImplementedError("Implement dotnetLoadLine(modelNum: Int, testDataDir: String) method instead") - - private[ml] def dotnetLoadLine(modelNum: Int, testDataDir: String): String = { - val underlyingType = EvaluatorParam.loadForTest( - SparkSession.builder().getOrCreate(), - s"$testDataDir/model-$modelNum.model/complexParams/$name") - .getClass.getTypeName.split(".".toCharArray).last - - s"""var ${name}ParamLoaded = (JvmObjectReference)_jvm.CallStaticJavaMethod( - | "com.microsoft.azure.synapse.ml.param.EvaluatorParam", - | "loadForTest", - | _spark, - | Path.Combine(TestDataDir, "model-$modelNum.model", "complexParams", "$name")); - |var ${name}Param = new $underlyingType(${name}ParamLoaded);""".stripMargin - } - } // For auto test generation usage only, in production we diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/param/JsonEncodableParam.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/param/JsonEncodableParam.scala index 7832632b6c..da51cc861f 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/param/JsonEncodableParam.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/param/JsonEncodableParam.scala @@ -94,96 +94,6 @@ class ServiceParam[T: TypeTag](parent: Params, } } - override private[ml] def dotnetTestValue(v: Either[T, String]): String = { - v match { - case Left(t) => DotnetWrappableParam.dotnetDefaultRender(t) - case Right(n) => s""""$n"""" - } - } - - override private[ml] def dotnetName(v: Either[T, String]): String = { - v match { - case Left(_) => name - case Right(_) => name + "Col" - } - } - - //scalastyle:off cyclomatic.complexity - override private[ml] def dotnetTestSetterLine(v: Either[T, String]): String = { - v match { - case Left(_) => typeOf[T] match { - case t if t =:= typeOf[Array[String]] | t =:= typeOf[Seq[String]] => - s"""Set${dotnetName(v).capitalize}(new string[] ${dotnetTestValue(v)})""" - case t if t =:= typeOf[Array[Double]] => - s"""Set${dotnetName(v).capitalize}(new double[] ${dotnetTestValue(v)})""" - case t if t =:= typeOf[Array[Int]] => - s"""Set${dotnetName(v).capitalize}(new int[] ${dotnetTestValue(v)})""" - case t if t =:= typeOf[Array[Byte]] => - s"""Set${dotnetName(v).capitalize}(new byte[] ${dotnetTestValue(v)})""" - case _ => s"""Set${dotnetName(v).capitalize}(${dotnetTestValue(v)})""" - } - case Right(_) => s"""Set${dotnetName(v).capitalize}(${dotnetTestValue(v)})""" - } - } - //scalastyle:on cyclomatic.complexity - - //scalastyle:off cyclomatic.complexity - private[ml] def dotnetType: String = typeOf[T].toString match { - case "String" => "string" - case "Boolean" => "bool" - case "Double" => "double" - case "Int" => "int" - case "Seq[String]" => "string[]" - case "Seq[Double]" => "double[]" - case "Seq[Int]" => "int[]" - case "Seq[Seq[Int]]" => "int[][]" - case "Array[Byte]" => "byte[]" - case "Seq[com.microsoft.azure.synapse.ml.services.anomaly.TimeSeriesPoint]" => "TimeSeriesPoint[]" - case "Seq[com.microsoft.azure.synapse.ml.services.translate.TargetInput]" => "TargetInput[]" - case "Seq[com.microsoft.azure.synapse.ml.services.translate.TextAndTranslation]" => "TextAndTranslation[]" - case _ => throw new Exception(s"unsupported type ${typeOf[T].toString}, please add implementation") - } - //scalastyle:on cyclomatic.complexity - - override private[ml] def dotnetSetter(dotnetClassName: String, - capName: String, - dotnetClassWrapperName: String): String = - s"""|public $dotnetClassName Set$capName($dotnetType value) => - | $dotnetClassWrapperName(Reference.Invoke(\"set$capName\", (object)value)); - |""".stripMargin - - private[ml] def dotnetSetterForSrvParamCol(dotnetClassName: String, - capName: String, - dotnetClassWrapperName: String): String = - s"""|public $dotnetClassName Set${capName}Col(string value) => - | $dotnetClassWrapperName(Reference.Invoke(\"set${capName}Col\", value)); - |""".stripMargin - - override private[ml] def dotnetGetter(capName: String): String = { - dotnetType match { - case "TimeSeriesPoint[]" | - "TargetInput[]" | - "TextAndTranslation[]" | - "TextAnalyzeTask[]" => - s"""|public $dotnetType Get$capName() - |{ - | var jvmObject = (JvmObjectReference)Reference.Invoke(\"get$capName\"); - | var jvmObjects = (JvmObjectReference[])jvmObject.Invoke("array"); - | $dotnetType result = - | new ${dotnetType.substring(0, dotnetType.length - 2)}[jvmObjects.Length]; - | for (int i = 0; i < result.Length; i++) - | { - | result[i] = new ${dotnetType.substring(0, dotnetType.length - 2)}(jvmObjects[i]); - | } - | return result; - |} - |""".stripMargin - case _ => - s"""|public $dotnetType Get$capName() => - | ($dotnetType)Reference.Invoke(\"get$capName\"); - |""".stripMargin - } - } } @@ -200,35 +110,4 @@ class CognitiveServiceStructParam[T: TypeTag](parent: Params, override def rValue(v: T): String = RWrappableParam.rDefaultRender(v) - override private[ml] def dotnetGetter(capName: String): String = { - dotnetType match { - case "DiagnosticsInfo" => - s"""|public $dotnetType Get$capName() - |{ - | var jvmObject = (JvmObjectReference)Reference.Invoke(\"get$capName\"); - | return new $dotnetType(jvmObject); - |} - |""".stripMargin - case _ => - s"""|public $dotnetType Get$capName() => - | ($dotnetType)Reference.Invoke(\"get$capName\"); - |""".stripMargin - } - } - - override private[ml] def dotnetTestValue(v: T): String = DotnetWrappableParam.dotnetDefaultRender(v) - - override private[ml] def dotnetTestSetterLine(v: T): String = { - typeOf[T].toString match { - case t if t == "Seq[com.microsoft.azure.synapse.ml.services.TextAnalyzeTask]" => - s"""Set${dotnetName(v).capitalize}(new TextAnalyzeTask[]{${dotnetTestValue(v)}})""" - case _ => s"""Set${dotnetName(v).capitalize}(${dotnetTestValue(v)})""" - } - } - - private[ml] def dotnetType: String = typeOf[T].toString match { - case "Seq[com.microsoft.azure.synapse.ml.services.text.TextAnalyzeTask]" => "TextAnalyzeTask[]" - case "com.microsoft.azure.synapse.ml.services.anomaly.DiagnosticsInfo" => "DiagnosticsInfo" - case _ => throw new Exception(s"unsupported type ${typeOf[T].toString}, please add implementation") - } } diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/param/MapParam.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/param/MapParam.scala index 234fe1fc26..e3d5d265b8 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/param/MapParam.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/param/MapParam.scala @@ -31,40 +31,8 @@ class MapParam[K, V](parent: Params, name: String, doc: String, isValid: Map[K, json.parseJson.convertTo[Map[K, V]] } - private[ml] def dotnetType: String = "Dictionary" - - override private[ml] def dotnetSetter(dotnetClassName: String, - capName: String, - dotnetClassWrapperName: String): String = { - s"""|public $dotnetClassName Set$capName($dotnetType value) => - | $dotnetClassWrapperName(Reference.Invoke(\"set$capName\", (object)value.ToJavaHashMap())); - |""".stripMargin - } - protected def valuesType = "object" - override private[ml] def dotnetGetter(capName: String): String = { - s"""|public $dotnetReturnType Get$capName() - |{ - | var jvmObject = (JvmObjectReference)Reference.Invoke(\"get$capName\"); - | var hashMap = (JvmObjectReference)SparkEnvironment.JvmBridge.CallStaticJavaMethod( - | "org.apache.spark.api.dotnet.DotnetUtils", "convertToJavaMap", jvmObject); - | var keySet = (JvmObjectReference[])( - | (JvmObjectReference)hashMap.Invoke("keySet")).Invoke("toArray"); - | var result = new $dotnetReturnType(); - | foreach (var k in keySet) - | { - | result.Add((string)k.Invoke("toString"), ($valuesType)hashMap.Invoke("get", k)); - | } - | return result; - |} - |""".stripMargin - } - - private[ml] def dotnetTestValue(v: Map[K, V]): String = - s"""new $dotnetType - | ${DotnetWrappableParam.dotnetDefaultRender(v, this)}""".stripMargin - } class StringStringMapParam(parent: Params, name: String, doc: String, isValid: Map[String, String] => Boolean) @@ -73,8 +41,6 @@ class StringStringMapParam(parent: Params, name: String, doc: String, isValid: M def this(parent: Params, name: String, doc: String) = this(parent, name, doc, (_: Map[String, String]) => true) - override private[ml] def dotnetType: String = "Dictionary" - override protected def valuesType = "string" } @@ -85,8 +51,6 @@ class StringIntMapParam(parent: Params, name: String, doc: String, isValid: Map[ def this(parent: Params, name: String, doc: String) = this(parent, name, doc, (_: Map[String, Int]) => true) - override private[ml] def dotnetType: String = "Dictionary" - override protected def valuesType = "int" } diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/param/ModelParam.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/param/ModelParam.scala index 423fc5b09e..74803948b0 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/param/ModelParam.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/param/ModelParam.scala @@ -18,19 +18,5 @@ class ModelParam(parent: Params, name: String, doc: String, isValid: Model[_ <: def this(parent: Params, name: String, doc: String) = this(parent, name, doc, (_: Model[_ <: Model[_]]) => true) - override private[ml] def dotnetType: String = "JavaModel" - - override private[ml] def dotnetReturnType: String = "IModel" - - override private[ml] def dotnetSetter(dotnetClassName: String, - capName: String, - dotnetClassWrapperName: String): String = { - s"""|public $dotnetClassName Set$capName($dotnetType value) where M : JavaModel => - | $dotnetClassWrapperName(Reference.Invoke(\"set$capName\", (object)value)); - |""".stripMargin - } - - override private[ml] def dotnetGetter(capName: String): String = - dotnetGetterHelper(dotnetReturnType, "JavaPipelineStage", capName) } diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/param/PipelineStageParam.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/param/PipelineStageParam.scala index f4b3733c4e..3395d9f3e5 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/param/PipelineStageParam.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/param/PipelineStageParam.scala @@ -17,11 +17,6 @@ class PipelineStageParam(parent: Params, name: String, doc: String, isValid: Pip def this(parent: Params, name: String, doc: String) = this(parent, name, doc, (_: PipelineStage) => true) - override private[ml] def dotnetType: String = "JavaPipelineStage" - - override private[ml] def dotnetGetter(capName: String): String = - dotnetGetterHelper(dotnetReturnType, dotnetReturnType, capName) - override def rLoadLine(modelNum: Int): String = { s""" |${name}Model <- ml_load(sc, path = file.path(test_data_dir, "model-$modelNum.model", "complexParams", "$name")) diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/param/TransformerArrayParam.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/param/TransformerArrayParam.scala index 4cffade365..b865a7fa11 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/param/TransformerArrayParam.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/param/TransformerArrayParam.scala @@ -19,38 +19,4 @@ class TransformerArrayParam(parent: Params, name: String, doc: String, isValid: /** Creates a param pair with the given value (for Java). */ def w(value: java.util.List[Transformer]): ParamPair[Array[Transformer]] = w(value.asScala.toArray) - override private[ml] def dotnetType: String = "JavaTransformer[]" - - override private[ml] def dotnetSetter(dotnetClassName: String, - capName: String, - dotnetClassWrapperName: String): String = { - s"""|public $dotnetClassName Set$capName($dotnetType value) - | => $dotnetClassWrapperName(Reference.Invoke(\"set$capName\", (object)value.ToJavaArrayList())); - |""".stripMargin - } - - override private[ml] def dotnetGetter(capName: String): String = { - val dType = "JavaTransformer" - s"""|public $dotnetReturnType Get$capName() - |{ - | var jvmObjects = (JvmObjectReference[])Reference.Invoke(\"get$capName\"); - | var result = new $dType[jvmObjects.Length]; - | Dictionary classMapping = JvmObjectUtils.ConstructJavaClassMapping( - | typeof($dType), - | "s_className"); - | for (int i=0; i < jvmObjects.Length; i++) - | { - | if (JvmObjectUtils.TryConstructInstanceFromJvmObject( - | jvmObjects[i], - | classMapping, - | out $dType instance)) - | { - | result[i] = instance; - | } - | } - | return result; - |} - |""".stripMargin - } - } diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/param/TransformerParam.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/param/TransformerParam.scala index 3290d0a075..18306f052b 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/param/TransformerParam.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/param/TransformerParam.scala @@ -17,11 +17,6 @@ class TransformerParam(parent: Params, name: String, doc: String, isValid: Trans def this(parent: Params, name: String, doc: String) = this(parent, name, doc, (_: Transformer) => true) - override private[ml] def dotnetType: String = "JavaTransformer" - - override private[ml] def dotnetGetter(capName: String): String = - dotnetGetterHelper(dotnetReturnType, "JavaTransformer", capName) - override def rLoadLine(modelNum: Int): String = { s""" |${name}Model <- ml_load(sc, path = file.path(test_data_dir, "model-$modelNum.model", "complexParams", "$name")) diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/param/TypedArrayParam.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/param/TypedArrayParam.scala index 1f7a39ab00..f0bef36a86 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/param/TypedArrayParam.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/param/TypedArrayParam.scala @@ -22,21 +22,6 @@ abstract class TypedArrayParam[T: TypeTag](parent: Params, def w(v: java.util.ArrayList[T]): ParamPair[Seq[T]] = w(v.asScala) - // TODO: Implement render for this - override private[ml] def dotnetTestValue(v: Seq[T]): String = { - throw new NotImplementedError(s"No translation found for this TypedArrayParam: $v") - } - - override private[ml] def dotnetTestSetterLine(v: Seq[T]): String = { - typeOf[T].toString match { - case t if t == "com.microsoft.azure.synapse.ml.explainers.ICECategoricalFeature" => - s"""Set${dotnetName(v).capitalize}(new ICECategoricalFeature[]{${dotnetTestValue(v)}})""" - case t if t == "com.microsoft.azure.synapse.ml.explainers.ICENumericFeature" => - s"""Set${dotnetName(v).capitalize}(new ICENumericFeature[]{${dotnetTestValue(v)}})""" - case _ => throw new NotImplementedError(s"No translation found for this TypedArrayParam: $v") - } - } - override def rValue(v: Seq[T]): String = { implicit val defaultFormat = seqFormat[T] RWrappableParam.rDefaultRender(v) @@ -70,21 +55,6 @@ class TypedIntArrayParam(parent: Params, def w(v: java.util.ArrayList[Int]): ParamPair[Seq[Int]] = w(v.asScala) - private[ml] def dotnetType: String = "int[]" - - override private[ml] def dotnetGetter(capName: String): String = { - s"""|public $dotnetReturnType Get$capName() - |{ - | JvmObjectReference jvmObject = (JvmObjectReference)Reference.Invoke(\"get$capName\"); - | return ($dotnetReturnType)jvmObject.Invoke(\"array\"); - |} - |""".stripMargin - } - - private[ml] def dotnetTestValue(v: Seq[Int]): String = - s"""new $dotnetType - | ${DotnetWrappableParam.dotnetDefaultRender(v, this)}""".stripMargin - } class TypedDoubleArrayParam(parent: Params, @@ -96,18 +66,4 @@ class TypedDoubleArrayParam(parent: Params, def w(v: java.util.ArrayList[Double]): ParamPair[Seq[Double]] = w(v.asScala) - private[ml] def dotnetType: String = "double[]" - - override private[ml] def dotnetGetter(capName: String): String = { - s"""|public $dotnetReturnType Get$capName() - |{ - | JvmObjectReference jvmObject = (JvmObjectReference)Reference.Invoke(\"get$capName\"); - | return ($dotnetReturnType)jvmObject.Invoke(\"array\"); - |} - |""".stripMargin - } - - private[ml] def dotnetTestValue(v: Seq[Double]): String = - s"""new $dotnetType - | ${DotnetWrappableParam.dotnetDefaultRender(v, this)}""".stripMargin } diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/param/UDFParam.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/param/UDFParam.scala index 08f53682ba..44c76d36b8 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/param/UDFParam.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/param/UDFParam.scala @@ -20,7 +20,7 @@ import scala.reflect.runtime.universe.typeTag */ class UDFParam(parent: Params, name: String, doc: String, isValid: UserDefinedFunction => Boolean) extends ComplexParam[UserDefinedFunction](parent, name, doc, isValid) - with ParamEquality[UserDefinedFunction] with ExternalDotnetWrappableParam[UserDefinedFunction] { + with ParamEquality[UserDefinedFunction] { def this(parent: Params, name: String, doc: String) = this(parent, name, doc, (_: UserDefinedFunction) => true) @@ -37,40 +37,6 @@ class UDFParam(parent: Params, name: String, doc: String, isValid: UserDefinedFu } } - override private[ml] def dotnetSetter(dotnetClassName: String, - capName: String, - dotnetClassWrapperName: String): String = { - val invokeMethod = capName match { - case "UdfScala" => "setUDF" - case "TransformFunc" => "setTransform" - case "TransformSchemaFunc" => "setTransformSchema" - case _ => s"set$capName" - } - s"""|public $dotnetClassName Set$capName($dotnetType value) => - | $dotnetClassWrapperName(Reference.Invoke(\"$invokeMethod\", (object)value)); - |""".stripMargin - } - - override private[ml] def dotnetTestValue(v: UserDefinedFunction): String = { - name match { - case "handler" =>s"""${name}Param""" - case _ => super.dotnetTestValue(v) - } - } - - override private[ml] def dotnetLoadLine(modelNum: Int): String = { - name match { - case "handler" => - s"""var ${name}Param = _jvm.CallStaticJavaMethod( - | "com.microsoft.azure.synapse.ml.param.UDFParam", - | "loadForTest", - | _spark, - | Path.Combine(TestDataDir, "model-$modelNum.model", "complexParams", "$name"));""".stripMargin - // TODO: FIX OTHER UDFParams - case _ => "" - } - - } } diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/param/UntypedArrayParam.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/param/UntypedArrayParam.scala index 861a7f676f..3894d03737 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/param/UntypedArrayParam.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/param/UntypedArrayParam.scala @@ -78,32 +78,4 @@ class UntypedArrayParam(parent: Params, name: String, doc: String, isValid: Arra json.parseJson.convertTo[Array[Any]] } - private[ml] def dotnetType: String = "object[]" - - override private[ml] def dotnetSetter(dotnetClassName: String, - capName: String, - dotnetClassWrapperName: String): String = { - s"""|public $dotnetClassName Set$capName($dotnetType value) - | => $dotnetClassWrapperName(Reference.Invoke(\"set$capName\", (object)value.ToJavaArrayList())); - |""".stripMargin - } - - override private[ml] def dotnetGetter(capName: String): String = { - s"""|public $dotnetReturnType Get$capName() - |{ - | var jvmObjects = (JvmObjectReference[])Reference.Invoke(\"get$capName\"); - | var result = new object[jvmObjects.Length]; - | for (int i = 0; i < result.Length; i++) - | { - | result[i] = SparkEnvironment.JvmBridge.CallStaticJavaMethod( - | "org.apache.spark.api.dotnet.DotnetUtils", "mapScalaToJava", (object)jvmObjects[i]); - | } - | return result; - |} - |""".stripMargin - } - - private[ml] def dotnetTestValue(v: Array[Any]): String = - s"""new $dotnetType - | ${DotnetWrappableParam.dotnetDefaultRender(v, this)}""".stripMargin } diff --git a/core/src/main/scala/com/microsoft/azure/synapse/ml/param/WrappableParam.scala b/core/src/main/scala/com/microsoft/azure/synapse/ml/param/WrappableParam.scala index 6eadcbe062..b03a12f6dc 100644 --- a/core/src/main/scala/com/microsoft/azure/synapse/ml/param/WrappableParam.scala +++ b/core/src/main/scala/com/microsoft/azure/synapse/ml/param/WrappableParam.scala @@ -5,12 +5,10 @@ package com.microsoft.azure.synapse.ml.param // Wrapper for codegen system trait WrappableParam[T] - extends DotnetWrappableParam[T] - with RWrappableParam[T] + extends RWrappableParam[T] with PythonWrappableParam[T] trait ExternalWrappableParam[T] extends ExternalPythonWrappableParam[T] - with ExternalDotnetWrappableParam[T] with ExternalRWrappableParam[T] diff --git a/core/src/test/scala/com/microsoft/azure/synapse/ml/Secrets.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/Secrets.scala index ee854f10bd..8fb67ccdaf 100644 --- a/core/src/test/scala/com/microsoft/azure/synapse/ml/Secrets.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/Secrets.scala @@ -62,7 +62,6 @@ object Secrets { lazy val AzureMapsKey: String = getSecret("azuremaps-api-key") lazy val PowerbiURL: String = getSecret("powerbi-url") lazy val AdbToken: String = getSecret("adb-token") - lazy val SynapseStorageKey: String = getSecret("synapse-storage-key") lazy val MADTestStorageKey: String = getSecret("madtest-storage-key") diff --git a/core/src/test/scala/com/microsoft/azure/synapse/ml/causal/VerifyOrthoDMLEstimator.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/causal/VerifyOrthoDMLEstimator.scala index 638f2b6151..1f2e3c59fd 100644 --- a/core/src/test/scala/com/microsoft/azure/synapse/ml/causal/VerifyOrthoDMLEstimator.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/causal/VerifyOrthoDMLEstimator.scala @@ -113,7 +113,6 @@ class VerifyOrthoDMLEstimator extends EstimatorFuzzing[OrthoForestDMLEstimator] .setMinSamplesLeaf(100), ppfit, ppfit)) - override def dotnetTestObjects(): Seq[TestObject[OrthoForestDMLEstimator]] = Seq() override def reader: MLReadable[_] = OrthoForestDMLEstimator diff --git a/core/src/test/scala/com/microsoft/azure/synapse/ml/codegen/DotnetTestGen.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/codegen/DotnetTestGen.scala deleted file mode 100644 index ae88ba7882..0000000000 --- a/core/src/test/scala/com/microsoft/azure/synapse/ml/codegen/DotnetTestGen.scala +++ /dev/null @@ -1,145 +0,0 @@ -// Copyright (C) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. See LICENSE in project root for information. - -package com.microsoft.azure.synapse.ml.codegen - -import com.microsoft.azure.synapse.ml.codegen.CodegenConfigProtocol._ -import com.microsoft.azure.synapse.ml.core.env.FileUtilities._ -import com.microsoft.azure.synapse.ml.core.test.base.TestBase -import com.microsoft.azure.synapse.ml.core.test.fuzzing.DotnetTestFuzzing -import com.microsoft.azure.synapse.ml.core.utils.JarLoadingUtils.instantiateServices -import org.apache.commons.io.FileUtils -import spray.json._ - -import java.io.File - - -object DotnetTestGen { - - import CodeGenUtils._ - - def generateDotnetTests(conf: CodegenConfig): Unit = { - instantiateServices[DotnetTestFuzzing[_]](conf.jarName).foreach { ltc => - try { - ltc.makeDotnetTestFile(conf) - } catch { - case _: NotImplementedError => - println(s"ERROR: Could not generate test for ${ltc.testClassName} because of Complex Parameters") - } - } - } - - def generateDotnetHelperFile(conf: CodegenConfig): Unit = { - val dir = new File(conf.dotnetTestDir, "SynapseMLtest") - if (!dir.exists()) { - dir.mkdirs() - } - writeFile(new File(dir, "SparkFixtureHelper.cs"), - s""" - |// Copyright (C) Microsoft Corporation. All rights reserved. - |// Licensed under the MIT License. See LICENSE in project root for information. - | - |using SynapseMLtest.Utils; - |using Xunit; - | - |namespace SynapseMLtest.Helper - |{ - | [CollectionDefinition("SynapseML Tests")] - | public class SynapseMLCollection: ICollectionFixture - | { - | // This class has no code, and is never created. Its purpose is simply - | // to be the place to apply [CollectionDefinition] and all the - | // ICollectionFixture<> interfaces. - | } - |} - |""".stripMargin) - } - - // scalastyle:off method.length - def generateDotnetTestProjFile(conf: CodegenConfig): Unit = { - val dir = new File(conf.dotnetTestDir, "SynapseMLtest") - if (!dir.exists()) { - dir.mkdirs() - } - val curProject = conf.name.split("-").drop(1).map(s => s.capitalize).mkString("") - // TODO: update SynapseML.DotnetBase version whenever we upload a new one - val referenceCore = conf.name match { - case "synapseml-opencv" | "synapseml-deep-learning" => - s"""""" - case _ => "" - } - // scalastyle:off line.size.limit - writeFile(new File(dir, "TestProjectSetup.csproj"), - s""" - | - | - | netcoreapp3.1 - | 9.0 - | SynapseML.$curProject.Test - | - | - | - | - | - | - | - | - | - | all - | runtime; build; native; contentfiles; analyzers - | - | - | - | - | - | $referenceCore - | - | - | - | - | Microsoft.Spark;SynapseML.DotnetBase;SynapseML.DotnetE2ETest;SynapseML.$curProject - | - | - | - | false - | - | - | - | - | - | - |""".stripMargin, StandardOpenOption.CREATE) - // scalastyle:on line.size.limit - } - // scalastyle:on method.length - - def generateLog4jPropertiesFile(conf: CodegenConfig): Unit = { - val dir = join(conf.dotnetTestDir, "SynapseMLtest", "Resources") - if (!dir.exists()) { - dir.mkdirs() - } - writeFile(new File(dir, "log4j.properties"), - s"""log4j.appender.stdout=org.apache.log4j.ConsoleAppender - |log4j.appender.stdout.Target=System.out - |log4j.appender.stdout.layout=org.apache.log4j.PatternLayout - |log4j.appender.stdout.layout.ConversionPattern=%d{HH:mm:ss} %-5p %c{1}:%L - %m%n - | - |log4j.rootLogger=WARN, stdout - |log4j.logger.org.apache.spark=WARN, stdout - |log4j.logger.com.microsoft=INFO, stdout - |""".stripMargin, StandardOpenOption.CREATE) - } - - def main(args: Array[String]): Unit = { - val conf = args.head.parseJson.convertTo[CodegenConfig] - clean(conf.dotnetTestDataDir) - clean(conf.dotnetTestDir) - generateDotnetTests(conf) - TestBase.stopSparkSession() - if (toDir(conf.dotnetTestOverrideDir).exists()) - FileUtils.copyDirectoryToDirectory(toDir(conf.dotnetTestOverrideDir), toDir(conf.dotnetTestDir)) - generateDotnetTestProjFile(conf) - generateDotnetHelperFile(conf) - generateLog4jPropertiesFile(conf) - } -} diff --git a/core/src/test/scala/com/microsoft/azure/synapse/ml/codegen/TestGen.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/codegen/TestGen.scala index a6b549b132..1c6c1fbd86 100644 --- a/core/src/test/scala/com/microsoft/azure/synapse/ml/codegen/TestGen.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/codegen/TestGen.scala @@ -13,7 +13,6 @@ import spray.json._ object TestGen { import CodeGenUtils._ - import DotnetTestGen._ import PyTestGen._ import RTestGen._ @@ -28,17 +27,13 @@ object TestGen { clean(conf.testDataDir) clean(conf.testDir) generatePythonTests(conf) - generateDotnetTests(conf) generateRTests(conf) TestBase.stopSparkSession() generatePyPackageData(conf) generateRPackageData(conf) copyOverrides(conf.pyTestOverrideDir, conf.pyTestDir) - copyOverrides(conf.dotnetTestOverrideDir, conf.dotnetTestDir) copyOverrides(conf.rTestOverrideDir, conf.rTestDir) makeInitFiles(conf) - generateDotnetTestProjFile(conf) - generateDotnetHelperFile(conf) } } diff --git a/core/src/test/scala/com/microsoft/azure/synapse/ml/core/test/fuzzing/Fuzzing.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/core/test/fuzzing/Fuzzing.scala index eae338ab3b..4429843ed9 100644 --- a/core/src/test/scala/com/microsoft/azure/synapse/ml/core/test/fuzzing/Fuzzing.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/core/test/fuzzing/Fuzzing.scala @@ -61,205 +61,6 @@ trait TestFuzzingUtil { } } -trait DotnetTestFuzzing[S <: PipelineStage] extends TestBase with DataFrameEquality with TestFuzzingUtil { - - def dotnetTestObjects(): Seq[TestObject[S]] - - def dotnetTestDataDir(conf: CodegenConfig): File = FileUtilities.join( - conf.dotnetTestDataDir, this.getClass.getName.split(".".toCharArray).last) - - def saveDotnetDataset(conf: CodegenConfig, df: DataFrame, name: String): Unit = { - df.write.mode("overwrite").parquet(new File(dotnetTestDataDir(conf), s"$name.parquet").toString) - } - - def saveDotnetModel(conf: CodegenConfig, model: S, name: String): Unit = { - model match { - case writable: MLWritable => - writable.write.overwrite().save(new File(dotnetTestDataDir(conf), s"$name.model").toString) - case _ => - throw new IllegalArgumentException(s"${model.getClass.getName} is not writable") - } - } - - def saveDotnetTestData(conf: CodegenConfig): Unit = { - dotnetTestDataDir(conf).mkdirs() - dotnetTestObjects().zipWithIndex.foreach { case (to, i) => - saveDotnetModel(conf, to.stage, s"model-$i") - if (testFitting) { - saveDotnetDataset(conf, to.fitDF, s"fit-$i") - saveDotnetDataset(conf, to.transDF, s"trans-$i") - to.validateDF.foreach(saveDotnetDataset(conf, _, s"val-$i")) - } - } - } - - def dotnetTestInstantiateModel(stage: S, num: Int, testDataDir: String): String = { - val fullParamMap = stage.extractParamMap().toSeq - val partialParamMap = stage.extractParamMap().toSeq.filter(pp => stage.get(pp.param).isDefined) - val fullStageName = stage.getClass.getName - .replace("com.microsoft.azure.synapse.ml", "Synapse.ML") - .replace("org.apache.spark.ml", "Microsoft.Spark.ML") - .split(".".toCharArray).map(capitalize).mkString(".") - - def instantiateModel(paramMap: Seq[ParamPair[_]]): String = { - val externalLoadlingLines = paramMap.flatMap { pp => - pp.param match { - case pp: PipelineStageWrappable[_] => - Some(pp.dotnetLoadLine(num, testDataDir)) - case ep: EvaluatorParam => - Some(ep.dotnetLoadLine(num, testDataDir)) - case ep: ExternalDotnetWrappableParam[_] => - Some(ep.dotnetLoadLine(num)) - case _ => None - } - }.mkString("\n") - s""" - |$externalLoadlingLines - | - |var model = new $fullStageName() - |${indent(paramMap.map(dotnetRenderParam(_)).mkString("\n"), 1)}; - | - |""".stripMargin - } - - try { - instantiateModel(fullParamMap) - } catch { - case _: NotImplementedError => - println(s"could not generate full test for $fullStageName, resorting to partial test") - instantiateModel(partialParamMap) - } - } - - - def makeDotnetTests(testObject: TestObject[S], num: Int, testDataDir: String): String = { - val stage = testObject.stage - val stageName = stage.getClass.getName.split(".".toCharArray).last - val fullStageName = stage.getClass.getName - .replace("com.microsoft.azure.synapse.ml", "Synapse.ML") - .replace("org.apache.spark.ml", "Microsoft.Spark.ML") - .split(".".toCharArray).map(capitalize).mkString(".") - val fittingTest = stage match { - case _: Estimator[_] if testFitting => - s""" - |var fdf = _spark.Read().Parquet(Path.Combine(TestDataDir, "fit-$num.parquet")); - |var tdf = _spark.Read().Parquet(Path.Combine(TestDataDir, "trans-$num.parquet")); - |model.Fit(fdf).Transform(tdf).Show(); - |""".stripMargin - case _: Transformer if testFitting => - s""" - |var tdf = _spark.Read().Parquet(Path.Combine(TestDataDir, "trans-$num.parquet")); - |model.Transform(tdf).Show(); - |""".stripMargin - case _ => "" - } - - s""" - |[Fact] - |public void Test${stageName}Constructor$num() - |{ - | void AssertCorrespondence($fullStageName model, string name, int num) - | { - | model.Write().Overwrite().Save(Path.Combine(TestDataDir, name)); - | _jvm.CallStaticJavaMethod("com.microsoft.azure.synapse.ml.core.utils.ModelEquality", - | "assertEqual", "${stage.getClass.getName}", Path.Combine(TestDataDir, name), - | Path.Combine(TestDataDir, String.Format("model-{0}.model", num))); - | } - |${indent(dotnetTestInstantiateModel(stage, num, testDataDir), 1)} - | - | AssertCorrespondence(model, "dotnet-constructor-model-$num.model", $num); - | - |${indent(fittingTest, 1)} - |} - | - |""".stripMargin - } - - //noinspection ScalaStyle - def makeDotnetTestFile(conf: CodegenConfig): Unit = { - spark - val testObjects = dotnetTestObjects() - if (testObjects.nonEmpty) { - saveDotnetTestData(conf) - val testDataDirString = dotnetTestDataDir(conf).toString - val generatedTests = testObjects.zipWithIndex.map { case (to, i) => - makeDotnetTests(to, i, testDataDirString) - } - val stage = dotnetTestObjects().head.stage - val importPath = stage.getClass.getName.split(".".toCharArray).dropRight(1) - val importPathString = importPath.mkString(".") - .replaceAllLiterally("com.microsoft.azure.synapse.ml", "Synapse.ML") - .replaceAllLiterally("org.apache.spark.ml", "Microsoft.Spark.ML") - .split(".".toCharArray).map(capitalize).mkString(".") - val externalLoaderImports = conf.name match { - case "synapseml-deep-learning" => - s"""using Synapse.ML.Onnx; - |using Synapse.ML.Stages; - |""".stripMargin - case _ => "" - } - val namespaceString = importPath.mkString(".") - .replaceAllLiterally("com.microsoft.azure.synapse.ml", "SynapseMLtest") - .replaceAllLiterally("org.apache.spark.ml", "Microsoft.Spark.ML.Test") - .split(".".toCharArray).map(capitalize).mkString(".") - val testClass = - s""" - |// Copyright (C) Microsoft Corporation. All rights reserved. - |// Licensed under the MIT License. See LICENSE in project root for information. - | - |using System; - |using System.IO; - |using System.Collections.Generic; - |using Microsoft.Spark.Interop.Ipc; - |using Microsoft.Spark.ML; - |using Microsoft.Spark.ML.Classification; - |using Microsoft.Spark.ML.Feature; - |using Microsoft.Spark.ML.Feature.Param; - |using Microsoft.Spark.ML.Recommendation; - |using Microsoft.Spark.ML.Regression; - |using Microsoft.Spark.Sql; - |using Microsoft.Spark.Sql.Types; - |using SynapseML.Dotnet.Utils; - | - |using Xunit; - |using SynapseMLtest.Utils; - |using SynapseMLtest.Helper; - |using $importPathString; - |$externalLoaderImports - | - |namespace $namespaceString - |{ - | - | [Collection("SynapseML Tests")] - | public class $testClassName - | { - | public const string TestDataDir = "${testDataDirString.replaceAllLiterally("\\", "\\\\")}"; - | private readonly SparkSession _spark; - | private readonly IJvmBridge _jvm; - | public $testClassName(SparkFixture fixture) - | { - | _spark = fixture.Spark; - | _jvm = fixture.Jvm; - | } - | - |${indent(generatedTests.mkString("\n\n"), 2)} - | } - | - |} - | - |""".stripMargin - - val testFolders = namespaceString.split(".".toCharArray) - val testDir = FileUtilities.join((Seq(conf.dotnetTestDir.toString) ++ testFolders.toSeq): _*) - testDir.mkdirs() - Files.write( - FileUtilities.join(testDir, "Test" + testClassName.capitalize + ".cs").toPath, - testClass.getBytes(StandardCharsets.UTF_8)) - } - } - -} - trait PyTestFuzzing[S <: PipelineStage] extends TestBase with DataFrameEquality with TestFuzzingUtil { def pyTestObjects(): Seq[TestObject[S]] @@ -796,7 +597,7 @@ trait GetterSetterFuzzing[S <: PipelineStage with Params] extends TestBase with } trait Fuzzing[S <: PipelineStage with MLWritable] extends SerializationFuzzing[S] - with ExperimentFuzzing[S] with PyTestFuzzing[S] with DotnetTestFuzzing[S] + with ExperimentFuzzing[S] with PyTestFuzzing[S] with RTestFuzzing[S] with GetterSetterFuzzing[S] { def testObjects(): Seq[TestObject[S]] @@ -805,8 +606,6 @@ trait Fuzzing[S <: PipelineStage with MLWritable] extends SerializationFuzzing[S def rTestObjects(): Seq[TestObject[S]] = testObjects() - def dotnetTestObjects(): Seq[TestObject[S]] = testObjects() - def serializationTestObjects(): Seq[TestObject[S]] = testObjects() def experimentTestObjects(): Seq[TestObject[S]] = testObjects() diff --git a/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/SharedNotebookE2ETestUtilities.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/SharedNotebookE2ETestUtilities.scala index df72acab95..27130a3b2c 100644 --- a/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/SharedNotebookE2ETestUtilities.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/SharedNotebookE2ETestUtilities.scala @@ -118,12 +118,20 @@ object SharedNotebookE2ETestUtilities { } } + private[ml] def exec(command: String, maxRetries: Int = 0, attempt: Int = 0): String = { + val osCommand = sys.props("os.name").toLowerCase match { + case x if x contains "windows" => Seq("cmd", "/C") ++ Seq(command) + case _ => Seq("bash", "-c", command) + } - def exec(command: String): String = { - val os = sys.props("os.name").toLowerCase - os match { - case x if x contains "windows" => Seq("cmd", "/C") ++ Seq(command) !! - case _ => command.!! + try { + osCommand.!! + } catch { + case e: RuntimeException if attempt < maxRetries => + println(s"Retrying after error: $e") + Thread.sleep(1000) + exec(command, maxRetries, attempt + 1) } } + } diff --git a/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/SynapseExtension/SynapseExtensionUtilities.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/SynapseExtension/SynapseExtensionUtilities.scala index 48683cc413..f2264025f4 100644 --- a/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/SynapseExtension/SynapseExtensionUtilities.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/SynapseExtension/SynapseExtensionUtilities.scala @@ -258,8 +258,8 @@ object SynapseExtensionUtilities { val dest = s"$Folder/${notebook.getName}" exec(s"az storage fs file upload " + s" -s ${notebook.getAbsolutePath} -p $dest -f $StorageContainer " + - " --overwrite true " + - s" --account-name $StorageAccount --account-key ${Secrets.SynapseStorageKey}") + " --overwrite true --auth-mode login" + + s" --account-name $StorageAccount") s"https://$StorageAccount.blob.core.windows.net/$StorageContainer/$dest" } diff --git a/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/SynapseUtilities.scala b/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/SynapseUtilities.scala index 6128f1cced..32dfd5dc09 100644 --- a/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/SynapseUtilities.scala +++ b/core/src/test/scala/com/microsoft/azure/synapse/ml/nbtest/SynapseUtilities.scala @@ -178,7 +178,7 @@ object SynapseUtilities { exec(s"az storage fs file upload " + s" -s ${notebook.getAbsolutePath} -p $dest -f $StorageContainer " + " --overwrite true " + - s" --account-name $StorageAccount --account-key ${Secrets.SynapseStorageKey}") + s" --account-name $StorageAccount") val abfssPath = s"abfss://$StorageContainer@$StorageAccount.dfs.core.windows.net/$dest" val excludes: String = Seq( diff --git a/docs/Get Started/Install SynapseML.md b/docs/Get Started/Install SynapseML.md index e54849771f..29141d6c26 100644 --- a/docs/Get Started/Install SynapseML.md +++ b/docs/Get Started/Install SynapseML.md @@ -184,8 +184,3 @@ To try out SynapseML using the R autogenerated wrappers, [see our instructions](../../Reference/R%20Setup). Note: This feature is still under development and some necessary custom wrappers may be missing. -## C# (.NET) - -To try out SynapseML with .NET, follow the [.NET Installation Guide](../../Reference/Dotnet%20Setup). -Note: Some stages including AzureSearchWriter, DiagnosticInfo, UDPyF Param, ParamSpaceParam, BallTreeParam, -ConditionalBallTreeParam, LightGBMBooster Param are still under development and not exposed in .NET. diff --git a/lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/params/LightGBMBoosterParam.scala b/lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/params/LightGBMBoosterParam.scala index f9fbe49001..e2d0dfeb6a 100644 --- a/lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/params/LightGBMBoosterParam.scala +++ b/lightgbm/src/main/scala/com/microsoft/azure/synapse/ml/lightgbm/params/LightGBMBoosterParam.scala @@ -19,6 +19,5 @@ class LightGBMBoosterParam(parent: Params, name: String, doc: String, def this(parent: Params, name: String, doc: String) = this(parent, name, doc, { _ => true }) - override def dotnetType: String = "LightGBMBooster" } diff --git a/pipeline.yaml b/pipeline.yaml index 7148c02f00..d2bdb3b022 100644 --- a/pipeline.yaml +++ b/pipeline.yaml @@ -13,7 +13,6 @@ trigger: - "docs/Reference/Contributor Guide.md" - "docs/Reference/Developer Setup.md" - "docs/Reference/Docker Setup.md" - - "docs/Reference/Dotnet Setup.md" - CODEOWNERS pr: @@ -28,7 +27,6 @@ pr: - "docs/Reference/Contributor Guide.md" - "docs/Reference/Developer Setup.md" - "docs/Reference/Docker Setup.md" - - "docs/Reference/Dotnet Setup.md" - CODEOWNERS schedules: @@ -95,19 +93,34 @@ jobs: inputs: artifactsFeeds: SynapseML_PublicPackages mavenServiceConnections: SynapseML_PublicPackages-Feed-Connection - - bash: | - set -e - sudo apt-get install graphviz doxygen -y - source activate synapseml - sbt packagePython - sbt publishBlob publishDocs publishR publishPython uploadNotebooks - sbt genBuildInfo - echo "##vso[task.uploadsummary]$(pwd)/target/Build.md" - sbt publishLocalSigned - python tools/esrp/prepare_jar.py - displayName: Publish Artifacts + - task: AzureCLI@2 + displayName: 'Publish Artifacts 1' + inputs: + azureSubscription: 'SynapseML Build' + scriptLocation: inlineScript + scriptType: bash + inlineScript: | + set -e + sudo apt-get install graphviz doxygen -y + source activate synapseml + sbt packagePython uploadNotebooks + env: + SYNAPSEML_ENABLE_PUBLISH: true + - task: AzureCLI@2 + displayName: 'Publish Artifacts 2' + inputs: + azureSubscription: 'SynapseML Build' + scriptLocation: inlineScript + scriptType: bash + inlineScript: | + set -e + source activate synapseml + sbt -DskipCodegen=true publishBlob publishDocs publishR publishPython + sbt genBuildInfo + echo "##vso[task.uploadsummary]$(pwd)/target/Build.md" + sbt -DskipCodegen=true publishLocalSigned + python tools/esrp/prepare_jar.py env: - STORAGE-KEY: $(storage-key) NEXUS-UN: $(nexus-un) NEXUS-PW: $(nexus-pw) PGP-PRIVATE: $(pgp-private) @@ -160,20 +173,7 @@ jobs: - template: templates/update_cli.yml - template: templates/conda.yml - template: templates/kv.yml - - bash: | - set -e - source activate synapseml - sbt packagePython - sbt publishBlob - displayName: Publish Blob Artifacts - env: - STORAGE-KEY: $(storage-key) - NEXUS-UN: $(nexus-un) - NEXUS-PW: $(nexus-pw) - PGP-PRIVATE: $(pgp-private) - PGP-PUBLIC: $(pgp-public) - PGP-PW: $(pgp-pw) - SYNAPSEML_ENABLE_PUBLISH: true + - template: templates/publish.yml - task: AzureCLI@2 displayName: 'E2E' inputs: @@ -430,71 +430,6 @@ jobs: condition: succeededOrFailed() - template: templates/codecov.yml -# -#- job: DotnetTests -# timeoutInMinutes: 120 -# cancelTimeoutInMinutes: 0 -# condition: eq(variables.runTests, 'True') -# pool: -# vmImage: ubuntu-20.04 -# strategy: -# matrix: -# core: -# PACKAGE: "core" -# deep-learning: -# PACKAGE: "deepLearning" -# lightgbm: -# PACKAGE: "lightgbm" -# opencv: -# PACKAGE: "opencv" -# vw: -# PACKAGE: "vw" -# cognitive: -# PACKAGE: "cognitive" -# steps: -# - task: ShellScript@2 -# inputs: -# scriptPath: tools/dotnet/dotnetSetup.sh -# - task: AzureKeyVault@1 -# condition: and(eq(variables.isMaster, true), startsWith(variables['tag'], 'v')) -# inputs: -# azureSubscription: 'SynapseML Build' -# keyVaultName: mmlspark-keys -# - task: AzureCLI@1 -# displayName: 'Test Dotnet Code' -# timeoutInMinutes: 30 -# inputs: -# azureSubscription: 'SynapseML Build' -# scriptLocation: inlineScript -# publishTestResults: true -# inlineScript: | -# set -e -# echo "SPARK_HOME=$SPARK_HOME" -# echo "DOTNET_WORKER_DIR=$DOTNET_WORKER_DIR" -# sbt coverage publishDotnetTestBase -# sbt publishLocal -# sbt "project $(PACKAGE)" coverage publishDotnet -# export SBT_OPTS="-XX:+UseG1GC" -# echo "##vso[task.setvariable variable=SBT_OPTS]$SBT_OPTS" -# (timeout 5m sbt setup) || (echo "retrying" && timeout 5m sbt setup) || (echo "retrying" && timeout 5m sbt setup) -# (sbt "project $(PACKAGE)" coverage testDotnet) || (sbt "project $(PACKAGE)" coverage testDotnet) || (sbt "project $(PACKAGE)" coverage testDotnet) -# env: -# SYNAPSEML_ENABLE_PUBLISH: true -# - task: PublishTestResults@2 -# displayName: 'Publish Test Results' -# inputs: -# testRunner: VSTest -# testResultsFiles: '**/dotnet_test_results_*.trx' -# failTaskOnFailedTests: true -# condition: succeededOrFailed() -# - task: AzureCLI@1 -# displayName: 'Generate Codecov report' -# inputs: -# azureSubscription: 'SynapseML Build' -# scriptLocation: inlineScript -# inlineScript: 'sbt coverageReport' -# condition: succeededOrFailed() -# - template: templates/codecov.yml - job: RTests timeoutInMinutes: 60 @@ -572,6 +507,7 @@ jobs: - template: templates/update_cli.yml - template: templates/conda.yml - template: templates/kv.yml + - template: templates/publish.yml - task: AzureCLI@2 displayName: 'Test Website Samples' timeoutInMinutes: 30 @@ -580,10 +516,6 @@ jobs: scriptLocation: inlineScript scriptType: bash inlineScript: | - set -e - source activate synapseml - sbt packagePython - sbt publishBlob (timeout 5m sbt setup) || (echo "retrying" && timeout 5m sbt setup) || (echo "retrying" && timeout 5m sbt setup) (sbt coverage testWebsiteDocs) - task: PublishTestResults@2 diff --git a/project/CodegenPlugin.scala b/project/CodegenPlugin.scala index a66134c7c6..d7c84e7ec1 100644 --- a/project/CodegenPlugin.scala +++ b/project/CodegenPlugin.scala @@ -8,7 +8,7 @@ import spray.json._ import BuildUtils._ object CodegenConfigProtocol extends DefaultJsonProtocol { - implicit val CCFormat: RootJsonFormat[CodegenConfig] = jsonFormat9(CodegenConfig.apply) + implicit val CCFormat: RootJsonFormat[CodegenConfig] = jsonFormat8(CodegenConfig.apply) } import CodegenConfigProtocol._ @@ -20,7 +20,6 @@ case class CodegenConfig(name: String, version: String, pythonizedVersion: String, rVersion: String, - dotnetVersion: String, packageName: String) //noinspection ScalaStyle @@ -35,13 +34,10 @@ object CodegenPlugin extends AutoPlugin { val RInstallTag = Tags.Tag("rInstall") val TestGenTag = Tags.Tag("testGen") - val DotnetTestGenTag = Tags.Tag("dotnetTestGen") val PyCodeGenTag = Tags.Tag("pyCodeGen") val PyTestGenTag = Tags.Tag("pyTestGen") val RCodeGenTag = Tags.Tag("rCodeGen") val RTestGenTag = Tags.Tag("rTestGen") - val DotnetCodeGenTag = Tags.Tag("dotnetCodeGen") - val TestDotnetTag = Tags.Tag("testDotnet") object autoImport { val rVersion = settingKey[String]("R version") @@ -55,7 +51,6 @@ object CodegenPlugin extends AutoPlugin { val codegenArgs = settingKey[String]("codegenArgs") val testgenArgs = settingKey[String]("testgenArgs") - val targetDir = settingKey[File]("targetDir") val codegenDir = settingKey[File]("codegenDir") @@ -75,23 +70,15 @@ object CodegenPlugin extends AutoPlugin { val pyCodegen = TaskKey[Unit]("pyCodegen", "Generate python code") val pyTestgen = TaskKey[Unit]("pyTestgen", "Generate python tests") - val dotnetTestGen = TaskKey[Unit]("dotnetTestgen", "Generate dotnet tests") - val dotnetCodeGen = TaskKey[Unit]("dotnetCodegen", "Generate dotnet code") - val packageDotnet = TaskKey[Unit]("packageDotnet", "Generate dotnet nuget package") - val publishDotnet = TaskKey[Unit]("publishDotnet", "publish dotnet nuget package") - val testDotnet = TaskKey[Unit]("testDotnet", "test dotnet nuget package") - val mergeCodeDir = SettingKey[File]("mergeCodeDir") val mergePyCode = TaskKey[Unit]("mergePyCode", "copy python code to a destination") - val mergeDotnetCode = TaskKey[Unit]("mergeDotnetCode", "copy dotnet code to a destination") } import autoImport._ override lazy val globalSettings: Seq[Setting[_]] = Seq( Global / concurrentRestrictions ++= Seq( - Tags.limit(RInstallTag, 1), Tags.limit(TestGenTag, 1), Tags.limit(DotnetTestGenTag, 1), - Tags.limit(DotnetCodeGenTag, 1), Tags.limit(TestDotnetTag, 1)), + Tags.limit(RInstallTag, 1), Tags.limit(TestGenTag, 1)), Global / excludeLintKeys += publishMavenStyle ) @@ -108,7 +95,7 @@ object CodegenPlugin extends AutoPlugin { val testRunner = join("tools", "tests", "run_r_tests.R") if (rTestDir.exists()) { rCmd(activateCondaEnv, - Seq("Rscript", testRunner.getAbsolutePath), rTestDir, libPath) + Seq("Rscript", testRunner.getAbsolutePath), rTestDir, libPath) } } tag (RInstallTag) @@ -157,38 +144,6 @@ object CodegenPlugin extends AutoPlugin { } } tag (RTestGenTag) - def dotnetTestGenImpl: Def.Initialize[Task[Unit]] = Def.taskDyn { - (Compile / compile).value - (Test / compile).value - val arg = testgenArgs.value - Def.task { - (Test / runMain).toTask(s" com.microsoft.azure.synapse.ml.codegen.DotnetTestGen $arg").value - } - } tag (DotnetTestGenTag) - - def dotnetCodeGenImpl: Def.Initialize[Task[Unit]] = Def.taskDyn { - (Compile / compile).value - (Test / compile).value - val arg = codegenArgs.value - Def.task { - (Test / runMain).toTask(s" com.microsoft.azure.synapse.ml.codegen.DotnetCodegen $arg").value - } - } tag (DotnetCodeGenTag) - - def testDotnetImpl: Def.Initialize[Task[Unit]] = Def.task { - dotnetTestGen.value - val mainTargetDir = join(baseDirectory.value.getParent, "target") - runCmd( - Seq("dotnet", - "test", - s"${join(codegenDir.value, "test", "dotnet", "SynapseMLtest", "TestProjectSetup.csproj")}", - "--logger", - s""""trx;LogFileName=${join(mainTargetDir, s"dotnet_test_results_${name.value}.trx")}"""" - ), - new File(codegenDir.value, "test/dotnet/") - ) - } tag (TestDotnetTag) - override lazy val projectSettings: Seq[Setting[_]] = Seq( publishMavenStyle := true, codegenArgs := { @@ -200,7 +155,6 @@ object CodegenPlugin extends AutoPlugin { version.value, pythonizedVersion(version.value), rVersion.value, - dotnetedVersion(version.value), genPackageNamespace.value ).toJson.compactPrint }, @@ -213,7 +167,6 @@ object CodegenPlugin extends AutoPlugin { version.value, pythonizedVersion(version.value), rVersion.value, - dotnetedVersion(version.value), genPackageNamespace.value ).toJson.compactPrint }, @@ -232,11 +185,17 @@ object CodegenPlugin extends AutoPlugin { art)) }, codegen := (Def.taskDyn { - (Compile / compile).value - (Test / compile).value - val arg = codegenArgs.value - Def.task { - (Compile / runMain).toTask(s" com.microsoft.azure.synapse.ml.codegen.CodeGen $arg").value + if (sys.props.getOrElse("skipCodegen", "false") != "true") { + (Compile / compile).value + (Test / compile).value + val arg = codegenArgs.value + Def.task { + (Compile / runMain).toTask(s" com.microsoft.azure.synapse.ml.codegen.CodeGen $arg").value + } + } else { + Def.task { + streams.value.log.info("Skipping codegen.") + } } }.value), testgen := testGenImpl.value, @@ -298,11 +257,6 @@ object CodegenPlugin extends AutoPlugin { val destDir = join(mergeCodeDir.value, "src", "python", genPackageNamespace.value) FileUtils.copyDirectory(srcDir, destDir) }, - mergeDotnetCode := { - val srcDir = join(codegenDir.value, "src", "dotnet", genPackageNamespace.value) - val destDir = join(mergeCodeDir.value, "src", "dotnet", genPackageNamespace.value) - FileUtils.copyDirectory(srcDir, destDir) - }, pyCodegen := pyCodeGenImpl.value, testPython := { installPipPackage.value @@ -323,26 +277,6 @@ object CodegenPlugin extends AutoPlugin { rCodeGen := rCodeGenImpl.value, rTestGen := rTestGenImpl.value, testR := testRImpl.value, - dotnetCodeGen := dotnetCodeGenImpl.value, - dotnetTestGen := dotnetTestGenImpl.value, - testDotnet := testDotnetImpl.value, - packageDotnet := { - dotnetCodeGen.value - val destDotnetDir = join(targetDir.value, "classes", genPackageNamespace.value) - val dotnetSrcDir = join(codegenDir.value, "src", "dotnet") - if (destDotnetDir.exists()) FileUtils.forceDelete(destDotnetDir) - val sourceDotnetDir = join(dotnetSrcDir.getAbsolutePath, genPackageNamespace.value) - FileUtils.copyDirectory(sourceDotnetDir, destDotnetDir) - val packageDir = join(codegenDir.value, "package", "dotnet").absolutePath - packDotnetAssemblyCmd(packageDir, join(dotnetSrcDir, "synapse", "ml")) - }, - publishDotnet := { - packageDotnet.value - val dotnetPackageName = name.value.split("-").drop(1).map(s => s.capitalize).mkString("") - val packagePath = join(codegenDir.value, "package", "dotnet", - s"SynapseML.$dotnetPackageName.${dotnetedVersion(version.value)}.nupkg").absolutePath - publishDotnetAssemblyCmd(packagePath, join(mergeCodeDir.value, "sleet.json")) - }, targetDir := { (Compile / packageBin / artifactPath).value.getParentFile }, diff --git a/project/Secrets.scala b/project/Secrets.scala index 1fc12361c3..17e469dfc1 100644 --- a/project/Secrets.scala +++ b/project/Secrets.scala @@ -1,13 +1,13 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -import spray.json.DefaultJsonProtocol._ -import spray.json._ +import spray.json.* +import spray.json.DefaultJsonProtocol.* import java.io.{File, IOException, PrintWriter} import java.util.Base64 import scala.io.Source -import scala.sys.process._ +import scala.sys.process.* //scalastyle:off field.name object Secrets { @@ -18,11 +18,19 @@ object Secrets { lazy private val publishingEnabled: Boolean = sys.env.getOrElse(EnablePublishEnvVar, "false").toBoolean - protected def exec(command: String): String = { - val os = sys.props("os.name").toLowerCase - os match { - case x if x contains "windows" => Seq("cmd", "/C") ++ Seq(command) !! - case _ => command !! + protected def exec(command: String, maxRetries: Int = 2, attempt: Int = 0): String = { + val osCommand = sys.props("os.name").toLowerCase match { + case x if x contains "windows" => Seq("cmd", "/C") ++ Seq(command) + case _ => Seq("bash", "-c", command) + } + + try { + osCommand.!! + } catch { + case e: RuntimeException if attempt < maxRetries => + println(s"Retrying after error: $e") + Thread.sleep(1000) + exec(command, maxRetries, attempt + 1) } } @@ -161,7 +169,6 @@ object Secrets { lazy val nexusUsername: String = getSecret(NexusUsernameEnvVarName, NexusUsernameSecretName) lazy val nexusPassword: String = getSecret(NexusPasswordEnvVarName, NexusPasswordSecretName) lazy val pgpPassword: String = getSecret(PgpPasswordEnvVarName, PgpPasswordSecretName) - lazy val storageKey: String = getSecret(StorageKeyEnvVarName, StorageKeySecretName) lazy val pypiApiToken: String = getSecret(PypiApiEnvVarName, PypiApiSecretName) lazy val pgpPrivateFile: File = getPgpSecretFile(PgpPrivateSecretName, PgpPrivateEnvVarName) @@ -181,8 +188,6 @@ object Secrets { val PgpPrivateEnvVarName: String = "PGP-PRIVATE" val PgpPublicSecretName: String = "pgp-public" val PgpPublicEnvVarName: String = "PGP-PUBLIC" - val StorageKeySecretName: String = "storage-key" - val StorageKeyEnvVarName: String = "STORAGE-KEY" val PypiApiSecretName: String = "pypi-api-token" val PypiApiEnvVarName: String = "PYPI-API-TOKEN" val PublishToFeed: String = "PUBLISH-TO-FEED" diff --git a/project/build.scala b/project/build.scala index 86297d611e..7f0819f172 100644 --- a/project/build.scala +++ b/project/build.scala @@ -8,7 +8,7 @@ object BuildUtils { } def join(folders: String*): File = { - join(new File(folders.head), folders.tail: _*) + join(new File(folders.head), folders.tail *) } def isWindows: Boolean = { @@ -30,31 +30,34 @@ object BuildUtils { } } - def dotnetedVersion(version: String): String = { - version match { - case s if s.contains("-") => { - val versionArray = s.split("-".toCharArray) - versionArray.head + "-rc" + versionArray.drop(1).dropRight(1).mkString("") - } - case s => s - } - } def runCmd(cmd: Seq[String], wd: File = new File("."), - envVars: Map[String, String] = Map()): Unit = { - val pb = new ProcessBuilder() - .directory(wd) - .command(cmd: _*) - .redirectError(Redirect.INHERIT) - .redirectOutput(Redirect.INHERIT) - val env = pb.environment() - envVars.foreach(p => env.put(p._1, p._2)) - val result = pb.start().waitFor() - if (result != 0) { - println(s"Error: result code: ${result}") - throw new Exception(s"Execution resulted in non-zero exit code: ${result}") + envVars: Map[String, String] = Map(), + retries: Int = 0): Unit = { + + def executeAttempt(remainingRetries: Int): Unit = { + val pb = new ProcessBuilder() + .directory(wd) + .command(cmd *) + .redirectError(Redirect.INHERIT) + .redirectOutput(Redirect.INHERIT) + val env = pb.environment() + envVars.foreach(p => env.put(p._1, p._2)) + + try { + val result = pb.start().waitFor() + if (result != 0) { + throw new Exception(s"Execution resulted in non-zero exit code: $result") + } + } catch { + case e: Exception if remainingRetries > 0 => + println(s"Warning: Retrying due to error - ${e.getMessage}") + executeAttempt(remainingRetries - 1) + } } + + executeAttempt(retries) } def runCmdStr(cmd: String): Unit = runCmd(cmd.split(" "), new File("."), Map()) @@ -79,16 +82,6 @@ object BuildUtils { workDir) } - def packDotnetAssemblyCmd(outputDir: String, - workDir: File): Unit = - runCmd(Seq("dotnet", "pack", "--output", outputDir), workDir) - - def publishDotnetAssemblyCmd(packagePath: String, - sleetConfigFile: File): Unit = - runCmd( - Seq("sleet", "push", packagePath, "--config", sleetConfigFile.getAbsolutePath, - "--source", "SynapseMLNuget", "--force") - ) def uploadToBlob(source: String, dest: String, @@ -99,10 +92,12 @@ object BuildUtils { "--destination", container, "--destination-path", dest, "--account-name", accountName, - "--account-key", Secrets.storageKey, - "--overwrite", "true" + "--overwrite", "true", + "--auth-mode", "login" ) - runCmd(osPrefix ++ command) + + runCmd(osPrefix ++ command, retries=2) + } def downloadFromBlob(source: String, @@ -114,8 +109,9 @@ object BuildUtils { "--pattern", source, "--source", container, "--account-name", accountName, - "--account-key", Secrets.storageKey) - runCmd(osPrefix ++ command) + "--auth-mode", "login" + ) + runCmd(osPrefix ++ command, retries=2) } def singleUploadToBlob(source: String, @@ -128,14 +124,14 @@ object BuildUtils { "--container-name", container, "--name", dest, "--account-name", accountName, - "--account-key", Secrets.storageKey, - "--overwrite", "true" + "--overwrite", "true", + "--auth-mode", "login" ) ++ extraArgs - runCmd(osPrefix ++ command) + runCmd(osPrefix ++ command, retries=2) } - def allFiles(dir: File, pred: (File => Boolean) = null): Array[File] = { + def allFiles(dir: File, pred: File => Boolean = null): Array[File] = { def loop(dir: File): Array[File] = { val (dirs, files) = dir.listFiles.sorted.partition(_.isDirectory) (if (pred == null) files else files.filter(pred)) ++ dirs.flatMap(loop) @@ -157,7 +153,8 @@ object BuildUtils { val in = new BufferedInputStream(new FileInputStream(file), bufferSize) var b = 0 while (b >= 0) { - zip.write(data, 0, b); b = in.read(data, 0, bufferSize) + zip.write(data, 0, b); + b = in.read(data, 0, bufferSize) } in.close() zip.closeEntry() diff --git a/templates/publish.yml b/templates/publish.yml new file mode 100644 index 0000000000..344753e907 --- /dev/null +++ b/templates/publish.yml @@ -0,0 +1,20 @@ +steps: + - task: AzureCLI@2 + displayName: 'Publish Artifacts' + inputs: + azureSubscription: 'SynapseML Build' + scriptLocation: inlineScript + scriptType: bash + inlineScript: | + set -e + az account get-access-token --scope https://storage.azure.com/.default --output none + source activate synapseml + sbt packagePython + sbt publishBlob + env: + NEXUS-UN: $(nexus-un) + NEXUS-PW: $(nexus-pw) + PGP-PRIVATE: $(pgp-private) + PGP-PUBLIC: $(pgp-public) + PGP-PW: $(pgp-pw) + SYNAPSEML_ENABLE_PUBLISH: true \ No newline at end of file diff --git a/templates/update_cli.yml b/templates/update_cli.yml index 67c845f847..42dc16e210 100644 --- a/templates/update_cli.yml +++ b/templates/update_cli.yml @@ -8,5 +8,5 @@ steps: versionSpec: '8' jdkArchitectureOption: 'x64' jdkSourceOption: 'PreInstalled' - - bash: pip install azure-cli==2.40.0 + - bash: pip install azure-cli==2.59.0 displayName: 'Upgrade Azure CLI' diff --git a/tools/dotnet/db-init.sh b/tools/dotnet/db-init.sh deleted file mode 100644 index 3c9817e291..0000000000 --- a/tools/dotnet/db-init.sh +++ /dev/null @@ -1,42 +0,0 @@ -#!/bin/bash - -############################################################################## -# Description: -# This script installs the worker binaries and your app dependencies onto -# your Databricks Spark cluster. -# -# Usage: -# Change the variables below appropriately. -# -############################################################################## -################################# CHANGE THESE ############################### - -# DOTNET_SPARK_RELEASE to point to the appropriate version you downloaded from the -# https://github.com/dotnet/spark Releases section. For instance, for v2.1.1, you -# would set it to the following URI: -# https://github.com/dotnet/spark/releases/download/v2.1.1/Microsoft.Spark.Worker.netcoreapp3.1.linux-x64-2.1.1.tar.gz -DOTNET_SPARK_RELEASE=https://github.com/dotnet/spark/releases/download/v2.1.1/Microsoft.Spark.Worker.netcoreapp3.1.linux-x64-2.1.1.tar.gz - -# No need to change this unless you choose to use a different location -DBFS_INSTALLATION_ROOT=/dbfs/spark-dotnet -DOTNET_SPARK_WORKER_INSTALLATION_PATH=/usr/local/bin - -############################################################################### - -set +e -/bin/bash $DBFS_INSTALLATION_ROOT/install-worker.sh github $DOTNET_SPARK_RELEASE $DOTNET_SPARK_WORKER_INSTALLATION_PATH - - - -############################################################################## -# Uncomment below to deploy application dependencies to workers if submitting -# jobs using the "Set Jar" task (https://docs.databricks.com/user-guide/jobs.html#jar-jobs) -# Change the variables below appropriately -############################################################################## -################################# CHANGE THESE ############################### - -#APP_DEPENDENCIES=/dbfs/apps/dependencies -#WORKER_PATH=`readlink $DOTNET_SPARK_WORKER_INSTALLATION_PATH/Microsoft.Spark.Worker` -#if [ -f $WORKER_PATH ] && [ -d $APP_DEPENDENCIES ]; then -# sudo cp -fR $APP_DEPENDENCIES/. `dirname $WORKER_PATH` -#fi diff --git a/tools/dotnet/dotnetSetup.sh b/tools/dotnet/dotnetSetup.sh deleted file mode 100644 index c378cfa524..0000000000 --- a/tools/dotnet/dotnetSetup.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/bin/bash -# Add Microsoft package signing key and repository -wget https://packages.microsoft.com/config/ubuntu/18.04/packages-microsoft-prod.deb -O packages-microsoft-prod.deb -sudo dpkg -i packages-microsoft-prod.deb -rm packages-microsoft-prod.deb - -# Install .NET SDK -sudo apt-get update -sudo apt-get install -y apt-transport-https dotnet-sdk-5.0 - -# Update Nuget Config to include SynapseML Feed -dotnet nuget add source https://mmlspark.blob.core.windows.net/synapsemlnuget/index.json -n SynapseMLFeed - -# Install .NET for Apache Spark -wget https://github.com/dotnet/spark/releases/download/v2.1.1/Microsoft.Spark.Worker.netcoreapp3.1.linux-x64-2.1.1.tar.gz -tar -xvf Microsoft.Spark.Worker.netcoreapp3.1.linux-x64-2.1.1.tar.gz -C ~/bin/ -export DOTNET_WORKER_DIR=~/bin/Microsoft.Spark.Worker-2.1.1 -echo "##vso[task.setvariable variable=DOTNET_WORKER_DIR]$DOTNET_WORKER_DIR" - -# Install Sleet -dotnet tool install -g sleet - -# Install Apache Spark-3.4.1 -curl https://archive.apache.org/dist/spark/spark-3.4.1/spark-3.4.1-bin-hadoop3.tgz -o spark-3.4.1-bin-hadoop3.tgz -mkdir ~/bin -tar -xzvf spark-3.4.1-bin-hadoop3.tgz -C ~/bin -export SPARK_HOME=~/bin/spark-3.4.1-bin-hadoop3/ -export PATH=$SPARK_HOME/bin:$PATH -echo "##vso[task.setvariable variable=SPARK_HOME]$SPARK_HOME" -echo "##vso[task.setvariable variable=PATH]$SPARK_HOME/bin:$PATH" diff --git a/tools/dotnet/install-worker.sh b/tools/dotnet/install-worker.sh deleted file mode 100644 index bb8d0152f5..0000000000 --- a/tools/dotnet/install-worker.sh +++ /dev/null @@ -1,84 +0,0 @@ -#!/bin/bash - -############################################################################## -# Description: -# This is a helper script to install the worker binaries on your Apache Spark cluster -# -# Usage: -# ./install-worker.sh -# -# Sample usage: -# ./install-worker.sh -# github -# https://github.com/dotnet/spark/releases/download/v2.1.1/Microsoft.Spark.Worker.netcoreapp3.1.linux-x64-2.1.1.tar.gz -# /usr/local/bin -# -# or if you have your Worker release on filesystem like ABFS, here's how the path would -# look like: -# ./install-worker.sh -# azure -# abfs://@.dfs.core.windows.net//Microsoft.Spark.Worker.netcoreapp3.1.linux-x64-2.1.1.tar.gz -# /usr/local/bin -# -############################################################################## - -set +e - -# Uncomment if you want full tracing (for debugging purposes) -#set -o xtrace - -# Cloud Provider -CLOUD_PROVIDER=$1 - -# Path where packaged worker file (tgz) exists. -SRC_WORKER_PATH_OR_URI=$2 - -# The path on the executor nodes where Microsoft.Spark.Worker executable is installed. -WORKER_INSTALLATION_PATH=$3 - -# The path where all the dependent libraies are installed so that it doesn't -# pollute the $WORKER_INSTALLATION_PATH. -SPARKDOTNET_ROOT=$WORKER_INSTALLATION_PATH/spark-dotnet - -# Temporary worker file. -TEMP_WORKER_FILENAME=/tmp/temp_worker.tgz - -# Extract version -IFS='-' read -ra BASE_FILENAME <<< "$(basename $SRC_WORKER_PATH_OR_URI .tar.gz)" -VERSION=${BASE_FILENAME[2]} - -IFS='.' read -ra VERSION_CHECK <<< "$VERSION" -[[ ${#VERSION_CHECK[@]} == 3 ]] || { echo >&2 "Version check does not satisfy. Raise an issue here: https://github.com/dotnet/spark"; exit 1; } - -# Path of the final destination for the worker binaries -# (the one we just downloaded and extracted) -DEST_WORKER_PATH=$SPARKDOTNET_ROOT/Microsoft.Spark.Worker-$VERSION -DEST_WORKER_BINARY=$DEST_WORKER_PATH/Microsoft.Spark.Worker - -# Clean up any existing files. -sudo rm -f $WORKER_INSTALLATION_PATH/Microsoft.Spark.Worker -sudo rm -rf $SPARKDOTNET_ROOT - -# Copy the worker file to a local temporary file. -if [ $"${CLOUD_PROVIDER,,}" = "github" ]; then - wget $SRC_WORKER_PATH_OR_URI -O $TEMP_WORKER_FILENAME -elif [ "${CLOUD_PROVIDER,,}" = "azure" ]; then - hdfs dfs -get $SRC_WORKER_PATH_OR_URI $TEMP_WORKER_FILENAME -elif [ "${CLOUD_PROVIDER,,}" = "aws" ]; then - aws s3 cp $SRC_WORKER_PATH_OR_URI $TEMP_WORKER_FILENAME -else - cp -f $SRC_WORKER_PATH_OR_URI $TEMP_WORKER_FILENAME -fi - -# Untar the file. -sudo mkdir -p $SPARKDOTNET_ROOT -sudo tar xzf $TEMP_WORKER_FILENAME -C $SPARKDOTNET_ROOT - -# Make the file executable since dotnet doesn't set this correctly. -sudo chmod 755 $DEST_WORKER_BINARY - -# Create a symlink. -sudo ln -sf $DEST_WORKER_BINARY $WORKER_INSTALLATION_PATH/Microsoft.Spark.Worker - -# Remove the temporary worker file. -sudo rm $TEMP_WORKER_FILENAME diff --git a/website/docusaurus.config.js b/website/docusaurus.config.js index b772e1c9ce..47a393c770 100644 --- a/website/docusaurus.config.js +++ b/website/docusaurus.config.js @@ -61,10 +61,6 @@ module.exports = { { label: 'Scala', href: `https://mmlspark.blob.core.windows.net/docs/${version}/scala/com/microsoft/azure/synapse/ml/index.html`, - }, - { - label: 'C#', - href: `https://mmlspark.blob.core.windows.net/docs/${version}/dotnet/index.html`, } ] }, diff --git a/website/sidebars.js b/website/sidebars.js index 6b9747f80a..0ab11ee7ed 100644 --- a/website/sidebars.js +++ b/website/sidebars.js @@ -186,9 +186,6 @@ module.exports = { "Reference/Developer Setup", "Reference/Docker Setup", "Reference/R Setup", - "Reference/Dotnet Setup", - "Reference/Quickstart - LightGBM in Dotnet", - ], }, ], diff --git a/website/src/pages/index.js b/website/src/pages/index.js index 6bd4db6637..a9599a50cc 100644 --- a/website/src/pages/index.js +++ b/website/src/pages/index.js @@ -258,7 +258,6 @@ function Home() { { label: "Docker", value: "Docker" }, { label: "Python", value: "Python" }, { label: "SBT", value: "SBT" }, - { label: ".NET", value: "dotnet" } ]} > @@ -422,20 +421,6 @@ libraryDependencies += "com.microsoft.azure" %% "synapseml_2.12" % "1.0.4" // Pl lang="jsx" > - - To try out SynapseML with .NET, you should add SynapseML's assembly into reference: - - For detailed installation, please refer this{" "} - instruction. - diff --git a/website/src/theme/DocumentationTable/index.js b/website/src/theme/DocumentationTable/index.js index 015d321e0d..875cda59e6 100644 --- a/website/src/theme/DocumentationTable/index.js +++ b/website/src/theme/DocumentationTable/index.js @@ -7,7 +7,6 @@ function DocTable(props) { const version = context.siteConfig.customFields.version; let pyLink = `https://mmlspark.blob.core.windows.net/docs/${version}/pyspark/${py}`; let scalaLink = `https://mmlspark.blob.core.windows.net/docs/${version}/scala/${scala}`; - let csharpLink = `https://mmlspark.blob.core.windows.net/docs/${version}/dotnet/${csharp}`; return ( @@ -25,12 +24,6 @@ function DocTable(props) { {className} -
- .NET API: - - {className} - - Source: