diff --git a/.gitignore b/.gitignore index 8fd3247cec1..ec1a9ac15e9 100644 --- a/.gitignore +++ b/.gitignore @@ -47,4 +47,7 @@ node_modules/ .Rproj.user # R output -*.Rout \ No newline at end of file +*.Rout + +# Misc +.bsp diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 46b481c7130..739b6065c41 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -55,7 +55,7 @@ this process: #### Implement documentation -- Add a [sample Jupyter notebook](notebooks/samples) that shows the intended use +- Add a [sample Jupyter notebook](notebooks/) that shows the intended use case of your algorithm, with instructions in step-by-step manner. (The same notebook could be used for testing the code.) - Add in-line ScalaDoc comments to your source code, to generate the [API diff --git a/README.md b/README.md index 58c5cdcec6f..f7618c97eac 100644 --- a/README.md +++ b/README.md @@ -60,7 +60,7 @@ PySpark](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc3/pyspark/index.htm | | | | |:--:|:--:|:--:| -| **Isolation Forest on Spark** | [**CyberML**](https://github.com/Azure/mmlspark/blob/master/notebooks/samples/CyberML%20-%20Anomalous%20Access%20Detection.ipynb) | **Conditional KNN** | +| **Isolation Forest on Spark** | [**CyberML**](https://github.com/Azure/mmlspark/blob/master/notebooks/CyberML%20-%20Anomalous%20Access%20Detection.ipynb) | **Conditional KNN** | | Distributed Nonlinear Outlier Detection | Machine Learning Tools for Cyber Security | Scalable KNN Models with Conditional Queries | @@ -86,29 +86,29 @@ PySpark](https://mmlspark.blob.core.windows.net/docs/1.0.0-rc3/pyspark/index.htm - Train and evaluate a flight delay prediction system ([example 2]) - Finding anomalous data access patterns using the Access Anomalies package of CyberML ([example 11]) -See our [notebooks](notebooks/samples/) for all examples. +See our [notebooks](notebooks/) for all examples. 
-[example 1]: notebooks/samples/Classification%20-%20Adult%20Census.ipynb "Adult Census Income Training"
+[example 1]: notebooks/Classification%20-%20Adult%20Census.ipynb "Adult Census Income Training"
-[example 2]: notebooks/samples/Regression%20-%20Flight%20Delays.ipynb "Regression Example with Flight Delay Dataset"
+[example 2]: notebooks/Regression%20-%20Flight%20Delays.ipynb "Regression Example with Flight Delay Dataset"
-[example 3]: notebooks/samples/LightGBM%20-%20Quantile%20Regression%20for%20Drug%20Discovery.ipynb "Quantile Regression with LightGBM"
+[example 3]: notebooks/LightGBM%20-%20Quantile%20Regression%20for%20Drug%20Discovery.ipynb "Quantile Regression with LightGBM"
-[example 4]: notebooks/samples/TextAnalytics%20-%20Amazon%20Book%20Reviews.ipynb "Amazon Book Reviews - TextFeaturizer"
+[example 4]: notebooks/TextAnalytics%20-%20Amazon%20Book%20Reviews.ipynb "Amazon Book Reviews - TextFeaturizer"
-[example 5]: notebooks/samples/HyperParameterTuning%20-%20Fighting%20Breast%20Cancer.ipynb "Hyperparameter Tuning with MMLSpark"
+[example 5]: notebooks/HyperParameterTuning%20-%20Fighting%20Breast%20Cancer.ipynb "Hyperparameter Tuning with MMLSpark"
-[example 6]: notebooks/samples/DeepLearning%20-%20CIFAR10%20Convolutional%20Network.ipynb "CIFAR10 CNTK CNN Evaluation"
+[example 6]: notebooks/DeepLearning%20-%20CIFAR10%20Convolutional%20Network.ipynb "CIFAR10 CNTK CNN Evaluation"
-[example 7]: notebooks/samples/OpenCV%20-%20Pipeline%20Image%20Transformations.ipynb "Pipeline Image Transformations"
+[example 7]: notebooks/OpenCV%20-%20Pipeline%20Image%20Transformations.ipynb "Pipeline Image Transformations"
-[example 8]: notebooks/samples/DeepLearning%20-%20BiLSTM%20Medical%20Entity%20Extraction.ipynb "Medical Entity Extraction"
+[example 8]: notebooks/DeepLearning%20-%20BiLSTM%20Medical%20Entity%20Extraction.ipynb "Medical Entity Extraction"
-[example 9]: notebooks/samples/DeepLearning%20-%20Flower%20Image%20Classification.ipynb "Deep Flower Classification"
+[example 9]: notebooks/DeepLearning%20-%20Flower%20Image%20Classification.ipynb "Deep Flower Classification"
 [example 10]: notebooks/gpu/DeepLearning%20-%20Distributed%20CNTK%20training.ipynb "CIFAR10 CNTK CNN Training"
-[example 11]: notebooks/samples/CyberML%20-%20Anomalous%20Access%20Detection.ipynb "Access Anomalies documenation, training and evaluation example"
+[example 11]: notebooks/CyberML%20-%20Anomalous%20Access%20Detection.ipynb "Access Anomalies documentation, training and evaluation example"

 ## A short example

@@ -127,7 +127,7 @@ scoredImages = cntkModel.transform(imagesWithLabels)
 ...
 ```

-See [other sample notebooks](notebooks/samples/) as well as the MMLSpark
+See [other sample notebooks](notebooks/) as well as the MMLSpark
 documentation for [Scala](http://mmlspark.azureedge.net/docs/scala/) and
 [PySpark](http://mmlspark.azureedge.net/docs/pyspark/).
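The build.sbt diff that follows splits the former single `mmlspark` project into per-library modules: `mmlspark-core`, `mmlspark-deep-learning`, `mmlspark-lightgbm`, `mmlspark-vw`, `mmlspark-cognitive`, and `mmlspark-opencv`. As a sketch of what this means for downstream builds (the artifact names come from the new `name :=` settings in the diff; the version string is purely illustrative), a consumer could then depend on just the pieces it needs:

```scala
// Hypothetical consumer build.sbt. The artifact names are taken from the
// module definitions in this diff; the version string is illustrative.
libraryDependencies ++= Seq(
  "com.microsoft.ml.spark" %% "mmlspark-core"     % "1.0.0-rc4",
  "com.microsoft.ml.spark" %% "mmlspark-lightgbm" % "1.0.0-rc4"
)
```

One payoff of the split is visible in the dependency lists below: the heavy native bindings (cntk, lightgbmlib, vw-jni, opencv, and the speech client-sdk) move out of the shared `libraryDependencies` and into their own modules, so a build that only needs the core transformers no longer pulls them in.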
diff --git a/build.sbt b/build.sbt index 0d10df561f6..130abf8606d 100644 --- a/build.sbt +++ b/build.sbt @@ -1,22 +1,20 @@ import java.io.{File, PrintWriter} import java.net.URL + import org.apache.commons.io.FileUtils import sbt.ExclusionRule -import sbt.internal.util.ManagedLogger import scala.xml.{Node => XmlNode, NodeSeq => XmlNodeSeq, _} import scala.xml.transform.{RewriteRule, RuleTransformer} -import scala.sys.process.Process import BuildUtils._ +import CodegenPlugin.autoImport.pythonizedVersion +import sbt.Project.projectToRef +import xerial.sbt.Sonatype._ -val condaEnvName = "mmlspark" -name := "mmlspark" -organization := "com.microsoft.ml.spark" -scalaVersion := "2.12.10" +ThisBuild / organization := "com.microsoft.ml.spark" +ThisBuild / scalaVersion := "2.12.10" val sparkVersion = "3.0.1" -//val scalaMajorVersion = settingKey[String]("scalaMajorVersion") -//scalaMajorVersion := {scalaVersion.value.split(".".toCharArray).dropRight(0).mkString(".")} val scalaMajorVersion = 2.12 val excludes = Seq( @@ -24,42 +22,28 @@ val excludes = Seq( ExclusionRule("org.scalatest") ) -libraryDependencies ++= Seq( +val coreDependencies = Seq( "org.apache.spark" %% "spark-core" % sparkVersion % "compile", "org.apache.spark" %% "spark-mllib" % sparkVersion % "compile", "org.apache.spark" %% "spark-avro" % sparkVersion % "provided", "org.apache.spark" %% "spark-tags" % sparkVersion % "test", "org.scalatest" %% "scalatest" % "3.0.5" % "test") - -libraryDependencies ++= Seq( +val extraDependencies = Seq( "org.scalactic" %% "scalactic" % "3.0.5", "io.spray" %% "spray-json" % "1.3.2", - "com.microsoft.cntk" % "cntk" % "2.4", - "org.openpnp" % "opencv" % "3.2.0-1", "com.jcraft" % "jsch" % "0.1.54", - "com.microsoft.cognitiveservices.speech" % "client-sdk" % "1.14.0", "org.apache.httpcomponents" % "httpclient" % "4.5.6", "org.apache.httpcomponents" % "httpmime" % "4.5.6", - "com.microsoft.ml.lightgbm" % "lightgbmlib" % "3.2.110", - "com.github.vowpalwabbit" % "vw-jni" % "8.9.1", "com.linkedin.isolation-forest" %% "isolation-forest_3.0.0" % "1.0.1", ).map(d => d excludeAll (excludes: _*)) +val dependencies = coreDependencies ++ extraDependencies def txt(e: Elem, label: String): String = "\"" + e.child.filter(_.label == label).flatMap(_.text).mkString + "\"" -def activateCondaEnv: Seq[String] = { - if (sys.props("os.name").toLowerCase.contains("windows")) { - osPrefix ++ Seq("activate", condaEnvName, "&&") - } else { - Seq() - //TODO figure out why this doesent work - //Seq("/bin/bash", "-l", "-c", "source activate " + condaEnvName, "&&") - } -} - val omittedDeps = Set(s"spark-core_${scalaMajorVersion}", s"spark-mllib_${scalaMajorVersion}", "org.scala-lang") // skip dependency elements with a scope -pomPostProcess := { (node: XmlNode) => + +def pomPostFunc(node: XmlNode): scala.xml.Node = { new RuleTransformer(new RewriteRule { override def transform(node: XmlNode): XmlNodeSeq = node match { case e: Elem if e.label == "dependency" @@ -77,191 +61,17 @@ pomPostProcess := { (node: XmlNode) => }).transform(node).head } -resolvers += "Speech" at "https://mmlspark.blob.core.windows.net/maven/" - -val createCondaEnvTask = TaskKey[Unit]("createCondaEnv", "create conda env") -createCondaEnvTask := { - val s = streams.value - val hasEnv = Process("conda env list").lineStream.toList - .map(_.split("\\s+").head).contains(condaEnvName) - if (!hasEnv) { - runCmd(Seq("conda", "env", "create", "-f", "environment.yaml")) - } else { - println("Found conda env " + condaEnvName) - } -} - -val condaEnvLocation = 
TaskKey[String]("condaEnvLocation", "get install location of conda env") -condaEnvLocation := { - val s = streams.value - createCondaEnvTask.value - Process("conda env list").lineStream.toList - .map(_.split("\\s+")) - .map(l => (l.head, l.reverse.head)) - .filter(p => p._1 == condaEnvName) - .head._2 -} - - -val cleanCondaEnvTask = TaskKey[Unit]("cleanCondaEnv", "create conda env") -cleanCondaEnvTask := { - runCmd(Seq("conda", "env", "remove", "--name", condaEnvName, "-y")) -} - -val codegenTask = TaskKey[Unit]("codegen", "Generate Code") -codegenTask := { - (runMain in Test).toTask(" com.microsoft.ml.spark.codegen.CodeGen").value -} - -val testgenTask = TaskKey[Unit]("testgen", "Generate Tests") -testgenTask := { - (runMain in Test).toTask(" com.microsoft.ml.spark.codegen.TestGen").value -} - -val genDir = join("target", s"scala-${scalaMajorVersion}", "generated") -val unidocDir = join("target", s"scala-${scalaMajorVersion}", "unidoc") -val pythonSrcDir = join(genDir.toString, "src", "python") -val unifiedDocDir = join(genDir.toString, "doc") -val pythonDocDir = join(unifiedDocDir.toString, "pyspark") -val pythonPackageDir = join(genDir.toString, "package", "python") -val pythonTestDir = join(genDir.toString, "test", "python") -val rSrcDir = join(genDir.toString, "src", "R", "mmlspark") -val rPackageDir = join(genDir.toString, "package", "R") - -val pythonizedVersion = settingKey[String]("Pythonized version") -pythonizedVersion := { - if (version.value.contains("-")) { - version.value.split("-".head).head + ".dev1" - } else { - version.value - } -} - -val rVersion = settingKey[String]("R version") -rVersion := { - if (version.value.contains("-")) { - version.value.split("-".head).head - } else { - version.value - } -} - -def rCmd(cmd: Seq[String], wd: File, libPath: String): Unit = { - runCmd(activateCondaEnv ++ cmd, wd, Map("R_LIBS" -> libPath, "R_USER_LIBS" -> libPath)) -} - -val packageR = TaskKey[Unit]("packageR", "Generate roxygen docs and zip R package") -packageR := { - createCondaEnvTask.value - codegenTask.value - val libPath = join(condaEnvLocation.value, "Lib", "R", "library").toString - rCmd(Seq("R", "-q", "-e", "roxygen2::roxygenise()"), rSrcDir, libPath) - rPackageDir.mkdirs() - zipFolder(rSrcDir, new File(rPackageDir, s"mmlspark-${version.value}.zip")) -} - -val testR = TaskKey[Unit]("testR", "Run testthat on R tests") -testR := { - packageR.value - publishLocal.value - val libPath = join(condaEnvLocation.value, "Lib", "R", "library").toString - rCmd(Seq("R", "CMD", "INSTALL", "--no-multiarch", "--with-keep.source", "mmlspark"), rSrcDir.getParentFile, libPath) - val testRunner = join("tools", "tests", "run_r_tests.R").getAbsolutePath - rCmd(Seq("Rscript", testRunner), rSrcDir, libPath) -} - -val publishR = TaskKey[Unit]("publishR", "publish R package to blob") -publishR := { - codegenTask.value - packageR.value - val rPackage = rPackageDir.listFiles().head - singleUploadToBlob(rPackage.toString, rPackage.getName, "rrr") -} - -val packagePythonTask = TaskKey[Unit]("packagePython", "Package python sdk") -packagePythonTask := { - codegenTask.value - createCondaEnvTask.value - val destPyDir = join("target", s"scala-${scalaMajorVersion}", "classes", "mmlspark") - if (destPyDir.exists()) FileUtils.forceDelete(destPyDir) - FileUtils.copyDirectory(join(pythonSrcDir.getAbsolutePath, "mmlspark"), destPyDir) - runCmd( - activateCondaEnv ++ - Seq(s"python", "setup.py", "bdist_wheel", "--universal", "-d", s"${pythonPackageDir.absolutePath}"), - pythonSrcDir) -} - -val 
installPipPackageTask = TaskKey[Unit]("installPipPackage", "install python sdk") -installPipPackageTask := { - packagePythonTask.value - publishLocal.value - runCmd( - activateCondaEnv ++ Seq("pip", "install", "-I", - s"mmlspark-${pythonizedVersion.value}-py2.py3-none-any.whl"), - pythonPackageDir) -} - -val generatePythonDoc = TaskKey[Unit]("generatePythonDoc", "Generate sphinx docs for python") -generatePythonDoc := { - installPipPackageTask.value - runCmd(activateCondaEnv ++ Seq("sphinx-apidoc", "-f", "-o", "doc", "."), - join(pythonSrcDir.toString, "mmlspark")) - runCmd(activateCondaEnv ++ Seq("sphinx-build", "-b", "html", "doc", "../../../doc/pyspark"), - join(pythonSrcDir.toString, "mmlspark")) -} - -val publishDocs = TaskKey[Unit]("publishDocs", "publish docs for scala and python") -publishDocs := { - generatePythonDoc.value - (Compile / unidoc).value - val html = - """ - |
- |pyspark/ - |scala/ - |- """.stripMargin - val scalaDir = join(unifiedDocDir.toString, "scala") - if (scalaDir.exists()) FileUtils.forceDelete(scalaDir) - FileUtils.copyDirectory(unidocDir, scalaDir) - FileUtils.writeStringToFile(join(unifiedDocDir.toString, "index.html"), html, "utf-8") - uploadToBlob(unifiedDocDir.toString, version.value, "docs") -} - -val publishPython = TaskKey[Unit]("publishPython", "publish python wheel") -publishPython := { - publishLocal.value - packagePythonTask.value - singleUploadToBlob( - join(pythonPackageDir.toString, s"mmlspark-${pythonizedVersion.value}-py2.py3-none-any.whl").toString, - version.value + s"/mmlspark-${pythonizedVersion.value}-py2.py3-none-any.whl", - "pip") -} +pomPostProcess := pomPostFunc -val testPythonTask = TaskKey[Unit]("testPython", "test python sdk") - -testPythonTask := { - installPipPackageTask.value - testgenTask.value - runCmd( - activateCondaEnv ++ Seq("python", - "-m", - "pytest", - "--cov=mmlspark", - "--junitxml=../../../../python-test-results.xml", - "--cov-report=xml", - "mmlsparktest" - ), - new File(s"target/scala-${scalaMajorVersion}/generated/test/python/") - ) -} +val speechResolver = "Speech" at "https://mmlspark.blob.core.windows.net/maven/" val getDatasetsTask = TaskKey[Unit]("getDatasets", "download datasets used for testing") val datasetName = "datasets-2020-08-27.tgz" val datasetUrl = new URL(s"https://mmlspark.blob.core.windows.net/installers/$datasetName") val datasetDir = settingKey[File]("The directory that holds the dataset") -datasetDir := { - join(target.value.toString, s"scala-${scalaMajorVersion}", "datasets", datasetName.split(".".toCharArray.head).head) +ThisBuild / datasetDir := { + join(artifactPath.in(packageBin).in(Compile).value.getParentFile, + "datasets", datasetName.split(".".toCharArray.head).head) } getDatasetsTask := { @@ -276,48 +86,61 @@ getDatasetsTask := { val genBuildInfo = TaskKey[Unit]("genBuildInfo", "generate a build info file") genBuildInfo := { - val buildInfo = + val docInfo = s""" - |MMLSpark Build and Release Information - |--------------- - | - |### Maven Coordinates - | `${organization.value}:${name.value}_${scalaMajorVersion}:${version.value}` - | - |### Maven Resolver - | `https://mmlspark.azureedge.net/maven` | |### Documentation Pages: |[Scala Documentation](https://mmlspark.blob.core.windows.net/docs/${version.value}/scala/index.html) |[Python Documentation](https://mmlspark.blob.core.windows.net/docs/${version.value}/pyspark/index.html) | """.stripMargin + val buildInfo = (root / blobArtifactInfo).value + docInfo val infoFile = join("target", "Build.md") if (infoFile.exists()) FileUtils.forceDelete(infoFile) FileUtils.writeStringToFile(infoFile, buildInfo, "utf-8") } -val setupTask = TaskKey[Unit]("setup", "set up library for intellij") -setupTask := { - (Compile / compile).toTask.value - (Test / compile).toTask.value - getDatasetsTask.value +val rootGenDir = SettingKey[File]("rootGenDir") +rootGenDir := { + val targetDir = artifactPath.in(packageBin).in(Compile).in(root).value.getParentFile + join(targetDir, "generated") } -val publishBlob = TaskKey[Unit]("publishBlob", "publish the library to mmlspark blob") -publishBlob := { - publishM2.value - val scalaVersionSuffix = scalaVersion.value.split(".".toCharArray.head).dropRight(1).mkString(".") - val nameAndScalaVersion = s"${name.value}_$scalaVersionSuffix" - - val localPackageFolder = join( - Seq(new File(new URI(Resolver.mavenLocal.root)).getAbsolutePath) - ++ organization.value.split(".".toCharArray.head) - ++ 
Seq(nameAndScalaVersion, version.value): _*).toString +val generatePythonDoc = TaskKey[Unit]("generatePythonDoc", "Generate sphinx docs for python") +generatePythonDoc := { + installPipPackage.all(ScopeFilter( + inProjects(core, deepLearning, cognitive, vw, lightgbm, opencv), + inConfigurations(Compile))).value + mergePyCode.all(ScopeFilter( + inProjects(core, deepLearning, cognitive, vw, lightgbm, opencv), + inConfigurations(Compile)) + ).value + val targetDir = artifactPath.in(packageBin).in(Compile).in(root).value.getParentFile + val codegenDir = join(targetDir, "generated") + val dir = join(codegenDir, "src", "python", "mmlspark") + runCmd(activateCondaEnv.value ++ Seq("sphinx-apidoc", "-f", "-o", "doc", "."), dir) + runCmd(activateCondaEnv.value ++ Seq("sphinx-build", "-b", "html", "doc", "../../../doc/pyspark"), dir) +} - val blobMavenFolder = organization.value.replace(".", "/") + - s"/$nameAndScalaVersion/${version.value}" - uploadToBlob(localPackageFolder, blobMavenFolder, "maven") +val publishDocs = TaskKey[Unit]("publishDocs", "publish docs for scala and python") +publishDocs := { + //generatePythonDoc.value + (root / Compile / unidoc).value + val html = + """ + |
+ |pyspark/ + |scala/ + |+ """.stripMargin + val targetDir = artifactPath.in(packageBin).in(Compile).in(root).value.getParentFile + val codegenDir = join(targetDir, "generated") + val unifiedDocDir = join(codegenDir, "doc") + val scalaDir = join(unifiedDocDir.toString, "scala") + if (scalaDir.exists()) FileUtils.forceDelete(scalaDir) + FileUtils.copyDirectory(join(targetDir, "unidoc"), scalaDir) + FileUtils.writeStringToFile(join(unifiedDocDir.toString, "index.html"), html, "utf-8") + uploadToBlob(unifiedDocDir.toString, version.value, "docs") } val release = TaskKey[Unit]("release", "publish the library to mmlspark blob") @@ -355,11 +178,8 @@ publishBadges := { } val settings = Seq( - (scalastyleConfig in Test) := baseDirectory.value / "scalastyle-test-config.xml", + (scalastyleConfig in Test) := (ThisBuild / baseDirectory).value / "scalastyle-test-config.xml", logBuffered in Test := false, - buildInfoKeys := Seq[BuildInfoKey]( - name, version, scalaVersion, sbtVersion, - baseDirectory, datasetDir, pythonizedVersion, rVersion), parallelExecution in Test := false, test in assembly := {}, assemblyMergeStrategy in assembly := { @@ -367,14 +187,84 @@ val settings = Seq( case x => MergeStrategy.first }, assemblyOption in assembly := (assemblyOption in assembly).value.copy(includeScala = false), - buildInfoPackage := "com.microsoft.ml.spark.build") - -lazy val mmlspark = (project in file(".")) - .enablePlugins(BuildInfoPlugin) - .enablePlugins(ScalaUnidocPlugin) - .settings(settings: _*) + autoAPIMappings := true, + pomPostProcess := pomPostFunc, +) +ThisBuild / publishMavenStyle := true + +lazy val core = (project in file("core")) + .enablePlugins(BuildInfoPlugin && SbtPlugin) + .settings((settings ++ Seq( + libraryDependencies ++= dependencies, + buildInfoKeys ++= Seq[BuildInfoKey]( + datasetDir, + version, + scalaVersion, + sbtVersion, + baseDirectory + ), + name := "mmlspark-core", + buildInfoPackage := "com.microsoft.ml.spark.build", + )): _*) + +lazy val deepLearning = (project in file("deep-learning")) + .enablePlugins(SbtPlugin) + .dependsOn(core % "test->test;compile->compile") + .settings((settings ++ Seq( + libraryDependencies += ("com.microsoft.cntk" % "cntk" % "2.4"), + name := "mmlspark-deep-learning", + )): _*) + +lazy val lightgbm = (project in file("lightgbm")) + .enablePlugins(SbtPlugin) + .dependsOn(core % "test->test;compile->compile") + .settings((settings ++ Seq( + libraryDependencies += ("com.microsoft.ml.lightgbm" % "lightgbmlib" % "3.2.110"), + name := "mmlspark-lightgbm" + )): _*) + +lazy val vw = (project in file("vw")) + .enablePlugins(SbtPlugin) + .dependsOn(core % "test->test;compile->compile") + .settings((settings ++ Seq( + libraryDependencies += ("com.github.vowpalwabbit" % "vw-jni" % "8.9.1"), + name := "mmlspark-vw" + )): _*) + +lazy val cognitive = (project in file("cognitive")) + .enablePlugins(SbtPlugin) + .dependsOn(core % "test->test;compile->compile") + .settings((settings ++ Seq( + libraryDependencies += ("com.microsoft.cognitiveservices.speech" % "client-sdk" % "1.14.0"), + resolvers += speechResolver, + name := "mmlspark-cognitive" + )): _*) + +lazy val opencv = (project in file("opencv")) + .enablePlugins(SbtPlugin) + .dependsOn(core % "test->test;compile->compile") + .settings((settings ++ Seq( + libraryDependencies += ("org.openpnp" % "opencv" % "3.2.0-1"), + name := "mmlspark-opencv" + )): _*) + +lazy val root = (project in file(".")) + .aggregate(core, deepLearning, cognitive, vw, lightgbm, opencv) + .dependsOn(core, deepLearning, cognitive, vw, 
lightgbm, opencv) + .enablePlugins(ScalaUnidocPlugin && SbtPlugin) + .disablePlugins(CodegenPlugin) + .settings(settings ++ Seq( + name := "mmlspark", + )) -import xerial.sbt.Sonatype._ +val setupTask = TaskKey[Unit]("setup", "set up library for intellij") +setupTask := { + compile.all(ScopeFilter( + inProjects(core, deepLearning, cognitive, vw, lightgbm, opencv), + inConfigurations(Compile, Test)) + ).value + getDatasetsTask.value +} sonatypeProjectHosting := Some( GitHubHosting("Azure", "MMLSpark", "mmlspark-support@microsot.com")) @@ -389,33 +279,30 @@ developers := List( ) licenses += ("MIT", url("https://github.com/Azure/mmlspark/blob/master/LICENSE")) -publishMavenStyle := true - -credentials += Credentials("Sonatype Nexus Repository Manager", - "oss.sonatype.org", - Secrets.nexusUsername, - Secrets.nexusPassword) - -pgpPassphrase := Some(Secrets.pgpPassword.toCharArray) -pgpSecretRing := { - val temp = File.createTempFile("secret", ".asc") - new PrintWriter(temp) { - write(Secrets.pgpPrivate); - close() - } - temp -} -pgpPublicRing := { - val temp = File.createTempFile("public", ".asc") - new PrintWriter(temp) { - write(Secrets.pgpPublic); - close() - } - temp -} + +//credentials += Credentials("Sonatype Nexus Repository Manager", +// "oss.sonatype.org", +// Secrets.nexusUsername, +// Secrets.nexusPassword) +// +//pgpPassphrase := Some(Secrets.pgpPassword.toCharArray) +//pgpSecretRing := { +// val temp = File.createTempFile("secret", ".asc") +// new PrintWriter(temp) { +// write(Secrets.pgpPrivate); +// close() +// } +// temp +//} +//pgpPublicRing := { +// val temp = File.createTempFile("public", ".asc") +// new PrintWriter(temp) { +// write(Secrets.pgpPublic); +// close() +// } +// temp +//} +//publishTo := sonatypePublishToBundle.value dynverSonatypeSnapshots in ThisBuild := true dynverSeparator in ThisBuild := "-" -publishTo := sonatypePublishToBundle.value - -// Break Cache - 1 diff --git a/src/main/python/mmlspark/cognitive/AzureSearchWriter.py b/cognitive/src/main/python/mmlspark/cognitive/AzureSearchWriter.py similarity index 100% rename from src/main/python/mmlspark/cognitive/AzureSearchWriter.py rename to cognitive/src/main/python/mmlspark/cognitive/AzureSearchWriter.py diff --git a/src/main/python/mmlspark/cognitive/BingImageSearch.py b/cognitive/src/main/python/mmlspark/cognitive/BingImageSearch.py similarity index 100% rename from src/main/python/mmlspark/cognitive/BingImageSearch.py rename to cognitive/src/main/python/mmlspark/cognitive/BingImageSearch.py diff --git a/src/__init__.py b/cognitive/src/main/python/mmlspark/cognitive/__init__.py similarity index 100% rename from src/__init__.py rename to cognitive/src/main/python/mmlspark/cognitive/__init__.py diff --git a/src/main/scala/com/microsoft/ml/spark/cognitive/AnamolyDetection.scala b/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/AnamolyDetection.scala similarity index 100% rename from src/main/scala/com/microsoft/ml/spark/cognitive/AnamolyDetection.scala rename to cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/AnamolyDetection.scala diff --git a/src/main/scala/com/microsoft/ml/spark/cognitive/AnomalyDetectorSchemas.scala b/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/AnomalyDetectorSchemas.scala similarity index 100% rename from src/main/scala/com/microsoft/ml/spark/cognitive/AnomalyDetectorSchemas.scala rename to cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/AnomalyDetectorSchemas.scala diff --git a/src/main/scala/com/microsoft/ml/spark/cognitive/AudioStreams.scala 
b/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/AudioStreams.scala similarity index 100% rename from src/main/scala/com/microsoft/ml/spark/cognitive/AudioStreams.scala rename to cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/AudioStreams.scala diff --git a/src/main/scala/com/microsoft/ml/spark/cognitive/AzureSearch.scala b/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/AzureSearch.scala similarity index 99% rename from src/main/scala/com/microsoft/ml/spark/cognitive/AzureSearch.scala rename to cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/AzureSearch.scala index 96024a68b63..b405bb13b09 100644 --- a/src/main/scala/com/microsoft/ml/spark/cognitive/AzureSearch.scala +++ b/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/AzureSearch.scala @@ -143,7 +143,8 @@ object AzureSearchWriter extends IndexParser with SLogging { val Logger: Logger = LogManager.getRootLogger - private def checkForErrors(fatal: Boolean)(errorRow: Row, inputRow: Row): Option[Row] = { + private def checkForErrors( + fatal: Boolean)(errorRow: Row, inputRow: Row): Option[Row] = { Option(errorRow).map { r => val message = s"Service Exception:\n\t ${r.toString()} \n for input:\n\t ${inputRow.toString()}" if (fatal) { diff --git a/src/main/scala/com/microsoft/ml/spark/cognitive/AzureSearchAPI.scala b/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/AzureSearchAPI.scala similarity index 100% rename from src/main/scala/com/microsoft/ml/spark/cognitive/AzureSearchAPI.scala rename to cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/AzureSearchAPI.scala diff --git a/src/main/scala/com/microsoft/ml/spark/cognitive/AzureSearchSchemas.scala b/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/AzureSearchSchemas.scala similarity index 100% rename from src/main/scala/com/microsoft/ml/spark/cognitive/AzureSearchSchemas.scala rename to cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/AzureSearchSchemas.scala diff --git a/src/main/scala/com/microsoft/ml/spark/cognitive/BingImageSearch.scala b/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/BingImageSearch.scala similarity index 100% rename from src/main/scala/com/microsoft/ml/spark/cognitive/BingImageSearch.scala rename to cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/BingImageSearch.scala diff --git a/src/main/scala/com/microsoft/ml/spark/cognitive/CognitiveServiceBase.scala b/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/CognitiveServiceBase.scala similarity index 100% rename from src/main/scala/com/microsoft/ml/spark/cognitive/CognitiveServiceBase.scala rename to cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/CognitiveServiceBase.scala diff --git a/src/main/scala/com/microsoft/ml/spark/cognitive/CognitiveServiceSchemas.scala b/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/CognitiveServiceSchemas.scala similarity index 100% rename from src/main/scala/com/microsoft/ml/spark/cognitive/CognitiveServiceSchemas.scala rename to cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/CognitiveServiceSchemas.scala diff --git a/src/main/scala/com/microsoft/ml/spark/cognitive/ComputerVision.scala b/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/ComputerVision.scala similarity index 100% rename from src/main/scala/com/microsoft/ml/spark/cognitive/ComputerVision.scala rename to cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/ComputerVision.scala diff --git a/src/main/scala/com/microsoft/ml/spark/cognitive/ComputerVisionSchemas.scala 
b/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/ComputerVisionSchemas.scala similarity index 100% rename from src/main/scala/com/microsoft/ml/spark/cognitive/ComputerVisionSchemas.scala rename to cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/ComputerVisionSchemas.scala diff --git a/src/main/scala/com/microsoft/ml/spark/cognitive/Face.scala b/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/Face.scala similarity index 100% rename from src/main/scala/com/microsoft/ml/spark/cognitive/Face.scala rename to cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/Face.scala diff --git a/src/main/scala/com/microsoft/ml/spark/cognitive/FaceSchemas.scala b/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/FaceSchemas.scala similarity index 100% rename from src/main/scala/com/microsoft/ml/spark/cognitive/FaceSchemas.scala rename to cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/FaceSchemas.scala diff --git a/src/main/scala/com/microsoft/ml/spark/cognitive/ImageSearchSchemas.scala b/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/ImageSearchSchemas.scala similarity index 100% rename from src/main/scala/com/microsoft/ml/spark/cognitive/ImageSearchSchemas.scala rename to cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/ImageSearchSchemas.scala diff --git a/src/main/scala/com/microsoft/ml/spark/cognitive/OCRSchemas.scala b/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/OCRSchemas.scala similarity index 100% rename from src/main/scala/com/microsoft/ml/spark/cognitive/OCRSchemas.scala rename to cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/OCRSchemas.scala diff --git a/src/main/scala/com/microsoft/ml/spark/cognitive/RESTHelpers.scala b/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/RESTHelpers.scala similarity index 100% rename from src/main/scala/com/microsoft/ml/spark/cognitive/RESTHelpers.scala rename to cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/RESTHelpers.scala diff --git a/src/main/scala/com/microsoft/ml/spark/cognitive/SpeechAPI.scala b/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/SpeechAPI.scala similarity index 100% rename from src/main/scala/com/microsoft/ml/spark/cognitive/SpeechAPI.scala rename to cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/SpeechAPI.scala diff --git a/src/main/scala/com/microsoft/ml/spark/cognitive/SpeechSchemas.scala b/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/SpeechSchemas.scala similarity index 100% rename from src/main/scala/com/microsoft/ml/spark/cognitive/SpeechSchemas.scala rename to cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/SpeechSchemas.scala diff --git a/src/main/scala/com/microsoft/ml/spark/cognitive/SpeechToText.scala b/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/SpeechToText.scala similarity index 100% rename from src/main/scala/com/microsoft/ml/spark/cognitive/SpeechToText.scala rename to cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/SpeechToText.scala diff --git a/src/main/scala/com/microsoft/ml/spark/cognitive/SpeechToTextSDK.scala b/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/SpeechToTextSDK.scala similarity index 98% rename from src/main/scala/com/microsoft/ml/spark/cognitive/SpeechToTextSDK.scala rename to cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/SpeechToTextSDK.scala index 51a965b0d08..45447ac5f2d 100644 --- a/src/main/scala/com/microsoft/ml/spark/cognitive/SpeechToTextSDK.scala +++ 
b/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/SpeechToTextSDK.scala @@ -8,15 +8,17 @@ import java.lang.ProcessBuilder.Redirect import java.net.{URI, URL} import java.util.UUID import java.util.concurrent.{LinkedBlockingQueue, TimeUnit} + import com.microsoft.cognitiveservices.speech._ import com.microsoft.cognitiveservices.speech.audio._ -import com.microsoft.cognitiveservices.speech.transcription.{Conversation, ConversationTranscriber, - ConversationTranscriptionEventArgs, Participant} +import com.microsoft.cognitiveservices.speech.transcription.{ + Conversation, ConversationTranscriber, ConversationTranscriptionEventArgs, Participant} import com.microsoft.cognitiveservices.speech.util.EventHandler import com.microsoft.ml.spark.build.BuildInfo import com.microsoft.ml.spark.cognitive.SpeechFormat._ import com.microsoft.ml.spark.core.contracts.HasOutputCol import com.microsoft.ml.spark.core.schema.{DatasetExtensions, SparkBindings} +import com.microsoft.ml.spark.core.utils.OsUtils import com.microsoft.ml.spark.io.http.HasURL import com.microsoft.ml.spark.logging.BasicLogging import com.microsoft.ml.spark.{CompressedStream, WavStream} @@ -36,10 +38,6 @@ import spray.json._ import scala.concurrent.{ExecutionContext, Future, blocking} import scala.language.existentials -object OsUtils { - val IsWindows: Boolean = System.getProperty("os.name").toLowerCase().indexOf("win") >= 0 -} - object SpeechToTextSDK extends ComplexParamsReadable[SpeechToTextSDK] private[ml] class BlockingQueueIterator[T](lbq: LinkedBlockingQueue[Option[T]], diff --git a/src/main/scala/com/microsoft/ml/spark/cognitive/TextAnalytics.scala b/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/TextAnalytics.scala similarity index 100% rename from src/main/scala/com/microsoft/ml/spark/cognitive/TextAnalytics.scala rename to cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/TextAnalytics.scala diff --git a/src/main/scala/com/microsoft/ml/spark/cognitive/TextAnalyticsSchemas.scala b/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/TextAnalyticsSchemas.scala similarity index 100% rename from src/main/scala/com/microsoft/ml/spark/cognitive/TextAnalyticsSchemas.scala rename to cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/TextAnalyticsSchemas.scala diff --git a/src/main/scala/com/microsoft/ml/spark/cognitive/TextAnalyticsSchemasV2.scala b/cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/TextAnalyticsSchemasV2.scala similarity index 100% rename from src/main/scala/com/microsoft/ml/spark/cognitive/TextAnalyticsSchemasV2.scala rename to cognitive/src/main/scala/com/microsoft/ml/spark/cognitive/TextAnalyticsSchemasV2.scala diff --git a/src/main/__init__.py b/cognitive/src/test/python/mmlsparktest/cognitive/__init__.py similarity index 100% rename from src/main/__init__.py rename to cognitive/src/test/python/mmlsparktest/cognitive/__init__.py diff --git a/src/test/python/mmlsparktest/cognitive/test_simple.py b/cognitive/src/test/python/mmlsparktest/cognitive/test_simple.py similarity index 100% rename from src/test/python/mmlsparktest/cognitive/test_simple.py rename to cognitive/src/test/python/mmlsparktest/cognitive/test_simple.py diff --git a/src/test/scala/com/microsoft/ml/spark/cognitive/split1/AnamolyDetectionSuite.scala b/cognitive/src/test/scala/com/microsoft/ml/spark/cognitive/split1/AnamolyDetectionSuite.scala similarity index 100% rename from src/test/scala/com/microsoft/ml/spark/cognitive/split1/AnamolyDetectionSuite.scala rename to 
cognitive/src/test/scala/com/microsoft/ml/spark/cognitive/split1/AnamolyDetectionSuite.scala diff --git a/src/test/scala/com/microsoft/ml/spark/cognitive/split1/ComputerVisionSuite.scala b/cognitive/src/test/scala/com/microsoft/ml/spark/cognitive/split1/ComputerVisionSuite.scala similarity index 99% rename from src/test/scala/com/microsoft/ml/spark/cognitive/split1/ComputerVisionSuite.scala rename to cognitive/src/test/scala/com/microsoft/ml/spark/cognitive/split1/ComputerVisionSuite.scala index 11a75834a4f..6255d9462b4 100644 --- a/src/test/scala/com/microsoft/ml/spark/cognitive/split1/ComputerVisionSuite.scala +++ b/cognitive/src/test/scala/com/microsoft/ml/spark/cognitive/split1/ComputerVisionSuite.scala @@ -9,12 +9,10 @@ import com.microsoft.ml.spark.core.test.base.{Flaky, TestBase} import com.microsoft.ml.spark.core.test.fuzzing.{TestObject, TransformerFuzzing} import org.apache.spark.ml.NamespaceInjections.pipelineModel import org.apache.spark.ml.util.MLReadable -import org.apache.spark.sql.functions.{corr, typedLit} +import org.apache.spark.sql.functions.typedLit import org.apache.spark.sql.{DataFrame, Dataset, Row} import org.scalactic.Equality -import org.scalatest.Assertion import com.microsoft.ml.spark.FluentAPI._ -import com.microsoft.ml.spark.featurize.text.PageSplitter trait CognitiveKey { lazy val cognitiveKey = sys.env.getOrElse("COGNITIVE_API_KEY", Secrets.CognitiveApiKey) diff --git a/src/test/scala/com/microsoft/ml/spark/cognitive/split1/FaceAPI.scala b/cognitive/src/test/scala/com/microsoft/ml/spark/cognitive/split1/FaceAPI.scala similarity index 100% rename from src/test/scala/com/microsoft/ml/spark/cognitive/split1/FaceAPI.scala rename to cognitive/src/test/scala/com/microsoft/ml/spark/cognitive/split1/FaceAPI.scala diff --git a/src/test/scala/com/microsoft/ml/spark/cognitive/split1/FaceSuite.scala b/cognitive/src/test/scala/com/microsoft/ml/spark/cognitive/split1/FaceSuite.scala similarity index 100% rename from src/test/scala/com/microsoft/ml/spark/cognitive/split1/FaceSuite.scala rename to cognitive/src/test/scala/com/microsoft/ml/spark/cognitive/split1/FaceSuite.scala diff --git a/src/test/scala/com/microsoft/ml/spark/cognitive/split1/ImageSearchSuite.scala b/cognitive/src/test/scala/com/microsoft/ml/spark/cognitive/split1/ImageSearchSuite.scala similarity index 100% rename from src/test/scala/com/microsoft/ml/spark/cognitive/split1/ImageSearchSuite.scala rename to cognitive/src/test/scala/com/microsoft/ml/spark/cognitive/split1/ImageSearchSuite.scala diff --git a/src/test/scala/com/microsoft/ml/spark/cognitive/split1/TextAnalyticsSuite.scala b/cognitive/src/test/scala/com/microsoft/ml/spark/cognitive/split1/TextAnalyticsSuite.scala similarity index 100% rename from src/test/scala/com/microsoft/ml/spark/cognitive/split1/TextAnalyticsSuite.scala rename to cognitive/src/test/scala/com/microsoft/ml/spark/cognitive/split1/TextAnalyticsSuite.scala diff --git a/src/test/scala/com/microsoft/ml/spark/cognitive/split2/SearchWriterSuite.scala b/cognitive/src/test/scala/com/microsoft/ml/spark/cognitive/split2/SearchWriterSuite.scala similarity index 100% rename from src/test/scala/com/microsoft/ml/spark/cognitive/split2/SearchWriterSuite.scala rename to cognitive/src/test/scala/com/microsoft/ml/spark/cognitive/split2/SearchWriterSuite.scala diff --git a/src/test/scala/com/microsoft/ml/spark/cognitive/split2/SpeechToTextSuite.scala b/cognitive/src/test/scala/com/microsoft/ml/spark/cognitive/split2/SpeechToTextSuite.scala similarity index 100% rename from 
src/test/scala/com/microsoft/ml/spark/cognitive/split2/SpeechToTextSuite.scala rename to cognitive/src/test/scala/com/microsoft/ml/spark/cognitive/split2/SpeechToTextSuite.scala diff --git a/src/test/scala/com/microsoft/ml/spark/cognitive/split3/SpeechToTextSDKSuite.scala b/cognitive/src/test/scala/com/microsoft/ml/spark/cognitive/split3/SpeechToTextSDKSuite.scala similarity index 100% rename from src/test/scala/com/microsoft/ml/spark/cognitive/split3/SpeechToTextSDKSuite.scala rename to cognitive/src/test/scala/com/microsoft/ml/spark/cognitive/split3/SpeechToTextSDKSuite.scala diff --git a/src/test/scala/com/microsoft/ml/spark/core/utils/ModelEqualitySuite.scala b/cognitive/src/test/scala/com/microsoft/ml/spark/core/utils/utils/ModelEqualitySuite.scala similarity index 94% rename from src/test/scala/com/microsoft/ml/spark/core/utils/ModelEqualitySuite.scala rename to cognitive/src/test/scala/com/microsoft/ml/spark/core/utils/utils/ModelEqualitySuite.scala index 620ab98aa28..d88d70d63af 100644 --- a/src/test/scala/com/microsoft/ml/spark/core/utils/ModelEqualitySuite.scala +++ b/cognitive/src/test/scala/com/microsoft/ml/spark/core/utils/utils/ModelEqualitySuite.scala @@ -1,11 +1,12 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. See LICENSE in project root for information. -package com.microsoft.ml.spark.core.utils +package com.microsoft.ml.spark.core.utils.utils import com.microsoft.ml.spark.cognitive.TextSentiment import com.microsoft.ml.spark.core.env.FileUtilities.join import com.microsoft.ml.spark.core.test.base.TestBase +import com.microsoft.ml.spark.core.utils.ModelEquality import com.microsoft.ml.spark.stages.DropColumns class ModelEqualitySuite extends TestBase { diff --git a/src/main/python/LICENSE.txt b/core/src/main/python/LICENSE.txt similarity index 100% rename from src/main/python/LICENSE.txt rename to core/src/main/python/LICENSE.txt diff --git a/src/main/python/MANIFEST.in b/core/src/main/python/MANIFEST.in similarity index 100% rename from src/main/python/MANIFEST.in rename to core/src/main/python/MANIFEST.in diff --git a/src/main/python/__init__.py b/core/src/main/python/__init__.py similarity index 100% rename from src/main/python/__init__.py rename to core/src/main/python/__init__.py diff --git a/src/main/python/mmlspark/README.txt b/core/src/main/python/mmlspark/README.txt similarity index 100% rename from src/main/python/mmlspark/README.txt rename to core/src/main/python/mmlspark/README.txt diff --git a/src/main/python/mmlspark/__init__.py b/core/src/main/python/mmlspark/__init__.py similarity index 100% rename from src/main/python/mmlspark/__init__.py rename to core/src/main/python/mmlspark/__init__.py diff --git a/src/main/python/mmlspark/automl/BestModel.py b/core/src/main/python/mmlspark/automl/BestModel.py similarity index 100% rename from src/main/python/mmlspark/automl/BestModel.py rename to core/src/main/python/mmlspark/automl/BestModel.py diff --git a/src/main/python/mmlspark/automl/HyperparamBuilder.py b/core/src/main/python/mmlspark/automl/HyperparamBuilder.py similarity index 100% rename from src/main/python/mmlspark/automl/HyperparamBuilder.py rename to core/src/main/python/mmlspark/automl/HyperparamBuilder.py diff --git a/src/main/python/mmlspark/automl/TuneHyperparametersModel.py b/core/src/main/python/mmlspark/automl/TuneHyperparametersModel.py similarity index 100% rename from src/main/python/mmlspark/automl/TuneHyperparametersModel.py rename to 
core/src/main/python/mmlspark/automl/TuneHyperparametersModel.py diff --git a/src/main/python/mmlspark/automl/__init__.py b/core/src/main/python/mmlspark/automl/__init__.py similarity index 100% rename from src/main/python/mmlspark/automl/__init__.py rename to core/src/main/python/mmlspark/automl/__init__.py diff --git a/src/main/python/mmlspark/cntk/__init__.py b/core/src/main/python/mmlspark/core/__init__.py similarity index 100% rename from src/main/python/mmlspark/cntk/__init__.py rename to core/src/main/python/mmlspark/core/__init__.py diff --git a/src/main/python/mmlspark/core/schema/TypeConversionUtils.py b/core/src/main/python/mmlspark/core/schema/TypeConversionUtils.py similarity index 100% rename from src/main/python/mmlspark/core/schema/TypeConversionUtils.py rename to core/src/main/python/mmlspark/core/schema/TypeConversionUtils.py diff --git a/src/main/python/mmlspark/core/schema/Utils.py b/core/src/main/python/mmlspark/core/schema/Utils.py similarity index 100% rename from src/main/python/mmlspark/core/schema/Utils.py rename to core/src/main/python/mmlspark/core/schema/Utils.py diff --git a/src/main/python/mmlspark/cognitive/__init__.py b/core/src/main/python/mmlspark/core/schema/__init__.py similarity index 100% rename from src/main/python/mmlspark/cognitive/__init__.py rename to core/src/main/python/mmlspark/core/schema/__init__.py diff --git a/src/main/python/mmlspark/core/__init__.py b/core/src/main/python/mmlspark/core/serialize/__init__.py similarity index 100% rename from src/main/python/mmlspark/core/__init__.py rename to core/src/main/python/mmlspark/core/serialize/__init__.py diff --git a/src/main/python/mmlspark/core/serialize/java_params_patch.py b/core/src/main/python/mmlspark/core/serialize/java_params_patch.py similarity index 100% rename from src/main/python/mmlspark/core/serialize/java_params_patch.py rename to core/src/main/python/mmlspark/core/serialize/java_params_patch.py diff --git a/src/main/python/mmlspark/core/spark/FluentAPI.py b/core/src/main/python/mmlspark/core/spark/FluentAPI.py similarity index 100% rename from src/main/python/mmlspark/core/spark/FluentAPI.py rename to core/src/main/python/mmlspark/core/spark/FluentAPI.py diff --git a/src/main/python/mmlspark/core/schema/__init__.py b/core/src/main/python/mmlspark/core/spark/__init__.py similarity index 100% rename from src/main/python/mmlspark/core/schema/__init__.py rename to core/src/main/python/mmlspark/core/spark/__init__.py diff --git a/src/main/python/mmlspark/core/serialize/__init__.py b/core/src/main/python/mmlspark/cyber/__init__.py similarity index 100% rename from src/main/python/mmlspark/core/serialize/__init__.py rename to core/src/main/python/mmlspark/cyber/__init__.py diff --git a/src/main/python/mmlspark/core/spark/__init__.py b/core/src/main/python/mmlspark/cyber/anomaly/__init__.py similarity index 100% rename from src/main/python/mmlspark/core/spark/__init__.py rename to core/src/main/python/mmlspark/cyber/anomaly/__init__.py diff --git a/src/main/python/mmlspark/cyber/anomaly/collaborative_filtering.py b/core/src/main/python/mmlspark/cyber/anomaly/collaborative_filtering.py similarity index 100% rename from src/main/python/mmlspark/cyber/anomaly/collaborative_filtering.py rename to core/src/main/python/mmlspark/cyber/anomaly/collaborative_filtering.py diff --git a/src/main/python/mmlspark/cyber/anomaly/complement_access.py b/core/src/main/python/mmlspark/cyber/anomaly/complement_access.py similarity index 100% rename from 
src/main/python/mmlspark/cyber/anomaly/complement_access.py rename to core/src/main/python/mmlspark/cyber/anomaly/complement_access.py diff --git a/src/main/python/mmlspark/cyber/dataset.py b/core/src/main/python/mmlspark/cyber/dataset.py similarity index 100% rename from src/main/python/mmlspark/cyber/dataset.py rename to core/src/main/python/mmlspark/cyber/dataset.py diff --git a/src/main/python/mmlspark/cyber/__init__.py b/core/src/main/python/mmlspark/cyber/feature/__init__.py similarity index 100% rename from src/main/python/mmlspark/cyber/__init__.py rename to core/src/main/python/mmlspark/cyber/feature/__init__.py diff --git a/src/main/python/mmlspark/cyber/feature/indexers.py b/core/src/main/python/mmlspark/cyber/feature/indexers.py similarity index 100% rename from src/main/python/mmlspark/cyber/feature/indexers.py rename to core/src/main/python/mmlspark/cyber/feature/indexers.py diff --git a/src/main/python/mmlspark/cyber/feature/scalers.py b/core/src/main/python/mmlspark/cyber/feature/scalers.py similarity index 100% rename from src/main/python/mmlspark/cyber/feature/scalers.py rename to core/src/main/python/mmlspark/cyber/feature/scalers.py diff --git a/src/main/python/mmlspark/cyber/anomaly/__init__.py b/core/src/main/python/mmlspark/cyber/utils/__init__.py similarity index 100% rename from src/main/python/mmlspark/cyber/anomaly/__init__.py rename to core/src/main/python/mmlspark/cyber/utils/__init__.py diff --git a/src/main/python/mmlspark/cyber/utils/spark_utils.py b/core/src/main/python/mmlspark/cyber/utils/spark_utils.py similarity index 100% rename from src/main/python/mmlspark/cyber/utils/spark_utils.py rename to core/src/main/python/mmlspark/cyber/utils/spark_utils.py diff --git a/src/main/python/mmlspark/doc/conf.py b/core/src/main/python/mmlspark/doc/conf.py similarity index 100% rename from src/main/python/mmlspark/doc/conf.py rename to core/src/main/python/mmlspark/doc/conf.py diff --git a/src/main/python/mmlspark/doc/index.rst b/core/src/main/python/mmlspark/doc/index.rst similarity index 100% rename from src/main/python/mmlspark/doc/index.rst rename to core/src/main/python/mmlspark/doc/index.rst diff --git a/src/main/python/mmlspark/doc/scala.rst b/core/src/main/python/mmlspark/doc/scala.rst similarity index 100% rename from src/main/python/mmlspark/doc/scala.rst rename to core/src/main/python/mmlspark/doc/scala.rst diff --git a/src/main/python/mmlspark/downloader/ModelDownloader.py b/core/src/main/python/mmlspark/downloader/ModelDownloader.py similarity index 100% rename from src/main/python/mmlspark/downloader/ModelDownloader.py rename to core/src/main/python/mmlspark/downloader/ModelDownloader.py diff --git a/src/main/python/mmlspark/cyber/feature/__init__.py b/core/src/main/python/mmlspark/downloader/__init__.py similarity index 100% rename from src/main/python/mmlspark/cyber/feature/__init__.py rename to core/src/main/python/mmlspark/downloader/__init__.py diff --git a/src/main/python/mmlspark/io/IOImplicits.py b/core/src/main/python/mmlspark/io/IOImplicits.py similarity index 100% rename from src/main/python/mmlspark/io/IOImplicits.py rename to core/src/main/python/mmlspark/io/IOImplicits.py diff --git a/src/main/python/mmlspark/cyber/utils/__init__.py b/core/src/main/python/mmlspark/io/__init__.py similarity index 100% rename from src/main/python/mmlspark/cyber/utils/__init__.py rename to core/src/main/python/mmlspark/io/__init__.py diff --git a/src/main/python/mmlspark/io/binary/BinaryFileReader.py 
b/core/src/main/python/mmlspark/io/binary/BinaryFileReader.py similarity index 100% rename from src/main/python/mmlspark/io/binary/BinaryFileReader.py rename to core/src/main/python/mmlspark/io/binary/BinaryFileReader.py diff --git a/src/main/python/mmlspark/downloader/__init__.py b/core/src/main/python/mmlspark/io/binary/__init__.py similarity index 100% rename from src/main/python/mmlspark/downloader/__init__.py rename to core/src/main/python/mmlspark/io/binary/__init__.py diff --git a/src/main/python/mmlspark/io/http/HTTPFunctions.py b/core/src/main/python/mmlspark/io/http/HTTPFunctions.py similarity index 100% rename from src/main/python/mmlspark/io/http/HTTPFunctions.py rename to core/src/main/python/mmlspark/io/http/HTTPFunctions.py diff --git a/src/main/python/mmlspark/io/http/JSONOutputParser.py b/core/src/main/python/mmlspark/io/http/JSONOutputParser.py similarity index 100% rename from src/main/python/mmlspark/io/http/JSONOutputParser.py rename to core/src/main/python/mmlspark/io/http/JSONOutputParser.py diff --git a/src/main/python/mmlspark/io/http/ServingFunctions.py b/core/src/main/python/mmlspark/io/http/ServingFunctions.py similarity index 100% rename from src/main/python/mmlspark/io/http/ServingFunctions.py rename to core/src/main/python/mmlspark/io/http/ServingFunctions.py diff --git a/src/main/python/mmlspark/io/http/SimpleHTTPTransformer.py b/core/src/main/python/mmlspark/io/http/SimpleHTTPTransformer.py similarity index 100% rename from src/main/python/mmlspark/io/http/SimpleHTTPTransformer.py rename to core/src/main/python/mmlspark/io/http/SimpleHTTPTransformer.py diff --git a/src/main/python/mmlspark/image/__init__.py b/core/src/main/python/mmlspark/io/http/__init__.py similarity index 100% rename from src/main/python/mmlspark/image/__init__.py rename to core/src/main/python/mmlspark/io/http/__init__.py diff --git a/src/main/python/mmlspark/io/image/ImageUtils.py b/core/src/main/python/mmlspark/io/image/ImageUtils.py similarity index 100% rename from src/main/python/mmlspark/io/image/ImageUtils.py rename to core/src/main/python/mmlspark/io/image/ImageUtils.py diff --git a/src/main/python/mmlspark/io/__init__.py b/core/src/main/python/mmlspark/io/image/__init__.py similarity index 100% rename from src/main/python/mmlspark/io/__init__.py rename to core/src/main/python/mmlspark/io/image/__init__.py diff --git a/src/main/python/mmlspark/io/powerbi/PowerBIWriter.py b/core/src/main/python/mmlspark/io/powerbi/PowerBIWriter.py similarity index 100% rename from src/main/python/mmlspark/io/powerbi/PowerBIWriter.py rename to core/src/main/python/mmlspark/io/powerbi/PowerBIWriter.py diff --git a/src/main/python/mmlspark/io/binary/__init__.py b/core/src/main/python/mmlspark/io/powerbi/__init__.py similarity index 100% rename from src/main/python/mmlspark/io/binary/__init__.py rename to core/src/main/python/mmlspark/io/powerbi/__init__.py diff --git a/src/main/python/mmlspark/nn/ConditionalBallTree.py b/core/src/main/python/mmlspark/nn/ConditionalBallTree.py similarity index 100% rename from src/main/python/mmlspark/nn/ConditionalBallTree.py rename to core/src/main/python/mmlspark/nn/ConditionalBallTree.py diff --git a/src/main/python/mmlspark/io/http/__init__.py b/core/src/main/python/mmlspark/nn/__init__.py similarity index 100% rename from src/main/python/mmlspark/io/http/__init__.py rename to core/src/main/python/mmlspark/nn/__init__.py diff --git a/src/main/python/mmlspark/io/image/__init__.py b/core/src/main/python/mmlspark/plot/__init__.py similarity index 100% rename from 
src/main/python/mmlspark/io/image/__init__.py rename to core/src/main/python/mmlspark/plot/__init__.py diff --git a/src/main/python/mmlspark/plot/plot.py b/core/src/main/python/mmlspark/plot/plot.py similarity index 100% rename from src/main/python/mmlspark/plot/plot.py rename to core/src/main/python/mmlspark/plot/plot.py diff --git a/src/main/python/mmlspark/recommendation/RankingTrainValidationSplit.py b/core/src/main/python/mmlspark/recommendation/RankingTrainValidationSplit.py similarity index 100% rename from src/main/python/mmlspark/recommendation/RankingTrainValidationSplit.py rename to core/src/main/python/mmlspark/recommendation/RankingTrainValidationSplit.py diff --git a/src/main/python/mmlspark/recommendation/RankingTrainValidationSplitModel.py b/core/src/main/python/mmlspark/recommendation/RankingTrainValidationSplitModel.py similarity index 100% rename from src/main/python/mmlspark/recommendation/RankingTrainValidationSplitModel.py rename to core/src/main/python/mmlspark/recommendation/RankingTrainValidationSplitModel.py diff --git a/src/main/python/mmlspark/recommendation/SARModel.py b/core/src/main/python/mmlspark/recommendation/SARModel.py similarity index 100% rename from src/main/python/mmlspark/recommendation/SARModel.py rename to core/src/main/python/mmlspark/recommendation/SARModel.py diff --git a/src/main/python/mmlspark/recommendation/__init__.py b/core/src/main/python/mmlspark/recommendation/__init__.py similarity index 100% rename from src/main/python/mmlspark/recommendation/__init__.py rename to core/src/main/python/mmlspark/recommendation/__init__.py diff --git a/src/main/python/mmlspark/stages/UDFTransformer.py b/core/src/main/python/mmlspark/stages/UDFTransformer.py similarity index 100% rename from src/main/python/mmlspark/stages/UDFTransformer.py rename to core/src/main/python/mmlspark/stages/UDFTransformer.py diff --git a/src/main/python/mmlspark/io/powerbi/__init__.py b/core/src/main/python/mmlspark/stages/__init__.py similarity index 100% rename from src/main/python/mmlspark/io/powerbi/__init__.py rename to core/src/main/python/mmlspark/stages/__init__.py diff --git a/src/main/scala/com/microsoft/ml/spark/automl/DefaultHyperparams.scala b/core/src/main/scala/com/microsoft/ml/spark/automl/DefaultHyperparams.scala similarity index 100% rename from src/main/scala/com/microsoft/ml/spark/automl/DefaultHyperparams.scala rename to core/src/main/scala/com/microsoft/ml/spark/automl/DefaultHyperparams.scala diff --git a/src/main/scala/com/microsoft/ml/spark/automl/EvaluationUtils.scala b/core/src/main/scala/com/microsoft/ml/spark/automl/EvaluationUtils.scala similarity index 100% rename from src/main/scala/com/microsoft/ml/spark/automl/EvaluationUtils.scala rename to core/src/main/scala/com/microsoft/ml/spark/automl/EvaluationUtils.scala diff --git a/src/main/scala/com/microsoft/ml/spark/automl/FindBestModel.scala b/core/src/main/scala/com/microsoft/ml/spark/automl/FindBestModel.scala similarity index 100% rename from src/main/scala/com/microsoft/ml/spark/automl/FindBestModel.scala rename to core/src/main/scala/com/microsoft/ml/spark/automl/FindBestModel.scala diff --git a/src/main/scala/com/microsoft/ml/spark/automl/FindBestModel.txt b/core/src/main/scala/com/microsoft/ml/spark/automl/FindBestModel.txt similarity index 100% rename from src/main/scala/com/microsoft/ml/spark/automl/FindBestModel.txt rename to core/src/main/scala/com/microsoft/ml/spark/automl/FindBestModel.txt diff --git a/src/main/scala/com/microsoft/ml/spark/automl/HyperparamBuilder.scala 
b/core/src/main/scala/com/microsoft/ml/spark/automl/HyperparamBuilder.scala
similarity index 100%
rename from src/main/scala/com/microsoft/ml/spark/automl/HyperparamBuilder.scala
rename to core/src/main/scala/com/microsoft/ml/spark/automl/HyperparamBuilder.scala
diff --git a/src/main/scala/com/microsoft/ml/spark/automl/ParamSpace.scala b/core/src/main/scala/com/microsoft/ml/spark/automl/ParamSpace.scala
similarity index 100%
rename from src/main/scala/com/microsoft/ml/spark/automl/ParamSpace.scala
rename to core/src/main/scala/com/microsoft/ml/spark/automl/ParamSpace.scala
diff --git a/src/main/scala/com/microsoft/ml/spark/automl/TuneHyperparameters.scala b/core/src/main/scala/com/microsoft/ml/spark/automl/TuneHyperparameters.scala
similarity index 100%
rename from src/main/scala/com/microsoft/ml/spark/automl/TuneHyperparameters.scala
rename to core/src/main/scala/com/microsoft/ml/spark/automl/TuneHyperparameters.scala
diff --git a/src/main/scala/com/microsoft/ml/spark/automl/TuneHyperparameters.txt b/core/src/main/scala/com/microsoft/ml/spark/automl/TuneHyperparameters.txt
similarity index 100%
rename from src/main/scala/com/microsoft/ml/spark/automl/TuneHyperparameters.txt
rename to core/src/main/scala/com/microsoft/ml/spark/automl/TuneHyperparameters.txt
diff --git a/core/src/main/scala/com/microsoft/ml/spark/codegen/CodeGen.scala b/core/src/main/scala/com/microsoft/ml/spark/codegen/CodeGen.scala
new file mode 100644
index 00000000000..72de88bd22c
--- /dev/null
+++ b/core/src/main/scala/com/microsoft/ml/spark/codegen/CodeGen.scala
@@ -0,0 +1,202 @@
+// Copyright (C) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License. See LICENSE in project root for information.
+
+package com.microsoft.ml.spark.codegen
+
+import java.io.File
+
+import com.microsoft.ml.spark.codegen.CodegenConfigProtocol._
+import com.microsoft.ml.spark.core.env.FileUtilities._
+import org.apache.commons.io.FileUtils
+import org.apache.commons.io.FilenameUtils._
+import com.microsoft.ml.spark.core.utils.JarLoadingUtils.instantiateServices
+import spray.json._
+
+object CodeGenUtils {
+  def clean(dir: File): Unit = if (dir.exists()) FileUtils.forceDelete(dir)
+
+  def toDir(f: File): File = new File(f, File.separator)
+}
+
+
+object CodeGen {
+
+  import CodeGenUtils._
+
+  def generatePythonClasses(conf: CodegenConfig): Unit = {
+    val instantiatedClasses = instantiateServices[PythonWrappable](conf.jarName)
+    instantiatedClasses.foreach { w =>
+      println(w.getClass.getName)
+      w.makePyFile(conf)
+    }
+  }
+
+  def generateRClasses(conf: CodegenConfig): Unit = {
+    val instantiatedClasses = instantiateServices[RWrappable](conf.jarName)
+    instantiatedClasses.foreach { w =>
+      println(w.getClass.getName)
+      w.makeRFile(conf)
+    }
+  }
+
+  private def makeInitFiles(conf: CodegenConfig, packageFolder: String = ""): Unit = {
+    val dir = new File(new File(conf.pySrcDir, "mmlspark"), packageFolder)
+    val packageString = if (packageFolder != "") packageFolder.replace("/", ".") else ""
+    val importStrings =
+      dir.listFiles.filter(_.isFile).sorted
+        .map(_.getName)
+        .filter(name => name.endsWith(".py") && !name.startsWith("_") && !name.startsWith("test"))
+        .map(name => s"from mmlspark$packageString.${getBaseName(name)} import *\n").mkString("")
+    val initFile = new File(dir, "__init__.py")
+    if (packageFolder != "") {
+      writeFile(initFile, conf.packageHelp(importStrings))
+    } else if (initFile.exists()) {
+      initFile.delete()
+    }
+    dir.listFiles().filter(_.isDirectory).foreach(f =>
+      makeInitFiles(conf, packageFolder + "/" + f.getName)
+    )
+  }
+
+  //noinspection ScalaStyle
+  def generateRPackageData(conf: CodegenConfig): Unit = {
+    // description file; need to encode version as decimal
+    val today = new java.text.SimpleDateFormat("yyyy-MM-dd")
+      .format(new java.util.Date())
+
+    conf.rSrcDir.mkdirs()
+    writeFile(new File(conf.rSrcDir.getParentFile, "DESCRIPTION"),
+      s"""|Package: ${conf.name}
+          |Title: Access to MMLSpark via R
+          |Description: Provides an interface to MMLSpark.
+          |Version: ${conf.rVersion}
+          |Date: $today
+          |Author: Microsoft Corporation
+          |Maintainer: MMLSpark Team
-/** UnicodeNormalize takes a dataframe and normalizes the unicode representation.
- */
-class UnicodeNormalize(val uid: String) extends Transformer
- with HasInputCol with HasOutputCol with Wrappable with ComplexParamsWritable with BasicLogging {
- logClass()
-
- def this() = this(Identifiable.randomUID("UnicodeNormalize"))
-
- val form = new Param[String](this, "form", "Unicode normalization form: NFC, NFD, NFKC, NFKD")
-
- /** @group getParam */
- def getForm: String = get(form).getOrElse("NFKD")
-
- /** @group setParam */
- def setForm(value: String): this.type = {
- // check input value
- Normalizer.Form.valueOf(getForm)
-
- set("form", value)
- }
-
- val lower = new BooleanParam(this, "lower", "Lowercase text")
-
- /** @group getParam */
- def getLower: Boolean = get(lower).getOrElse(true)
-
- /** @group setParam */
- def setLower(value: Boolean): this.type = set("lower", value)
-
- /** @param dataset - The input dataset, to be transformed
- * @return The DataFrame that results from column selection
- */
- override def transform(dataset: Dataset[_]): DataFrame = {
- logTransform[DataFrame]({
- val inputIndex = dataset.columns.indexOf(getInputCol)
-
- require(inputIndex != -1, s"Input column $getInputCol does not exist")
-
- val normalizeFunc = (value: String) =>
- if (value == null) null
- else Normalizer.normalize(value, Normalizer.Form.valueOf(getForm))
-
- val f = if (getLower)
- (value: String) => Option(value).map(s => normalizeFunc(s.toLowerCase)).orNull
- else
- normalizeFunc
-
- val textMapper = udf(f)
-
- dataset.withColumn(getOutputCol, textMapper(dataset(getInputCol)).as(getOutputCol))
- })
- }
-
- def transformSchema(schema: StructType): StructType = {
- schema.add(StructField(getOutputCol, StringType))
- }
-
- def copy(extra: ParamMap): UnicodeNormalize = defaultCopy(extra)
-
-}
+// Copyright (C) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License. See LICENSE in project root for information.
+
+package com.microsoft.ml.spark.stages
+
+import org.apache.spark.ml.{ComplexParamsReadable, ComplexParamsWritable, Transformer}
+import org.apache.spark.ml.param.{BooleanParam, Param, ParamMap}
+import org.apache.spark.ml.util.Identifiable
+import org.apache.spark.sql.{DataFrame, Dataset}
+import org.apache.spark.sql.functions.udf
+
+import java.text.Normalizer
+import com.microsoft.ml.spark.codegen.Wrappable
+import com.microsoft.ml.spark.core.contracts.{HasInputCol, HasOutputCol}
+import com.microsoft.ml.spark.logging.BasicLogging
+import org.apache.spark.sql.types.{StringType, StructField, StructType}
+
+object UnicodeNormalize extends ComplexParamsReadable[UnicodeNormalize]
+
+/** UnicodeNormalize takes a dataframe and normalizes the unicode representation.
+ */
+class UnicodeNormalize(val uid: String) extends Transformer
+ with HasInputCol with HasOutputCol with Wrappable with ComplexParamsWritable with BasicLogging {
+ logClass()
+
+ def this() = this(Identifiable.randomUID("UnicodeNormalize"))
+
+ val form = new Param[String](this, "form", "Unicode normalization form: NFC, NFD, NFKC, NFKD")
+
+ /** @group getParam */
+ def getForm: String = get(form).getOrElse("NFKD")
+
+ /** @group setParam */
+ def setForm(value: String): this.type = {
+    // check that the supplied value is a valid normalization form
+    Normalizer.Form.valueOf(value)
+
+ set("form", value)
+ }
+
+ val lower = new BooleanParam(this, "lower", "Lowercase text")
+
+ /** @group getParam */
+ def getLower: Boolean = get(lower).getOrElse(true)
+
+ /** @group setParam */
+ def setLower(value: Boolean): this.type = set("lower", value)
+
+ /** @param dataset - The input dataset, to be transformed
+ * @return The DataFrame that results from column selection
+ */
+ override def transform(dataset: Dataset[_]): DataFrame = {
+ logTransform[DataFrame]({
+ val inputIndex = dataset.columns.indexOf(getInputCol)
+
+ require(inputIndex != -1, s"Input column $getInputCol does not exist")
+
+ val normalizeFunc = (value: String) =>
+ if (value == null) null
+ else Normalizer.normalize(value, Normalizer.Form.valueOf(getForm))
+
+ val f = if (getLower)
+ (value: String) => Option(value).map(s => normalizeFunc(s.toLowerCase)).orNull
+ else
+ normalizeFunc
+
+ val textMapper = udf(f)
+
+ dataset.withColumn(getOutputCol, textMapper(dataset(getInputCol)).as(getOutputCol))
+ })
+ }
+
+ def transformSchema(schema: StructType): StructType = {
+ schema.add(StructField(getOutputCol, StringType))
+ }
+
+ def copy(extra: ParamMap): UnicodeNormalize = defaultCopy(extra)
+
+}
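
For reference, here is a hypothetical end-to-end use of the transformer added above; the data and column names are illustrative. Note that `form` defaults to NFKD, `lower` defaults to true, and nulls pass through unchanged:

```scala
import com.microsoft.ml.spark.stages.UnicodeNormalize
import org.apache.spark.sql.SparkSession

object UnicodeNormalizeExample {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[*]").appName("demo").getOrCreate()
    import spark.implicits._

    // "ﬁ" (U+FB01) decomposes to "fi" under NFKD; "é" splits into "e" plus a combining accent.
    val df = Seq("Café", "ﬁle", null).toDF("text")

    val result = new UnicodeNormalize()
      .setInputCol("text")
      .setOutputCol("textNorm")
      .setForm("NFKD")
      .setLower(true)
      .transform(df)

    result.show(truncate = false)
    spark.stop()
  }
}
```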
diff --git a/src/main/scala/com/microsoft/ml/spark/stages/udfs.scala b/core/src/main/scala/com/microsoft/ml/spark/stages/udfs.scala
similarity index 100%
rename from src/main/scala/com/microsoft/ml/spark/stages/udfs.scala
rename to core/src/main/scala/com/microsoft/ml/spark/stages/udfs.scala
diff --git a/src/main/scala/com/microsoft/ml/spark/train/AutoTrainedModel.scala b/core/src/main/scala/com/microsoft/ml/spark/train/AutoTrainedModel.scala
similarity index 100%
rename from src/main/scala/com/microsoft/ml/spark/train/AutoTrainedModel.scala
rename to core/src/main/scala/com/microsoft/ml/spark/train/AutoTrainedModel.scala
diff --git a/src/main/scala/com/microsoft/ml/spark/train/AutoTrainer.scala b/core/src/main/scala/com/microsoft/ml/spark/train/AutoTrainer.scala
similarity index 100%
rename from src/main/scala/com/microsoft/ml/spark/train/AutoTrainer.scala
rename to core/src/main/scala/com/microsoft/ml/spark/train/AutoTrainer.scala
diff --git a/src/main/scala/com/microsoft/ml/spark/train/ComputeModelStatistics.scala b/core/src/main/scala/com/microsoft/ml/spark/train/ComputeModelStatistics.scala
similarity index 100%
rename from src/main/scala/com/microsoft/ml/spark/train/ComputeModelStatistics.scala
rename to core/src/main/scala/com/microsoft/ml/spark/train/ComputeModelStatistics.scala
diff --git a/src/main/scala/com/microsoft/ml/spark/train/ComputeModelStatistics.txt b/core/src/main/scala/com/microsoft/ml/spark/train/ComputeModelStatistics.txt
similarity index 100%
rename from src/main/scala/com/microsoft/ml/spark/train/ComputeModelStatistics.txt
rename to core/src/main/scala/com/microsoft/ml/spark/train/ComputeModelStatistics.txt
diff --git a/src/main/scala/com/microsoft/ml/spark/train/ComputePerInstanceStatistics.scala b/core/src/main/scala/com/microsoft/ml/spark/train/ComputePerInstanceStatistics.scala
similarity index 100%
rename from src/main/scala/com/microsoft/ml/spark/train/ComputePerInstanceStatistics.scala
rename to core/src/main/scala/com/microsoft/ml/spark/train/ComputePerInstanceStatistics.scala
diff --git a/src/main/scala/com/microsoft/ml/spark/train/ComputePerInstanceStatistics.txt b/core/src/main/scala/com/microsoft/ml/spark/train/ComputePerInstanceStatistics.txt
similarity index 100%
rename from src/main/scala/com/microsoft/ml/spark/train/ComputePerInstanceStatistics.txt
rename to core/src/main/scala/com/microsoft/ml/spark/train/ComputePerInstanceStatistics.txt
diff --git a/src/main/scala/com/microsoft/ml/spark/train/TrainClassifier.scala b/core/src/main/scala/com/microsoft/ml/spark/train/TrainClassifier.scala
similarity index 100%
rename from src/main/scala/com/microsoft/ml/spark/train/TrainClassifier.scala
rename to core/src/main/scala/com/microsoft/ml/spark/train/TrainClassifier.scala
diff --git a/src/main/scala/com/microsoft/ml/spark/train/TrainClassifier.txt b/core/src/main/scala/com/microsoft/ml/spark/train/TrainClassifier.txt
similarity index 100%
rename from src/main/scala/com/microsoft/ml/spark/train/TrainClassifier.txt
rename to core/src/main/scala/com/microsoft/ml/spark/train/TrainClassifier.txt
diff --git a/src/main/scala/com/microsoft/ml/spark/train/TrainRegressor.scala b/core/src/main/scala/com/microsoft/ml/spark/train/TrainRegressor.scala
similarity index 100%
rename from src/main/scala/com/microsoft/ml/spark/train/TrainRegressor.scala
rename to core/src/main/scala/com/microsoft/ml/spark/train/TrainRegressor.scala
diff --git a/src/main/scala/com/microsoft/ml/spark/train/TrainRegressor.txt b/core/src/main/scala/com/microsoft/ml/spark/train/TrainRegressor.txt
similarity index 100%
rename from src/main/scala/com/microsoft/ml/spark/train/TrainRegressor.txt
rename to core/src/main/scala/com/microsoft/ml/spark/train/TrainRegressor.txt
diff --git a/src/main/scala/org/apache/spark/lightgbm/BlockManagerUtils.scala b/core/src/main/scala/org/apache/spark/injections/BlockManagerUtils.scala
similarity index 92%
rename from src/main/scala/org/apache/spark/lightgbm/BlockManagerUtils.scala
rename to core/src/main/scala/org/apache/spark/injections/BlockManagerUtils.scala
index ee0ba74dd41..6d0564abb4b 100644
--- a/src/main/scala/org/apache/spark/lightgbm/BlockManagerUtils.scala
+++ b/core/src/main/scala/org/apache/spark/injections/BlockManagerUtils.scala
@@ -1,13 +1,14 @@
 // Copyright (C) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License. See LICENSE in project root for information.
 
-package org.apache.spark.lightgbm
+package org.apache.spark.injections
 
 import org.apache.spark.sql.Dataset
 import org.apache.spark.storage.BlockManager
 
 object BlockManagerUtils {
   /** Returns the block manager from the dataframe's spark context.
+   *
    * @param data The dataframe to get the block manager from.
    * @return The block manager.
    */
diff --git a/src/main/scala/org/apache/spark/injections/RegressionUtils.scala b/core/src/main/scala/org/apache/spark/injections/RegressionUtils.scala
similarity index 100%
rename from src/main/scala/org/apache/spark/injections/RegressionUtils.scala
rename to core/src/main/scala/org/apache/spark/injections/RegressionUtils.scala
diff --git a/src/main/scala/org/apache/spark/injections/SConf.scala b/core/src/main/scala/org/apache/spark/injections/SConf.scala
similarity index 100%
rename from src/main/scala/org/apache/spark/injections/SConf.scala
rename to core/src/main/scala/org/apache/spark/injections/SConf.scala
diff --git a/src/main/scala/org/apache/spark/injections/UDFUtils.scala b/core/src/main/scala/org/apache/spark/injections/UDFUtils.scala
similarity index 100%
rename from src/main/scala/org/apache/spark/injections/UDFUtils.scala
rename to core/src/main/scala/org/apache/spark/injections/UDFUtils.scala
diff --git a/src/main/scala/org/apache/spark/ml/ComplexParamsSerializer.scala b/core/src/main/scala/org/apache/spark/ml/ComplexParamsSerializer.scala
similarity index 100%
rename from src/main/scala/org/apache/spark/ml/ComplexParamsSerializer.scala
rename to core/src/main/scala/org/apache/spark/ml/ComplexParamsSerializer.scala
diff --git a/src/main/scala/org/apache/spark/ml/LimeNamespaceInjections.scala b/core/src/main/scala/org/apache/spark/ml/LimeNamespaceInjections.scala
similarity index 100%
rename from src/main/scala/org/apache/spark/ml/LimeNamespaceInjections.scala
rename to core/src/main/scala/org/apache/spark/ml/LimeNamespaceInjections.scala
diff --git a/src/main/scala/org/apache/spark/ml/NamespaceInjections.scala b/core/src/main/scala/org/apache/spark/ml/NamespaceInjections.scala
similarity index 100%
rename from src/main/scala/org/apache/spark/ml/NamespaceInjections.scala
rename to core/src/main/scala/org/apache/spark/ml/NamespaceInjections.scala
diff --git a/src/main/scala/org/apache/spark/ml/Ranker.scala b/core/src/main/scala/org/apache/spark/ml/Ranker.scala
similarity index 100%
rename from src/main/scala/org/apache/spark/ml/Ranker.scala
rename to core/src/main/scala/org/apache/spark/ml/Ranker.scala
diff --git a/src/main/scala/org/apache/spark/ml/RegressorUtils.scala b/core/src/main/scala/org/apache/spark/ml/RegressorUtils.scala
similarity index 100%
rename from src/main/scala/org/apache/spark/ml/RegressorUtils.scala
rename to core/src/main/scala/org/apache/spark/ml/RegressorUtils.scala
diff --git a/src/main/scala/org/apache/spark/ml/Serializer.scala b/core/src/main/scala/org/apache/spark/ml/Serializer.scala
similarity index 100%
rename from src/main/scala/org/apache/spark/ml/Serializer.scala
rename to core/src/main/scala/org/apache/spark/ml/Serializer.scala
diff --git a/src/main/scala/org/apache/spark/ml/feature/FastVectorAssembler.scala b/core/src/main/scala/org/apache/spark/ml/feature/FastVectorAssembler.scala
similarity index 100%
rename from src/main/scala/org/apache/spark/ml/feature/FastVectorAssembler.scala
rename to core/src/main/scala/org/apache/spark/ml/feature/FastVectorAssembler.scala
diff --git a/src/main/scala/org/apache/spark/ml/feature/FastVectorAssembler.txt b/core/src/main/scala/org/apache/spark/ml/feature/FastVectorAssembler.txt
similarity index 100%
rename from src/main/scala/org/apache/spark/ml/feature/FastVectorAssembler.txt
rename to core/src/main/scala/org/apache/spark/ml/feature/FastVectorAssembler.txt
diff --git a/src/main/scala/org/apache/spark/ml/param/ArrayMapParam.scala b/core/src/main/scala/org/apache/spark/ml/param/ArrayMapParam.scala
similarity index 100%
rename from src/main/scala/org/apache/spark/ml/param/ArrayMapParam.scala
rename to core/src/main/scala/org/apache/spark/ml/param/ArrayMapParam.scala
diff --git a/src/main/scala/org/apache/spark/ml/param/ArrayParamMapParam.scala b/core/src/main/scala/org/apache/spark/ml/param/ArrayParamMapParam.scala
similarity index 100%
rename from src/main/scala/org/apache/spark/ml/param/ArrayParamMapParam.scala
rename to core/src/main/scala/org/apache/spark/ml/param/ArrayParamMapParam.scala
diff --git a/src/main/scala/org/apache/spark/ml/param/BallTreeParam.scala b/core/src/main/scala/org/apache/spark/ml/param/BallTreeParam.scala
similarity index 100%
rename from src/main/scala/org/apache/spark/ml/param/BallTreeParam.scala
rename to core/src/main/scala/org/apache/spark/ml/param/BallTreeParam.scala
diff --git a/src/main/scala/org/apache/spark/ml/param/ByteArrayParam.scala b/core/src/main/scala/org/apache/spark/ml/param/ByteArrayParam.scala
similarity index 100%
rename from src/main/scala/org/apache/spark/ml/param/ByteArrayParam.scala
rename to core/src/main/scala/org/apache/spark/ml/param/ByteArrayParam.scala
diff --git a/src/main/scala/org/apache/spark/ml/param/DataFrameParam.scala b/core/src/main/scala/org/apache/spark/ml/param/DataFrameParam.scala
similarity index 100%
rename from src/main/scala/org/apache/spark/ml/param/DataFrameParam.scala
rename to core/src/main/scala/org/apache/spark/ml/param/DataFrameParam.scala
diff --git a/src/main/scala/org/apache/spark/ml/param/DataTypeParam.scala b/core/src/main/scala/org/apache/spark/ml/param/DataTypeParam.scala
similarity index 100%
rename from src/main/scala/org/apache/spark/ml/param/DataTypeParam.scala
rename to core/src/main/scala/org/apache/spark/ml/param/DataTypeParam.scala
diff --git a/src/main/scala/org/apache/spark/ml/param/EstimatorArrayParam.scala b/core/src/main/scala/org/apache/spark/ml/param/EstimatorArrayParam.scala
similarity index 100%
rename from src/main/scala/org/apache/spark/ml/param/EstimatorArrayParam.scala
rename to core/src/main/scala/org/apache/spark/ml/param/EstimatorArrayParam.scala
diff --git a/src/main/scala/org/apache/spark/ml/param/EstimatorParam.scala b/core/src/main/scala/org/apache/spark/ml/param/EstimatorParam.scala
similarity index 100%
rename from src/main/scala/org/apache/spark/ml/param/EstimatorParam.scala
rename to core/src/main/scala/org/apache/spark/ml/param/EstimatorParam.scala
diff --git a/src/main/scala/org/apache/spark/ml/param/EvaluatorParam.scala b/core/src/main/scala/org/apache/spark/ml/param/EvaluatorParam.scala
similarity index 100%
rename from src/main/scala/org/apache/spark/ml/param/EvaluatorParam.scala
rename to core/src/main/scala/org/apache/spark/ml/param/EvaluatorParam.scala
diff --git a/src/main/scala/org/apache/spark/ml/param/JsonEncodableParam.scala b/core/src/main/scala/org/apache/spark/ml/param/JsonEncodableParam.scala
similarity index 100%
rename from src/main/scala/org/apache/spark/ml/param/JsonEncodableParam.scala
rename to core/src/main/scala/org/apache/spark/ml/param/JsonEncodableParam.scala
diff --git a/src/main/scala/org/apache/spark/ml/param/MapArrayParam.scala b/core/src/main/scala/org/apache/spark/ml/param/MapArrayParam.scala
similarity index 100%
rename from src/main/scala/org/apache/spark/ml/param/MapArrayParam.scala
rename to core/src/main/scala/org/apache/spark/ml/param/MapArrayParam.scala
diff --git a/src/main/scala/org/apache/spark/ml/param/MapParam.scala b/core/src/main/scala/org/apache/spark/ml/param/MapParam.scala
similarity index 100%
rename from src/main/scala/org/apache/spark/ml/param/MapParam.scala
rename to core/src/main/scala/org/apache/spark/ml/param/MapParam.scala
diff --git a/src/main/scala/org/apache/spark/ml/param/ParamSpaceParam.scala b/core/src/main/scala/org/apache/spark/ml/param/ParamSpaceParam.scala
similarity index 100%
rename from src/main/scala/org/apache/spark/ml/param/ParamSpaceParam.scala
rename to core/src/main/scala/org/apache/spark/ml/param/ParamSpaceParam.scala
diff --git a/src/main/scala/org/apache/spark/ml/param/PipelineStageParam.scala b/core/src/main/scala/org/apache/spark/ml/param/PipelineStageParam.scala
similarity index 100%
rename from src/main/scala/org/apache/spark/ml/param/PipelineStageParam.scala
rename to core/src/main/scala/org/apache/spark/ml/param/PipelineStageParam.scala
diff --git a/src/main/scala/org/apache/spark/ml/param/PythonWrappableParam.scala b/core/src/main/scala/org/apache/spark/ml/param/PythonWrappableParam.scala
similarity index 100%
rename from src/main/scala/org/apache/spark/ml/param/PythonWrappableParam.scala
rename to core/src/main/scala/org/apache/spark/ml/param/PythonWrappableParam.scala
diff --git a/src/main/scala/org/apache/spark/ml/param/RWrappableParam.scala b/core/src/main/scala/org/apache/spark/ml/param/RWrappableParam.scala
similarity index 100%
rename from src/main/scala/org/apache/spark/ml/param/RWrappableParam.scala
rename to core/src/main/scala/org/apache/spark/ml/param/RWrappableParam.scala
diff --git a/src/main/scala/org/apache/spark/ml/param/TransformerArrayParam.scala b/core/src/main/scala/org/apache/spark/ml/param/TransformerArrayParam.scala
similarity index 100%
rename from src/main/scala/org/apache/spark/ml/param/TransformerArrayParam.scala
rename to core/src/main/scala/org/apache/spark/ml/param/TransformerArrayParam.scala
diff --git a/src/main/scala/org/apache/spark/ml/param/TransformerParam.scala b/core/src/main/scala/org/apache/spark/ml/param/TransformerParam.scala
similarity index 100%
rename from src/main/scala/org/apache/spark/ml/param/TransformerParam.scala
rename to core/src/main/scala/org/apache/spark/ml/param/TransformerParam.scala
diff --git a/src/main/scala/org/apache/spark/ml/param/TypedArrayParam.scala b/core/src/main/scala/org/apache/spark/ml/param/TypedArrayParam.scala
similarity index 100%
rename from src/main/scala/org/apache/spark/ml/param/TypedArrayParam.scala
rename to core/src/main/scala/org/apache/spark/ml/param/TypedArrayParam.scala
diff --git a/src/main/scala/org/apache/spark/ml/param/UDFParam.scala b/core/src/main/scala/org/apache/spark/ml/param/UDFParam.scala
similarity index 100%
rename from src/main/scala/org/apache/spark/ml/param/UDFParam.scala
rename to core/src/main/scala/org/apache/spark/ml/param/UDFParam.scala
diff --git a/src/main/scala/org/apache/spark/ml/param/UDPyFParam.scala b/core/src/main/scala/org/apache/spark/ml/param/UDPyFParam.scala
similarity index 100%
rename from src/main/scala/org/apache/spark/ml/param/UDPyFParam.scala
rename to core/src/main/scala/org/apache/spark/ml/param/UDPyFParam.scala
diff --git a/src/main/scala/org/apache/spark/ml/param/UntypedArrayParam.scala b/core/src/main/scala/org/apache/spark/ml/param/UntypedArrayParam.scala
similarity index 100%
rename from src/main/scala/org/apache/spark/ml/param/UntypedArrayParam.scala
rename to core/src/main/scala/org/apache/spark/ml/param/UntypedArrayParam.scala
diff --git a/src/main/scala/org/apache/spark/ml/recommendation/RecommendationHelper.scala b/core/src/main/scala/org/apache/spark/ml/recommendation/RecommendationHelper.scala
similarity index 100%
rename from src/main/scala/org/apache/spark/ml/recommendation/RecommendationHelper.scala
rename to core/src/main/scala/org/apache/spark/ml/recommendation/RecommendationHelper.scala
diff --git a/src/main/scala/org/apache/spark/ml/source/image/PatchedImageFileFormat.scala b/core/src/main/scala/org/apache/spark/ml/source/image/PatchedImageFileFormat.scala
similarity index 100%
rename from src/main/scala/org/apache/spark/ml/source/image/PatchedImageFileFormat.scala
rename to core/src/main/scala/org/apache/spark/ml/source/image/PatchedImageFileFormat.scala
diff --git a/src/main/scala/org/apache/spark/sql/execution/streaming/DistributedHTTPSource.scala b/core/src/main/scala/org/apache/spark/sql/execution/streaming/DistributedHTTPSource.scala
similarity index 100%
rename from src/main/scala/org/apache/spark/sql/execution/streaming/DistributedHTTPSource.scala
rename to core/src/main/scala/org/apache/spark/sql/execution/streaming/DistributedHTTPSource.scala
diff --git a/src/main/scala/org/apache/spark/sql/execution/streaming/HTTPSource.scala b/core/src/main/scala/org/apache/spark/sql/execution/streaming/HTTPSource.scala
similarity index 100%
rename from src/main/scala/org/apache/spark/sql/execution/streaming/HTTPSource.scala
rename to core/src/main/scala/org/apache/spark/sql/execution/streaming/HTTPSource.scala
diff --git a/src/main/scala/org/apache/spark/sql/execution/streaming/ServingUDFs.scala b/core/src/main/scala/org/apache/spark/sql/execution/streaming/ServingUDFs.scala
similarity index 100%
rename from src/main/scala/org/apache/spark/sql/execution/streaming/ServingUDFs.scala
rename to core/src/main/scala/org/apache/spark/sql/execution/streaming/ServingUDFs.scala
diff --git a/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/HTTPSinkV2.scala b/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/HTTPSinkV2.scala
similarity index 100%
rename from src/main/scala/org/apache/spark/sql/execution/streaming/continuous/HTTPSinkV2.scala
rename to core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/HTTPSinkV2.scala
diff --git a/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/HTTPSourceV2.scala b/core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/HTTPSourceV2.scala
similarity index 100%
rename from src/main/scala/org/apache/spark/sql/execution/streaming/continuous/HTTPSourceV2.scala
rename to core/src/main/scala/org/apache/spark/sql/execution/streaming/continuous/HTTPSourceV2.scala
diff --git a/src/main/scala/org/apache/spark/sql/types/injections/MetadataUtilities.scala b/core/src/main/scala/org/apache/spark/sql/types/injections/MetadataUtilities.scala
similarity index 100%
rename from src/main/scala/org/apache/spark/sql/types/injections/MetadataUtilities.scala
rename to core/src/main/scala/org/apache/spark/sql/types/injections/MetadataUtilities.scala
diff --git a/src/main/scala/org/apache/spark/sql/types/injections/OptimizedCKNNFitting.scala b/core/src/main/scala/org/apache/spark/sql/types/injections/OptimizedCKNNFitting.scala
similarity index 100%
rename from src/main/scala/org/apache/spark/sql/types/injections/OptimizedCKNNFitting.scala
rename to core/src/main/scala/org/apache/spark/sql/types/injections/OptimizedCKNNFitting.scala
diff --git a/src/test/R/testthat.R b/core/src/test/R/testthat.R
similarity index 100%
rename from src/test/R/testthat.R
rename to core/src/test/R/testthat.R
diff --git a/src/test/R/testthat/setup-spark.R b/core/src/test/R/testthat/setup-spark.R
similarity index 100%
rename from src/test/R/testthat/setup-spark.R
rename to core/src/test/R/testthat/setup-spark.R
diff --git a/src/test/R/testthat/test-basic.R b/core/src/test/R/testthat/test-basic.R
similarity index 100%
rename from src/test/R/testthat/test-basic.R
rename to core/src/test/R/testthat/test-basic.R
diff --git a/src/test/python/LICENSE.txt b/core/src/test/python/LICENSE.txt
similarity index 100%
rename from src/test/python/LICENSE.txt
rename to core/src/test/python/LICENSE.txt
diff --git a/src/test/python/MANIFEST.in b/core/src/test/python/MANIFEST.in
similarity index 100%
rename from src/test/python/MANIFEST.in
rename to core/src/test/python/MANIFEST.in
diff --git a/src/main/python/mmlspark/lightgbm/__init__.py b/core/src/test/python/__init__.py
similarity index 100%
rename from src/main/python/mmlspark/lightgbm/__init__.py
rename to core/src/test/python/__init__.py
diff --git a/src/main/python/mmlspark/nn/__init__.py b/core/src/test/python/mmlsparktest/__init__.py
similarity index 100%
rename from src/main/python/mmlspark/nn/__init__.py
rename to core/src/test/python/mmlsparktest/__init__.py
diff --git a/src/main/python/mmlspark/opencv/__init__.py b/core/src/test/python/mmlsparktest/cyber/__init__.py
similarity index 100%
rename from src/main/python/mmlspark/opencv/__init__.py
rename to core/src/test/python/mmlsparktest/cyber/__init__.py
diff --git a/src/main/python/mmlspark/plot/__init__.py b/core/src/test/python/mmlsparktest/cyber/anamoly/__init__.py
similarity index 100%
rename from src/main/python/mmlspark/plot/__init__.py
rename to core/src/test/python/mmlsparktest/cyber/anamoly/__init__.py
diff --git a/src/test/python/mmlsparktest/cyber/anamoly/test_collaborative_filtering.py b/core/src/test/python/mmlsparktest/cyber/anamoly/test_collaborative_filtering.py
similarity index 100%
rename from src/test/python/mmlsparktest/cyber/anamoly/test_collaborative_filtering.py
rename to core/src/test/python/mmlsparktest/cyber/anamoly/test_collaborative_filtering.py
diff --git a/src/test/python/mmlsparktest/cyber/anamoly/test_complement_access.py b/core/src/test/python/mmlsparktest/cyber/anamoly/test_complement_access.py
similarity index 100%
rename from src/test/python/mmlsparktest/cyber/anamoly/test_complement_access.py
rename to core/src/test/python/mmlsparktest/cyber/anamoly/test_complement_access.py
diff --git a/src/test/python/mmlsparktest/cyber/explain_tester.py b/core/src/test/python/mmlsparktest/cyber/explain_tester.py
similarity index 100%
rename from src/test/python/mmlsparktest/cyber/explain_tester.py
rename to core/src/test/python/mmlsparktest/cyber/explain_tester.py
diff --git a/src/main/python/mmlspark/stages/__init__.py b/core/src/test/python/mmlsparktest/cyber/feature/__init__.py
similarity index 100%
rename from src/main/python/mmlspark/stages/__init__.py
rename to core/src/test/python/mmlsparktest/cyber/feature/__init__.py
diff --git a/src/test/python/mmlsparktest/cyber/feature/test_indexers.py b/core/src/test/python/mmlsparktest/cyber/feature/test_indexers.py
similarity index 100%
rename from src/test/python/mmlsparktest/cyber/feature/test_indexers.py
rename to core/src/test/python/mmlsparktest/cyber/feature/test_indexers.py
diff --git a/src/test/python/mmlsparktest/cyber/feature/test_scalers.py b/core/src/test/python/mmlsparktest/cyber/feature/test_scalers.py
similarity index 100%
rename from src/test/python/mmlsparktest/cyber/feature/test_scalers.py
rename to core/src/test/python/mmlsparktest/cyber/feature/test_scalers.py
diff --git a/src/main/python/mmlspark/vw/__init__.py b/core/src/test/python/mmlsparktest/cyber/utils/__init__.py
similarity index 100%
rename from src/main/python/mmlspark/vw/__init__.py
rename to core/src/test/python/mmlsparktest/cyber/utils/__init__.py
diff --git a/src/test/python/mmlsparktest/cyber/utils/test_spark_utils.py b/core/src/test/python/mmlsparktest/cyber/utils/test_spark_utils.py
similarity index 100%
rename from src/test/python/mmlsparktest/cyber/utils/test_spark_utils.py
rename to core/src/test/python/mmlsparktest/cyber/utils/test_spark_utils.py
diff --git a/src/test/__init__.py b/core/src/test/python/mmlsparktest/nn/__init__.py
similarity index 100%
rename from src/test/__init__.py
rename to core/src/test/python/mmlsparktest/nn/__init__.py
diff --git a/src/test/python/mmlsparktest/nn/test_ball_tree.py b/core/src/test/python/mmlsparktest/nn/test_ball_tree.py
similarity index 100%
rename from src/test/python/mmlsparktest/nn/test_ball_tree.py
rename to core/src/test/python/mmlsparktest/nn/test_ball_tree.py
diff --git a/src/test/python/__init__.py b/core/src/test/python/mmlsparktest/recommendation/__init__.py
similarity index 100%
rename from src/test/python/__init__.py
rename to core/src/test/python/mmlsparktest/recommendation/__init__.py
diff --git a/src/test/python/mmlsparktest/recommendation/test_ranking.py b/core/src/test/python/mmlsparktest/recommendation/test_ranking.py
similarity index 100%
rename from src/test/python/mmlsparktest/recommendation/test_ranking.py
rename to core/src/test/python/mmlsparktest/recommendation/test_ranking.py
diff --git a/src/test/python/mmlsparktest/spark.py b/core/src/test/python/mmlsparktest/spark.py
similarity index 100%
rename from src/test/python/mmlsparktest/spark.py
rename to core/src/test/python/mmlsparktest/spark.py
diff --git a/src/test/python/setup.py b/core/src/test/python/setup.py
similarity index 100%
rename from src/test/python/setup.py
rename to core/src/test/python/setup.py
diff --git a/src/test/resources/audio1.txt b/core/src/test/resources/audio1.txt
similarity index 100%
rename from src/test/resources/audio1.txt
rename to core/src/test/resources/audio1.txt
diff --git a/src/test/resources/audio1.wav b/core/src/test/resources/audio1.wav
similarity index 100%
rename from src/test/resources/audio1.wav
rename to core/src/test/resources/audio1.wav
diff --git a/src/test/resources/audio2.txt b/core/src/test/resources/audio2.txt
similarity index 100%
rename from src/test/resources/audio2.txt
rename to core/src/test/resources/audio2.txt
diff --git a/src/test/resources/audio2.wav b/core/src/test/resources/audio2.wav
similarity index 100%
rename from src/test/resources/audio2.wav
rename to core/src/test/resources/audio2.wav
diff --git a/src/test/resources/audio3.mp3 b/core/src/test/resources/audio3.mp3
similarity index 100%
rename from src/test/resources/audio3.mp3
rename to core/src/test/resources/audio3.mp3
diff --git a/src/test/resources/audio3.txt b/core/src/test/resources/audio3.txt
similarity index 100%
rename from src/test/resources/audio3.txt
rename to core/src/test/resources/audio3.txt
diff --git a/src/test/resources/audio4.txt b/core/src/test/resources/audio4.txt
similarity index 100%
rename from src/test/resources/audio4.txt
rename to core/src/test/resources/audio4.txt
diff --git a/src/test/resources/benchmarks/benchmarkBasicDataTypes.json b/core/src/test/resources/benchmarks/benchmarkBasicDataTypes.json
similarity index 100%
rename from src/test/resources/benchmarks/benchmarkBasicDataTypes.json
rename to core/src/test/resources/benchmarks/benchmarkBasicDataTypes.json
diff --git a/src/test/resources/benchmarks/benchmarkDate.json b/core/src/test/resources/benchmarks/benchmarkDate.json
similarity index 100%
rename from src/test/resources/benchmarks/benchmarkDate.json
rename to core/src/test/resources/benchmarks/benchmarkDate.json
diff --git a/src/test/resources/benchmarks/benchmarkNoOneHot.json b/core/src/test/resources/benchmarks/benchmarkNoOneHot.json
similarity index 100%
rename from src/test/resources/benchmarks/benchmarkNoOneHot.json
rename to core/src/test/resources/benchmarks/benchmarkNoOneHot.json
diff --git a/src/test/resources/benchmarks/benchmarkOneHot.json b/core/src/test/resources/benchmarks/benchmarkOneHot.json
similarity index 100%
rename from src/test/resources/benchmarks/benchmarkOneHot.json
rename to core/src/test/resources/benchmarks/benchmarkOneHot.json
diff --git a/src/test/resources/benchmarks/benchmarkString.json b/core/src/test/resources/benchmarks/benchmarkString.json
similarity index 100%
rename from src/test/resources/benchmarks/benchmarkString.json
rename to core/src/test/resources/benchmarks/benchmarkString.json
diff --git a/src/test/resources/benchmarks/benchmarkStringIndexOneHot.json b/core/src/test/resources/benchmarks/benchmarkStringIndexOneHot.json
similarity index 100%
rename from src/test/resources/benchmarks/benchmarkStringIndexOneHot.json
rename to core/src/test/resources/benchmarks/benchmarkStringIndexOneHot.json
diff --git a/src/test/resources/benchmarks/benchmarkStringMissing.json b/core/src/test/resources/benchmarks/benchmarkStringMissing.json
similarity index 100%
rename from src/test/resources/benchmarks/benchmarkStringMissing.json
rename to core/src/test/resources/benchmarks/benchmarkStringMissing.json
diff --git a/src/test/resources/benchmarks/benchmarkVectors.json b/core/src/test/resources/benchmarks/benchmarkVectors.json
similarity index 100%
rename from src/test/resources/benchmarks/benchmarkVectors.json
rename to core/src/test/resources/benchmarks/benchmarkVectors.json
diff --git a/src/test/resources/benchmarks/benchmarks_VerifyTrainClassifier.csv b/core/src/test/resources/benchmarks/benchmarks_VerifyTrainClassifier.csv
similarity index 100%
rename from src/test/resources/benchmarks/benchmarks_VerifyTrainClassifier.csv
rename to core/src/test/resources/benchmarks/benchmarks_VerifyTrainClassifier.csv
diff --git a/src/test/resources/benchmarks/benchmarks_VerifyTuneHyperparameters.csv b/core/src/test/resources/benchmarks/benchmarks_VerifyTuneHyperparameters.csv
similarity index 100%
rename from src/test/resources/benchmarks/benchmarks_VerifyTuneHyperparameters.csv
rename to core/src/test/resources/benchmarks/benchmarks_VerifyTuneHyperparameters.csv
diff --git a/src/test/resources/benchmarks/benchmarks_VerifyVowpalWabbitRegressor.csv b/core/src/test/resources/benchmarks/benchmarks_VerifyVowpalWabbitRegressor.csv
similarity index 100%
rename from src/test/resources/benchmarks/benchmarks_VerifyVowpalWabbitRegressor.csv
rename to core/src/test/resources/benchmarks/benchmarks_VerifyVowpalWabbitRegressor.csv
diff --git a/src/test/resources/demoUsage.csv.gz b/core/src/test/resources/demoUsage.csv.gz
similarity index 100%
rename from src/test/resources/demoUsage.csv.gz
rename to core/src/test/resources/demoUsage.csv.gz
diff --git a/src/test/resources/dialogue.mp3 b/core/src/test/resources/dialogue.mp3
similarity index 100%
rename from src/test/resources/dialogue.mp3
rename to core/src/test/resources/dialogue.mp3
diff --git a/src/test/resources/lily.wav b/core/src/test/resources/lily.wav
similarity index 100%
rename from src/test/resources/lily.wav
rename to core/src/test/resources/lily.wav
diff --git a/src/test/resources/mark.wav b/core/src/test/resources/mark.wav
similarity index 100%
rename from src/test/resources/mark.wav
rename to core/src/test/resources/mark.wav
diff --git a/src/test/resources/sim_count1.csv.gz b/core/src/test/resources/sim_count1.csv.gz
similarity index 100%
rename from src/test/resources/sim_count1.csv.gz
rename to core/src/test/resources/sim_count1.csv.gz
diff --git a/src/test/resources/sim_count3.csv.gz b/core/src/test/resources/sim_count3.csv.gz
similarity index 100%
rename from src/test/resources/sim_count3.csv.gz
rename to core/src/test/resources/sim_count3.csv.gz
diff --git a/src/test/resources/sim_jac1.csv.gz b/core/src/test/resources/sim_jac1.csv.gz
similarity index 100%
rename from src/test/resources/sim_jac1.csv.gz
rename to core/src/test/resources/sim_jac1.csv.gz
diff --git a/src/test/resources/sim_jac3.csv.gz b/core/src/test/resources/sim_jac3.csv.gz
similarity index 100%
rename from src/test/resources/sim_jac3.csv.gz
rename to core/src/test/resources/sim_jac3.csv.gz
diff --git a/src/test/resources/sim_lift1.csv.gz b/core/src/test/resources/sim_lift1.csv.gz
similarity index 100%
rename from src/test/resources/sim_lift1.csv.gz
rename to core/src/test/resources/sim_lift1.csv.gz
diff --git a/src/test/resources/sim_lift3.csv.gz b/core/src/test/resources/sim_lift3.csv.gz
similarity index 100%
rename from src/test/resources/sim_lift3.csv.gz
rename to core/src/test/resources/sim_lift3.csv.gz
diff --git a/src/test/resources/user_aff.csv.gz b/core/src/test/resources/user_aff.csv.gz
similarity index 100%
rename from src/test/resources/user_aff.csv.gz
rename to core/src/test/resources/user_aff.csv.gz
diff --git a/src/test/resources/userpred_count3_userid_only.csv.gz b/core/src/test/resources/userpred_count3_userid_only.csv.gz
similarity index 100%
rename from src/test/resources/userpred_count3_userid_only.csv.gz
rename to core/src/test/resources/userpred_count3_userid_only.csv.gz
diff --git a/src/test/resources/userpred_jac3_userid_only.csv.gz b/core/src/test/resources/userpred_jac3_userid_only.csv.gz
similarity index 100%
rename from src/test/resources/userpred_jac3_userid_only.csv.gz
rename to core/src/test/resources/userpred_jac3_userid_only.csv.gz
diff --git a/src/test/resources/userpred_lift3_userid_only.csv.gz b/core/src/test/resources/userpred_lift3_userid_only.csv.gz
similarity index 100%
rename from src/test/resources/userpred_lift3_userid_only.csv.gz
rename to core/src/test/resources/userpred_lift3_userid_only.csv.gz
diff --git a/src/test/scala/com/microsoft/ml/spark/Secrets.scala b/core/src/test/scala/com/microsoft/ml/spark/Secrets.scala
similarity index 100%
rename from src/test/scala/com/microsoft/ml/spark/Secrets.scala
rename to core/src/test/scala/com/microsoft/ml/spark/Secrets.scala
diff --git a/src/test/scala/com/microsoft/ml/spark/automl/VerifyFindBestModel.scala b/core/src/test/scala/com/microsoft/ml/spark/automl/VerifyFindBestModel.scala
similarity index 100%
rename from src/test/scala/com/microsoft/ml/spark/automl/VerifyFindBestModel.scala
rename to core/src/test/scala/com/microsoft/ml/spark/automl/VerifyFindBestModel.scala
diff --git a/src/test/scala/com/microsoft/ml/spark/automl/VerifyTuneHyperparameters.scala b/core/src/test/scala/com/microsoft/ml/spark/automl/VerifyTuneHyperparameters.scala
similarity index 100%
rename from src/test/scala/com/microsoft/ml/spark/automl/VerifyTuneHyperparameters.scala
rename to core/src/test/scala/com/microsoft/ml/spark/automl/VerifyTuneHyperparameters.scala
diff --git a/core/src/test/scala/com/microsoft/ml/spark/codegen/TestGen.scala b/core/src/test/scala/com/microsoft/ml/spark/codegen/TestGen.scala
new file mode 100644
index 00000000000..b46aefd7b74
--- /dev/null
+++ b/core/src/test/scala/com/microsoft/ml/spark/codegen/TestGen.scala
@@ -0,0 +1,49 @@
+// Copyright (C) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License. See LICENSE in project root for information.
+
+package com.microsoft.ml.spark.codegen
+
+import java.io.File
+
+import com.microsoft.ml.spark.codegen.CodegenConfigProtocol._
+import com.microsoft.ml.spark.core.env.FileUtilities._
+import com.microsoft.ml.spark.core.test.base.TestBase
+import com.microsoft.ml.spark.core.test.fuzzing.PyTestFuzzing
+import com.microsoft.ml.spark.core.utils.JarLoadingUtils.instantiateServices
+import org.apache.commons.io.FileUtils
+import spray.json._
+
+
+object TestGen {
+
+ import CodeGenUtils._
+
+ def generatePythonTests(conf: CodegenConfig): Unit = {
+ instantiateServices[PyTestFuzzing[_]]().foreach { ltc =>
+ try {
+ ltc.makePyTestFile(conf)
+ } catch {
+ case _: NotImplementedError =>
+ println(s"ERROR: Could not generate test for ${ltc.testClassName} because of Complex Parameters")
+ }
+ }
+ }
+
+ private def makeInitFiles(conf: CodegenConfig, packageFolder: String = ""): Unit = {
+ val dir = new File(new File(conf.pyTestDir, "mmlsparktest"), packageFolder)
+ writeFile(new File(dir, "__init__.py"), "")
+ dir.listFiles().filter(_.isDirectory).foreach(f =>
+ makeInitFiles(conf, packageFolder + "/" + f.getName)
+ )
+ }
+
+ def main(args: Array[String]): Unit = {
+ val conf = args.head.parseJson.convertTo[CodegenConfig]
+ clean(conf.testDataDir)
+ clean(conf.pyTestDir)
+ generatePythonTests(conf)
+ TestBase.stopSparkSession()
+ FileUtils.copyDirectoryToDirectory(toDir(conf.pyTestOverrideDir), toDir(conf.pyTestDir))
+ makeInitFiles(conf)
+ }
+}
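
`TestGen` is driven entirely by a `CodegenConfig` serialized as JSON in `args(0)`. The authoritative schema lives in `CodegenConfigProtocol`, which is not shown in this patch, so the invocation below is a hedged sketch whose field names are guesses based only on the config members referenced above:

```scala
// Hypothetical driver; the JSON fields shown mirror the config members used above
// (testDataDir, pyTestDir, pyTestOverrideDir) and are NOT the full schema.
object RunTestGenSketch {
  def main(args: Array[String]): Unit = {
    val confJson =
      """{
        |  "testDataDir": "/tmp/mmlspark/test-data",
        |  "pyTestDir": "/tmp/mmlspark/generated/test/python",
        |  "pyTestOverrideDir": "core/src/test/python"
        |}""".stripMargin
    com.microsoft.ml.spark.codegen.TestGen.main(Array(confJson))
  }
}
```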
diff --git a/src/test/scala/com/microsoft/ml/spark/core/ml/HashingTFSpec.scala b/core/src/test/scala/com/microsoft/ml/spark/core/ml/HashingTFSpec.scala
similarity index 100%
rename from src/test/scala/com/microsoft/ml/spark/core/ml/HashingTFSpec.scala
rename to core/src/test/scala/com/microsoft/ml/spark/core/ml/HashingTFSpec.scala
diff --git a/src/test/scala/com/microsoft/ml/spark/core/ml/IDFSpec.scala b/core/src/test/scala/com/microsoft/ml/spark/core/ml/IDFSpec.scala
similarity index 100%
rename from src/test/scala/com/microsoft/ml/spark/core/ml/IDFSpec.scala
rename to core/src/test/scala/com/microsoft/ml/spark/core/ml/IDFSpec.scala
diff --git a/src/test/scala/com/microsoft/ml/spark/core/ml/NGramSpec.scala b/core/src/test/scala/com/microsoft/ml/spark/core/ml/NGramSpec.scala
similarity index 100%
rename from src/test/scala/com/microsoft/ml/spark/core/ml/NGramSpec.scala
rename to core/src/test/scala/com/microsoft/ml/spark/core/ml/NGramSpec.scala
diff --git a/src/test/scala/com/microsoft/ml/spark/core/ml/OneHotEncoderSpec.scala b/core/src/test/scala/com/microsoft/ml/spark/core/ml/OneHotEncoderSpec.scala
similarity index 100%
rename from src/test/scala/com/microsoft/ml/spark/core/ml/OneHotEncoderSpec.scala
rename to core/src/test/scala/com/microsoft/ml/spark/core/ml/OneHotEncoderSpec.scala
diff --git a/src/test/scala/com/microsoft/ml/spark/core/ml/Word2VecSpec.scala b/core/src/test/scala/com/microsoft/ml/spark/core/ml/Word2VecSpec.scala
similarity index 100%
rename from src/test/scala/com/microsoft/ml/spark/core/ml/Word2VecSpec.scala
rename to core/src/test/scala/com/microsoft/ml/spark/core/ml/Word2VecSpec.scala
diff --git a/src/test/scala/com/microsoft/ml/spark/core/schema/SparkBindingsTest.scala b/core/src/test/scala/com/microsoft/ml/spark/core/schema/SparkBindingsTest.scala
similarity index 100%
rename from src/test/scala/com/microsoft/ml/spark/core/schema/SparkBindingsTest.scala
rename to core/src/test/scala/com/microsoft/ml/spark/core/schema/SparkBindingsTest.scala
diff --git a/src/test/scala/com/microsoft/ml/spark/core/schema/TestCategoricals.scala b/core/src/test/scala/com/microsoft/ml/spark/core/schema/TestCategoricals.scala
similarity index 100%
rename from src/test/scala/com/microsoft/ml/spark/core/schema/TestCategoricals.scala
rename to core/src/test/scala/com/microsoft/ml/spark/core/schema/TestCategoricals.scala
diff --git a/src/test/scala/com/microsoft/ml/spark/core/schema/VerifyFastVectorAssembler.scala b/core/src/test/scala/com/microsoft/ml/spark/core/schema/VerifyFastVectorAssembler.scala
similarity index 100%
rename from src/test/scala/com/microsoft/ml/spark/core/schema/VerifyFastVectorAssembler.scala
rename to core/src/test/scala/com/microsoft/ml/spark/core/schema/VerifyFastVectorAssembler.scala
diff --git a/src/test/scala/com/microsoft/ml/spark/core/schema/VerifySparkSchema.scala b/core/src/test/scala/com/microsoft/ml/spark/core/schema/VerifySparkSchema.scala
similarity index 100%
rename from src/test/scala/com/microsoft/ml/spark/core/schema/VerifySparkSchema.scala
rename to core/src/test/scala/com/microsoft/ml/spark/core/schema/VerifySparkSchema.scala
diff --git a/src/test/scala/com/microsoft/ml/spark/core/serialize/ValidateComplexParamSerializer.scala b/core/src/test/scala/com/microsoft/ml/spark/core/serialize/ValidateComplexParamSerializer.scala
similarity index 100%
rename from src/test/scala/com/microsoft/ml/spark/core/serialize/ValidateComplexParamSerializer.scala
rename to core/src/test/scala/com/microsoft/ml/spark/core/serialize/ValidateComplexParamSerializer.scala
diff --git a/src/test/scala/com/microsoft/ml/spark/core/test/base/SparkSessionFactory.scala b/core/src/test/scala/com/microsoft/ml/spark/core/test/base/SparkSessionFactory.scala
similarity index 97%
rename from src/test/scala/com/microsoft/ml/spark/core/test/base/SparkSessionFactory.scala
rename to core/src/test/scala/com/microsoft/ml/spark/core/test/base/SparkSessionFactory.scala
index faaf19398ea..4b2de249739 100644
--- a/src/test/scala/com/microsoft/ml/spark/core/test/base/SparkSessionFactory.scala
+++ b/core/src/test/scala/com/microsoft/ml/spark/core/test/base/SparkSessionFactory.scala
@@ -47,7 +47,6 @@ object SparkSessionFactory {
val sess = SparkSession.builder()
.config(conf)
.getOrCreate()
- sess.sparkContext.setLogLevel(logLevel)
sess
}
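
Dropping the `setLogLevel` call means the factory no longer overrides Spark's logging; suites that depended on the quieter output can now set the level themselves on the session they obtain. A minimal sketch:

```scala
import org.apache.spark.sql.SparkSession

object QuietSessionSketch {
  def main(args: Array[String]): Unit = {
    val sess = SparkSession.builder().master("local[*]").appName("tests").getOrCreate()
    sess.sparkContext.setLogLevel("WARN") // the call previously made inside SparkSessionFactory
    sess.stop()
  }
}
```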
diff --git a/src/test/scala/com/microsoft/ml/spark/core/test/base/TestBase.scala b/core/src/test/scala/com/microsoft/ml/spark/core/test/base/TestBase.scala
similarity index 100%
rename from src/test/scala/com/microsoft/ml/spark/core/test/base/TestBase.scala
rename to core/src/test/scala/com/microsoft/ml/spark/core/test/base/TestBase.scala
diff --git a/src/test/scala/com/microsoft/ml/spark/core/test/benchmarks/Benchmarks.scala b/core/src/test/scala/com/microsoft/ml/spark/core/test/benchmarks/Benchmarks.scala
similarity index 100%
rename from src/test/scala/com/microsoft/ml/spark/core/test/benchmarks/Benchmarks.scala
rename to core/src/test/scala/com/microsoft/ml/spark/core/test/benchmarks/Benchmarks.scala
diff --git a/src/test/scala/com/microsoft/ml/spark/core/test/fuzzing/Fuzzing.scala b/core/src/test/scala/com/microsoft/ml/spark/core/test/fuzzing/Fuzzing.scala
similarity index 90%
rename from src/test/scala/com/microsoft/ml/spark/core/test/fuzzing/Fuzzing.scala
rename to core/src/test/scala/com/microsoft/ml/spark/core/test/fuzzing/Fuzzing.scala
index 9adbad67236..9ee92739c58 100644
--- a/src/test/scala/com/microsoft/ml/spark/core/test/fuzzing/Fuzzing.scala
+++ b/core/src/test/scala/com/microsoft/ml/spark/core/test/fuzzing/Fuzzing.scala
@@ -7,7 +7,7 @@ import java.io.File
import java.nio.charset.StandardCharsets
import java.nio.file.Files
-import com.microsoft.ml.spark.codegen.Config
+import com.microsoft.ml.spark.codegen.CodegenConfig
import com.microsoft.ml.spark.core.env.FileUtilities
import com.microsoft.ml.spark.core.test.base.TestBase
import org.apache.commons.io.FileUtils
@@ -50,17 +50,17 @@ trait PyTestFuzzing[S <: PipelineStage] extends TestBase with DataFrameEquality
val testClassName: String = this.getClass.getName.split(".".toCharArray).last
- val testDataDir: File = FileUtilities.join(
- Config.TestDataDir, this.getClass.getName.split(".".toCharArray).last)
+ def testDataDir(conf: CodegenConfig): File = FileUtilities.join(
+ conf.testDataDir, this.getClass.getName.split(".".toCharArray).last)
- def saveDataset(df: DataFrame, name: String): Unit = {
- df.write.mode("overwrite").parquet(new File(testDataDir, s"$name.parquet").toString)
+ def saveDataset(conf: CodegenConfig, df: DataFrame, name: String): Unit = {
+ df.write.mode("overwrite").parquet(new File(testDataDir(conf), s"$name.parquet").toString)
}
- def saveModel(model: S, name: String): Unit = {
+ def saveModel(conf: CodegenConfig, model: S, name: String): Unit = {
model match {
case writable: MLWritable =>
- writable.write.overwrite().save(new File(testDataDir, s"$name.model").toString)
+ writable.write.overwrite().save(new File(testDataDir(conf), s"$name.model").toString)
case _ =>
throw new IllegalArgumentException(s"${model.getClass.getName} is not writable")
}
@@ -69,14 +69,14 @@ trait PyTestFuzzing[S <: PipelineStage] extends TestBase with DataFrameEquality
val testFitting = false
- def saveTestData(): Unit = {
- testDataDir.mkdirs()
+ def saveTestData(conf: CodegenConfig): Unit = {
+ testDataDir(conf).mkdirs()
pyTestObjects().zipWithIndex.foreach { case (to, i) =>
- saveModel(to.stage, s"model-$i")
+ saveModel(conf, to.stage, s"model-$i")
if (testFitting) {
- saveDataset(to.fitDF, s"fit-$i")
- saveDataset(to.transDF, s"trans-$i")
- to.validateDF.foreach(saveDataset(_, s"val-$i"))
+ saveDataset(conf, to.fitDF, s"fit-$i")
+ saveDataset(conf, to.transDF, s"trans-$i")
+ to.validateDF.foreach(saveDataset(conf, _, s"val-$i"))
}
}
}
@@ -144,9 +144,9 @@ trait PyTestFuzzing[S <: PipelineStage] extends TestBase with DataFrameEquality
}
- def makePyTestFile(): Unit = {
+ def makePyTestFile(conf: CodegenConfig): Unit = {
spark
- saveTestData()
+ saveTestData(conf)
val generatedTests = pyTestObjects().zipWithIndex.map { case (to, i) => makePyTests(to, i) }
val stage = pyTestObjects().head.stage
val stageName = stage.getClass.getName.split(".".toCharArray).last
@@ -159,7 +159,7 @@ trait PyTestFuzzing[S <: PipelineStage] extends TestBase with DataFrameEquality
|from os.path import join
|import json
|
- |test_data_dir = "${testDataDir.toString.replaceAllLiterally("\\", "\\\\")}"
+ |test_data_dir = "${testDataDir(conf).toString.replaceAllLiterally("\\", "\\\\")}"
|
|
|class $testClassName(unittest.TestCase):
@@ -180,7 +180,7 @@ trait PyTestFuzzing[S <: PipelineStage] extends TestBase with DataFrameEquality
val testFolders = importPath.mkString(".")
.replaceAllLiterally("com.microsoft.ml.spark", "mmlsparktest").split(".".toCharArray)
- val testDir = FileUtilities.join((Seq(Config.PyTestDir.toString) ++ testFolders.toSeq): _*)
+ val testDir = FileUtilities.join((Seq(conf.pyTestDir.toString) ++ testFolders.toSeq): _*)
testDir.mkdirs()
Files.write(
FileUtilities.join(testDir, "test_" + camelToSnake(testClassName) + ".py").toPath,
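
The pattern running through this hunk is a small dependency-injection refactor: members that used to read the global `Config` object (`testDataDir`, `saveDataset`, `saveModel`, `saveTestData`, `makePyTestFile`) become functions of an explicit `CodegenConfig`, so the same fuzzing trait can be reused across sub-projects. A minimal sketch of the before/after shape, with all names illustrative:

```scala
import java.io.File

// Illustrative stand-in for CodegenConfig, with only the field this sketch needs.
final case class ConfSketch(testDataDir: File)

trait PathsBefore {
  // Before: resolved once from a global config, identical for every project.
  val testDataDir: File = new File("/opt/global/test-data", getClass.getSimpleName)
}

trait PathsAfter {
  // After: resolved per call from whichever config the caller supplies.
  def testDataDir(conf: ConfSketch): File = new File(conf.testDataDir, getClass.getSimpleName)
}
```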
diff --git a/src/test/scala/com/microsoft/ml/spark/core/test/fuzzing/FuzzingTest.scala b/core/src/test/scala/com/microsoft/ml/spark/core/test/fuzzing/FuzzingTest.scala
similarity index 98%
rename from src/test/scala/com/microsoft/ml/spark/core/test/fuzzing/FuzzingTest.scala
rename to core/src/test/scala/com/microsoft/ml/spark/core/test/fuzzing/FuzzingTest.scala
index ce573f761d9..67d31910dc9 100644
--- a/src/test/scala/com/microsoft/ml/spark/core/test/fuzzing/FuzzingTest.scala
+++ b/core/src/test/scala/com/microsoft/ml/spark/core/test/fuzzing/FuzzingTest.scala
@@ -257,17 +257,17 @@ class FuzzingTest extends TestBase {
// set the context loader to pick up on the jars
//Thread.currentThread().setContextClassLoader(JarLoadingUtils.classLoader)
- private lazy val readers: List[MLReadable[_]] = JarLoadingUtils.instantiateObjects[MLReadable[_]]
+ private lazy val readers: List[MLReadable[_]] = JarLoadingUtils.instantiateObjects[MLReadable[_]]()
- private lazy val pipelineStages: List[PipelineStage] = JarLoadingUtils.instantiateServices[PipelineStage]
+ private lazy val pipelineStages: List[PipelineStage] = JarLoadingUtils.instantiateServices[PipelineStage]()
private lazy val experimentFuzzers: List[ExperimentFuzzing[_ <: PipelineStage]] =
- JarLoadingUtils.instantiateServices[ExperimentFuzzing[_ <: PipelineStage]]
+ JarLoadingUtils.instantiateServices[ExperimentFuzzing[_ <: PipelineStage]]()
private lazy val serializationFuzzers: List[SerializationFuzzing[_ <: PipelineStage with MLWritable]] =
- JarLoadingUtils.instantiateServices[SerializationFuzzing[_ <: PipelineStage with MLWritable]]
+ JarLoadingUtils.instantiateServices[SerializationFuzzing[_ <: PipelineStage with MLWritable]]()
private lazy val pytestFuzzers: List[PyTestFuzzing[_ <: PipelineStage]] =
- JarLoadingUtils.instantiateServices[PyTestFuzzing[_ <: PipelineStage]]
+ JarLoadingUtils.instantiateServices[PyTestFuzzing[_ <: PipelineStage]]()
}
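
The empty argument lists added here track a signature change: `instantiateServices` and `instantiateObjects` now accept an argument (`CodeGen` above passes `conf.jarName`), presumably with a default, so existing call sites must spell out the parameter list. Illustrative signatures only; the real ones live in `JarLoadingUtils`:

```scala
object JarLoadingSketch {
  def instantiateServicesOld[T]: List[T] = Nil                                 // before: no parens
  def instantiateServicesNew[T](jarName: Option[String] = None): List[T] = Nil // after: () or (jar)
}
```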
diff --git a/src/test/scala/com/microsoft/ml/spark/core/utils/VerifyClusterUtil.scala b/core/src/test/scala/com/microsoft/ml/spark/core/utils/VerifyClusterUtil.scala
similarity index 100%
rename from src/test/scala/com/microsoft/ml/spark/core/utils/VerifyClusterUtil.scala
rename to core/src/test/scala/com/microsoft/ml/spark/core/utils/VerifyClusterUtil.scala
diff --git a/src/test/scala/com/microsoft/ml/spark/featurize/VerifyCleanMissingData.scala b/core/src/test/scala/com/microsoft/ml/spark/featurize/VerifyCleanMissingData.scala
similarity index 100%
rename from src/test/scala/com/microsoft/ml/spark/featurize/VerifyCleanMissingData.scala
rename to core/src/test/scala/com/microsoft/ml/spark/featurize/VerifyCleanMissingData.scala
diff --git a/src/test/scala/com/microsoft/ml/spark/featurize/VerifyCountSelector.scala b/core/src/test/scala/com/microsoft/ml/spark/featurize/VerifyCountSelector.scala
similarity index 100%
rename from src/test/scala/com/microsoft/ml/spark/featurize/VerifyCountSelector.scala
rename to core/src/test/scala/com/microsoft/ml/spark/featurize/VerifyCountSelector.scala
diff --git a/src/test/scala/com/microsoft/ml/spark/featurize/VerifyDataConversion.scala b/core/src/test/scala/com/microsoft/ml/spark/featurize/VerifyDataConversion.scala
similarity index 100%
rename from src/test/scala/com/microsoft/ml/spark/featurize/VerifyDataConversion.scala
rename to core/src/test/scala/com/microsoft/ml/spark/featurize/VerifyDataConversion.scala
diff --git a/src/test/scala/com/microsoft/ml/spark/featurize/VerifyFeaturize.scala b/core/src/test/scala/com/microsoft/ml/spark/featurize/VerifyFeaturize.scala
similarity index 100%
rename from src/test/scala/com/microsoft/ml/spark/featurize/VerifyFeaturize.scala
rename to core/src/test/scala/com/microsoft/ml/spark/featurize/VerifyFeaturize.scala
diff --git a/src/test/scala/com/microsoft/ml/spark/featurize/VerifyValueIndexer.scala b/core/src/test/scala/com/microsoft/ml/spark/featurize/VerifyValueIndexer.scala
similarity index 100%
rename from src/test/scala/com/microsoft/ml/spark/featurize/VerifyValueIndexer.scala
rename to core/src/test/scala/com/microsoft/ml/spark/featurize/VerifyValueIndexer.scala
diff --git a/src/test/scala/com/microsoft/ml/spark/featurize/text/MultiNGramSpec.scala b/core/src/test/scala/com/microsoft/ml/spark/featurize/text/MultiNGramSpec.scala
similarity index 100%
rename from src/test/scala/com/microsoft/ml/spark/featurize/text/MultiNGramSpec.scala
rename to core/src/test/scala/com/microsoft/ml/spark/featurize/text/MultiNGramSpec.scala
diff --git a/src/test/scala/com/microsoft/ml/spark/featurize/text/PageSplitterSpec.scala b/core/src/test/scala/com/microsoft/ml/spark/featurize/text/PageSplitterSpec.scala
similarity index 100%
rename from src/test/scala/com/microsoft/ml/spark/featurize/text/PageSplitterSpec.scala
rename to core/src/test/scala/com/microsoft/ml/spark/featurize/text/PageSplitterSpec.scala
diff --git a/src/test/scala/com/microsoft/ml/spark/featurize/text/TextFeaturizerSpec.scala b/core/src/test/scala/com/microsoft/ml/spark/featurize/text/TextFeaturizerSpec.scala
similarity index 100%
rename from src/test/scala/com/microsoft/ml/spark/featurize/text/TextFeaturizerSpec.scala
rename to core/src/test/scala/com/microsoft/ml/spark/featurize/text/TextFeaturizerSpec.scala
diff --git a/src/test/scala/com/microsoft/ml/spark/flaky/PartitionConsolidatorSuite.scala b/core/src/test/scala/com/microsoft/ml/spark/flaky/PartitionConsolidatorSuite.scala
similarity index 100%
rename from src/test/scala/com/microsoft/ml/spark/flaky/PartitionConsolidatorSuite.scala
rename to core/src/test/scala/com/microsoft/ml/spark/flaky/PartitionConsolidatorSuite.scala
diff --git a/core/src/test/scala/com/microsoft/ml/spark/image/ImageTestUtils.scala b/core/src/test/scala/com/microsoft/ml/spark/image/ImageTestUtils.scala
new file mode 100644
index 00000000000..84e516c5498
--- /dev/null
+++ b/core/src/test/scala/com/microsoft/ml/spark/image/ImageTestUtils.scala
@@ -0,0 +1,109 @@
+// Copyright (C) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License. See LICENSE in project root for information.
+
+package com.microsoft.ml.spark.image
+
+import java.io.File
+import java.net.URL
+
+import com.microsoft.ml.spark.build.BuildInfo
+import com.microsoft.ml.spark.core.env.FileUtilities
+import org.apache.spark.ml.linalg.DenseVector
+import org.apache.spark.sql.{DataFrame, SparkSession}
+import com.microsoft.ml.spark.core.test.base.TestBase
+import com.microsoft.ml.spark.io.IOImplicits.dfrToDfre
+import org.apache.commons.io.FileUtils
+import org.apache.spark.sql.functions.col
+
+trait ImageTestUtils extends TestBase {
+
+ val filesRoot = BuildInfo.datasetDir.toString
+ val imagePath = FileUtilities.join(filesRoot, "Images", "CIFAR").toString
+ val modelPath = FileUtilities.join(filesRoot, "CNTKModel", "ConvNet_CIFAR10.model").toString
+ val inputCol = "cntk_images"
+ val outputCol = "out"
+ val labelCol = "labels"
+
+ val featureVectorLength = 3 * 32 * 32
+ lazy val saveFile = new File(tmpDir.toFile, "spark-z.model").toString
+
+ def testModelDF(spark: SparkSession): DataFrame = {
+ import spark.implicits._
+ spark.sparkContext.parallelize(Seq(
+ Array(1.32165250, -2.1215112, 0.63150704, 0.77315974, -1.28163720,
+ -0.20210080, -2.2839167, -2.08691480, 5.08418200, -1.33741090),
+ Array(3.44079640, 1.4877119, -0.74059330, -0.34381202, -2.48724990,
+ -2.62866950, -3.1693816, -3.14182600, 4.76314800, 0.68712880),
+ Array(-1.88747900, -4.7685330, 0.15169683, 6.80547570, -0.38405967,
+ 3.41065170, 1.3302778, -0.87714905, -2.18046050, -4.16661830),
+ Array(5.01010300, 3.9860306, -1.36795600, -0.89830830, -4.49545430,
+ -4.19537070, -4.4045380, -5.81759450, 6.93805700, 1.49001510),
+ Array(-4.70754600, -6.0414960, 1.20658250, 5.40738300, 1.07661690,
+ 4.71566440, 4.3834330, -1.57187440, -2.96569730, -5.43208270),
+ Array(-1.23873880, -3.2042341, 2.54533000, 5.51954800, 2.89042470,
+ 0.12380804, 3.8639085, -4.79466800, -2.41463420, -5.17418430))).toDF
+ }
+
+ def testImages(spark: SparkSession): DataFrame = {
+ val images = spark.read.image.load(imagePath)
+
+ val unroll = new UnrollImage().setInputCol("image").setOutputCol(inputCol)
+
+ unroll.transform(images).select(inputCol)
+ }
+
+ def makeFakeData(spark: SparkSession, rows: Int, size: Int, outputDouble: Boolean = false): DataFrame = {
+ import spark.implicits._
+ if (outputDouble) {
+ List
+ .fill(rows)(List.fill(size)(0.0).toArray)
+ .zip(List.fill(rows)(0.0))
+ .toDF(inputCol, labelCol)
+ } else {
+ List
+ .fill(rows)(List.fill(size)(0.0.toFloat).toArray)
+ .zip(List.fill(rows)(0.0))
+ .toDF(inputCol, labelCol)
+ }
+ }
+
+ protected def compareToTestModel(result: DataFrame) = {
+ //TODO improve checks
+ assert(result.columns.toSet == Set(inputCol, outputCol))
+ assert(result.count() == testModelDF(result.sparkSession).count())
+ val max = result
+ .select(outputCol)
+ .collect()
+ .map(row => row.getAs[DenseVector](0).toArray.max)
+ .max
+ assert(max < 10 && max > -10)
+ }
+
+ lazy val images: DataFrame = spark.read.image.load(imagePath)
+ .withColumnRenamed("image", inputCol)
+ lazy val binaryImages: DataFrame = spark.read.binary.load(imagePath)
+ .select(col("value.bytes").alias(inputCol))
+
+ lazy val groceriesPath = FileUtilities.join(BuildInfo.datasetDir, "Images","Grocery")
+ lazy val groceryImages: DataFrame = spark.read.image
+ .option("dropInvalid", true)
+ .load(groceriesPath + "**")
+ .withColumnRenamed("image", inputCol)
+
+ lazy val greyscaleImageLocation: String = {
+ val loc = "/tmp/greyscale.jpg"
+ val f = new File(loc)
+ if (f.exists()) {f.delete()}
+ FileUtils.copyURLToFile(new URL("https://mmlspark.blob.core.windows.net/datasets/LIME/greyscale.jpg"), f)
+ loc
+ }
+
+ lazy val greyscaleImage: DataFrame = spark
+ .read.image.load(greyscaleImageLocation)
+ .select(col("image").alias(inputCol))
+
+ lazy val greyscaleBinary: DataFrame = spark
+ .read.binary.load(greyscaleImageLocation)
+ .select(col("value.bytes").alias(inputCol))
+
+}
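Editor's note: the new ImageTestUtils trait consolidates the image fixtures (CIFAR paths, unrolled images, greyscale downloads) that were previously duplicated across the CNTK and image suites; later hunks in this diff switch those suites over to it. A minimal sketch of the intended mix-in pattern — the suite name and assertions here are illustrative, not part of this change:

```scala
// Illustrative suite only; ImageTestUtils supplies spark, imagePath,
// inputCol, and the lazy image DataFrames defined above.
class ExampleImageSuite extends ImageTestUtils {
  test("CIFAR images unroll into a single feature column") {
    val df = testImages(spark)            // loads images and unrolls them
    assert(df.columns.toSeq == Seq(inputCol))
    assert(df.count() > 0)
  }
}
```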
diff --git a/src/test/scala/com/microsoft/ml/spark/io/split1/BinaryFileReaderSuite.scala b/core/src/test/scala/com/microsoft/ml/spark/io/split1/BinaryFileReaderSuite.scala
similarity index 100%
rename from src/test/scala/com/microsoft/ml/spark/io/split1/BinaryFileReaderSuite.scala
rename to core/src/test/scala/com/microsoft/ml/spark/io/split1/BinaryFileReaderSuite.scala
diff --git a/src/test/scala/com/microsoft/ml/spark/io/split1/HTTPTransformerSuite.scala b/core/src/test/scala/com/microsoft/ml/spark/io/split1/HTTPTransformerSuite.scala
similarity index 100%
rename from src/test/scala/com/microsoft/ml/spark/io/split1/HTTPTransformerSuite.scala
rename to core/src/test/scala/com/microsoft/ml/spark/io/split1/HTTPTransformerSuite.scala
diff --git a/src/test/scala/com/microsoft/ml/spark/io/split1/ImageReaderSuite.scala b/core/src/test/scala/com/microsoft/ml/spark/io/split1/ImageReaderSuite.scala
similarity index 99%
rename from src/test/scala/com/microsoft/ml/spark/io/split1/ImageReaderSuite.scala
rename to core/src/test/scala/com/microsoft/ml/spark/io/split1/ImageReaderSuite.scala
index 13592cec90b..b611ef5158e 100644
--- a/src/test/scala/com/microsoft/ml/spark/io/split1/ImageReaderSuite.scala
+++ b/core/src/test/scala/com/microsoft/ml/spark/io/split1/ImageReaderSuite.scala
@@ -5,7 +5,7 @@ package com.microsoft.ml.spark.io.split1
import java.io.{File, FileInputStream}
-import com.microsoft.ml.spark.cognitive.OsUtils
+import com.microsoft.ml.spark.core.utils.OsUtils
import com.microsoft.ml.spark.core.env.FileUtilities
import com.microsoft.ml.spark.core.schema.ImageSchemaUtils
import com.microsoft.ml.spark.core.test.base.TestBase
diff --git a/src/test/scala/com/microsoft/ml/spark/io/split1/ParserSuite.scala b/core/src/test/scala/com/microsoft/ml/spark/io/split1/ParserSuite.scala
similarity index 100%
rename from src/test/scala/com/microsoft/ml/spark/io/split1/ParserSuite.scala
rename to core/src/test/scala/com/microsoft/ml/spark/io/split1/ParserSuite.scala
diff --git a/src/test/scala/com/microsoft/ml/spark/io/split1/PowerBiSuite.scala b/core/src/test/scala/com/microsoft/ml/spark/io/split1/PowerBiSuite.scala
similarity index 100%
rename from src/test/scala/com/microsoft/ml/spark/io/split1/PowerBiSuite.scala
rename to core/src/test/scala/com/microsoft/ml/spark/io/split1/PowerBiSuite.scala
diff --git a/src/test/scala/com/microsoft/ml/spark/io/split1/SimpleHTTPTransformerSuite.scala b/core/src/test/scala/com/microsoft/ml/spark/io/split1/SimpleHTTPTransformerSuite.scala
similarity index 100%
rename from src/test/scala/com/microsoft/ml/spark/io/split1/SimpleHTTPTransformerSuite.scala
rename to core/src/test/scala/com/microsoft/ml/spark/io/split1/SimpleHTTPTransformerSuite.scala
diff --git a/src/test/scala/com/microsoft/ml/spark/io/split2/ContinuousHTTPSuite.scala b/core/src/test/scala/com/microsoft/ml/spark/io/split2/ContinuousHTTPSuite.scala
similarity index 100%
rename from src/test/scala/com/microsoft/ml/spark/io/split2/ContinuousHTTPSuite.scala
rename to core/src/test/scala/com/microsoft/ml/spark/io/split2/ContinuousHTTPSuite.scala
diff --git a/src/test/scala/com/microsoft/ml/spark/io/split2/DistributedHTTPSuite.scala b/core/src/test/scala/com/microsoft/ml/spark/io/split2/DistributedHTTPSuite.scala
similarity index 100%
rename from src/test/scala/com/microsoft/ml/spark/io/split2/DistributedHTTPSuite.scala
rename to core/src/test/scala/com/microsoft/ml/spark/io/split2/DistributedHTTPSuite.scala
diff --git a/src/test/scala/com/microsoft/ml/spark/io/split2/HTTPSuite.scala b/core/src/test/scala/com/microsoft/ml/spark/io/split2/HTTPSuite.scala
similarity index 100%
rename from src/test/scala/com/microsoft/ml/spark/io/split2/HTTPSuite.scala
rename to core/src/test/scala/com/microsoft/ml/spark/io/split2/HTTPSuite.scala
diff --git a/src/test/scala/com/microsoft/ml/spark/io/split2/HTTPv2Suite.scala b/core/src/test/scala/com/microsoft/ml/spark/io/split2/HTTPv2Suite.scala
similarity index 100%
rename from src/test/scala/com/microsoft/ml/spark/io/split2/HTTPv2Suite.scala
rename to core/src/test/scala/com/microsoft/ml/spark/io/split2/HTTPv2Suite.scala
diff --git a/src/test/scala/com/microsoft/ml/spark/isolationforest/VerifyIsolationForest.scala b/core/src/test/scala/com/microsoft/ml/spark/isolationforest/VerifyIsolationForest.scala
similarity index 98%
rename from src/test/scala/com/microsoft/ml/spark/isolationforest/VerifyIsolationForest.scala
rename to core/src/test/scala/com/microsoft/ml/spark/isolationforest/VerifyIsolationForest.scala
index e623605967e..2ee5fd153e2 100644
--- a/src/test/scala/com/microsoft/ml/spark/isolationforest/VerifyIsolationForest.scala
+++ b/core/src/test/scala/com/microsoft/ml/spark/isolationforest/VerifyIsolationForest.scala
@@ -86,10 +86,6 @@ class VerifyIsolationForest extends Benchmarks with EstimatorFuzzing[IsolationFo
data
}
- test("foo"){
- new IsolationForest().makePyFile()
- }
-
override def reader: MLReadable[_] = IsolationForest
override def modelReader: MLReadable[_] = IsolationForestModel
diff --git a/core/src/test/scala/com/microsoft/ml/spark/lime/LIMESuite.scala b/core/src/test/scala/com/microsoft/ml/spark/lime/LIMESuite.scala
new file mode 100644
index 00000000000..b58e597944b
--- /dev/null
+++ b/core/src/test/scala/com/microsoft/ml/spark/lime/LIMESuite.scala
@@ -0,0 +1,66 @@
+// Copyright (C) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License. See LICENSE in project root for information.
+
+package com.microsoft.ml.spark.lime
+
+import breeze.linalg.{*, DenseMatrix}
+import breeze.stats.distributions.Rand
+import com.microsoft.ml.spark.core.test.base.TestBase
+import com.microsoft.ml.spark.core.test.fuzzing.{EstimatorFuzzing, TestObject, TransformerFuzzing}
+import org.apache.spark.ml.linalg.DenseVector
+import org.apache.spark.ml.param.DataFrameEquality
+import org.apache.spark.ml.regression.LinearRegression
+import org.apache.spark.ml.util.MLReadable
+
+trait LimeTestBase extends TestBase {
+
+ import spark.implicits._
+
+ lazy val nRows = 100
+ lazy val d1 = 3
+ lazy val d2 = 1
+
+ lazy val m: DenseMatrix[Double] = new DenseMatrix(d1, d2, Array(1.0, -1.0, 2.0))
+ lazy val x: DenseMatrix[Double] = DenseMatrix.rand(nRows, d1, Rand.gaussian)
+ lazy val noise: DenseMatrix[Double] = DenseMatrix.rand(nRows, d2, Rand.gaussian) * 0.1
+ lazy val y = x * m //+ noise
+
+ lazy val xRows = x(*, ::).iterator.toSeq.map(dv => new DenseVector(dv.toArray))
+ lazy val yRows = y(*, ::).iterator.toSeq.map(dv => dv(0))
+ lazy val df = xRows.zip(yRows).toDF("features", "label")
+
+ lazy val model = new LinearRegression().fit(df)
+
+ lazy val lime = new TabularLIME()
+ .setModel(model)
+ .setInputCol("features")
+ .setPredictionCol(model.getPredictionCol)
+ .setOutputCol("out")
+ .setNSamples(1000)
+
+ lazy val limeModel = lime.fit(df)
+}
+
+class TabularLIMESuite extends EstimatorFuzzing[TabularLIME] with
+ DataFrameEquality with LimeTestBase {
+
+ test("text lime usage test check") {
+ val results = limeModel.transform(df).select("out")
+ .collect().map(_.getAs[DenseVector](0))
+ results.foreach(result => assert(result === new DenseVector(m.data)))
+ }
+
+ override def testObjects(): Seq[TestObject[TabularLIME]] = Seq(new TestObject(lime, df))
+
+ override def reader: MLReadable[_] = TabularLIME
+
+ override def modelReader: MLReadable[_] = TabularLIMEModel
+}
+
+class TabularLIMEModelSuite extends TransformerFuzzing[TabularLIMEModel] with
+ DataFrameEquality with LimeTestBase {
+
+ override def testObjects(): Seq[TestObject[TabularLIMEModel]] = Seq(new TestObject(limeModel, df))
+
+ override def reader: MLReadable[_] = TabularLIMEModel
+}
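Editor's note on why the assertion in TabularLIMESuite holds: the noise term is commented out above, so the label is exactly y = x * m, and the local linear surrogates that LIME fits should recover the global coefficients m at every point. Distilled from the fixtures in LimeTestBase (nothing new is assumed beyond them):

```scala
// Every per-row explanation vector should equal the true coefficients
// (1.0, -1.0, 2.0) of the globally linear model being explained.
val explanations = limeModel.transform(df)
  .select("out").collect()
  .map(_.getAs[DenseVector](0))
explanations.foreach(w => assert(w === new DenseVector(m.data)))
```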
diff --git a/src/test/scala/com/microsoft/ml/spark/lime/SuperpixelSuite.scala b/core/src/test/scala/com/microsoft/ml/spark/lime/SuperpixelSuite.scala
similarity index 96%
rename from src/test/scala/com/microsoft/ml/spark/lime/SuperpixelSuite.scala
rename to core/src/test/scala/com/microsoft/ml/spark/lime/SuperpixelSuite.scala
index 5d2c26e330f..289720f9691 100644
--- a/src/test/scala/com/microsoft/ml/spark/lime/SuperpixelSuite.scala
+++ b/core/src/test/scala/com/microsoft/ml/spark/lime/SuperpixelSuite.scala
@@ -7,13 +7,13 @@ import java.awt.Color
import java.awt.image.BufferedImage
import java.io.File
-import com.microsoft.ml.spark.cntk.CNTKTestUtils
+import com.microsoft.ml.spark.image.ImageTestUtils
import com.microsoft.ml.spark.io.image.ImageUtils
import javax.imageio.ImageIO
import scala.util.Random
-class SuperpixelSuite extends CNTKTestUtils {
+class SuperpixelSuite extends ImageTestUtils {
lazy val sp1 = new Superpixel(img, 16, 130)
lazy val sp2 = new Superpixel(img2, 100, 130)
diff --git a/src/test/scala/com/microsoft/ml/spark/lime/SuperpixelTransformerSuite.scala b/core/src/test/scala/com/microsoft/ml/spark/lime/SuperpixelTransformerSuite.scala
similarity index 90%
rename from src/test/scala/com/microsoft/ml/spark/lime/SuperpixelTransformerSuite.scala
rename to core/src/test/scala/com/microsoft/ml/spark/lime/SuperpixelTransformerSuite.scala
index 881aefed41a..0c4a5b78d0b 100644
--- a/src/test/scala/com/microsoft/ml/spark/lime/SuperpixelTransformerSuite.scala
+++ b/core/src/test/scala/com/microsoft/ml/spark/lime/SuperpixelTransformerSuite.scala
@@ -4,12 +4,12 @@
package com.microsoft.ml.spark.lime
import com.microsoft.ml.spark.core.test.fuzzing.{TestObject, TransformerFuzzing}
-import com.microsoft.ml.spark.image.NetworkUtils
+import com.microsoft.ml.spark.image.ImageTestUtils
import com.microsoft.ml.spark.io.split1.FileReaderUtils
import org.apache.spark.ml.util.MLReadable
class SuperpixelTransformerSuite extends TransformerFuzzing[SuperpixelTransformer]
- with NetworkUtils with FileReaderUtils {
+ with ImageTestUtils with FileReaderUtils {
lazy val spt: SuperpixelTransformer = new SuperpixelTransformer().setInputCol(inputCol)
test("basic functionality"){
diff --git a/src/test/scala/com/microsoft/ml/spark/lime/TextLIMESuite.scala b/core/src/test/scala/com/microsoft/ml/spark/lime/TextLIMESuite.scala
similarity index 100%
rename from src/test/scala/com/microsoft/ml/spark/lime/TextLIMESuite.scala
rename to core/src/test/scala/com/microsoft/ml/spark/lime/TextLIMESuite.scala
diff --git a/src/test/scala/com/microsoft/ml/spark/nbtest/DatabricksUtilities.scala b/core/src/test/scala/com/microsoft/ml/spark/nbtest/DatabricksUtilities.scala
similarity index 97%
rename from src/test/scala/com/microsoft/ml/spark/nbtest/DatabricksUtilities.scala
rename to core/src/test/scala/com/microsoft/ml/spark/nbtest/DatabricksUtilities.scala
index 641afac6265..e72432bd342 100644
--- a/src/test/scala/com/microsoft/ml/spark/nbtest/DatabricksUtilities.scala
+++ b/core/src/test/scala/com/microsoft/ml/spark/nbtest/DatabricksUtilities.scala
@@ -42,9 +42,7 @@ object DatabricksUtilities extends HasHttpClient {
val Folder = s"/MMLSparkBuild/build_${BuildInfo.version}"
// MMLSpark info
- val TruncatedScalaVersion: String = BuildInfo.scalaVersion
- .split(".".toCharArray.head).dropRight(1).mkString(".")
- val Version = s"com.microsoft.ml.spark:${BuildInfo.name}_$TruncatedScalaVersion:${BuildInfo.version}"
+ val Version = s"com.microsoft.ml.spark:mmlspark:${BuildInfo.version}"
val Repository = "https://mmlspark.azureedge.net/maven"
val Libraries: String = List(
@@ -59,7 +57,7 @@ object DatabricksUtilities extends HasHttpClient {
val TimeoutInMillis: Int = 40 * 60 * 1000
val NotebookFiles: Array[File] = Option(
- FileUtilities.join(BuildInfo.baseDirectory, "notebooks", "samples").getCanonicalFile.listFiles()
+ FileUtilities.join(BuildInfo.baseDirectory.getParent, "notebooks").getCanonicalFile.listFiles()
).get
val ParallizableNotebooks = NotebookFiles.filterNot(_.getName.contains("Vowpal"))
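Editor's note: the deleted TruncatedScalaVersion helper stripped the patch segment from the Scala version so the old Maven coordinate could embed the binary-compatibility prefix; the new coordinate hard-codes the `mmlspark` artifact name instead. A standalone sketch of what the removed expression computed (the literal stands in for BuildInfo.scalaVersion):

```scala
// Stand-in for BuildInfo.scalaVersion.
val scalaVersion = "2.12.10"

// The removed expression split on '.' and dropped the patch segment:
// "2.12.10" -> "2.12".
val truncated = scalaVersion.split('.').dropRight(1).mkString(".")
assert(truncated == "2.12")
```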
diff --git a/src/test/scala/com/microsoft/ml/spark/nbtest/NotebookTests.scala b/core/src/test/scala/com/microsoft/ml/spark/nbtest/NotebookTests.scala
similarity index 100%
rename from src/test/scala/com/microsoft/ml/spark/nbtest/NotebookTests.scala
rename to core/src/test/scala/com/microsoft/ml/spark/nbtest/NotebookTests.scala
diff --git a/src/test/scala/com/microsoft/ml/spark/nbtest/SprayUtilities.scala b/core/src/test/scala/com/microsoft/ml/spark/nbtest/SprayUtilities.scala
similarity index 100%
rename from src/test/scala/com/microsoft/ml/spark/nbtest/SprayUtilities.scala
rename to core/src/test/scala/com/microsoft/ml/spark/nbtest/SprayUtilities.scala
diff --git a/src/test/scala/com/microsoft/ml/spark/nn/BallTreeTest.scala b/core/src/test/scala/com/microsoft/ml/spark/nn/BallTreeTest.scala
similarity index 100%
rename from src/test/scala/com/microsoft/ml/spark/nn/BallTreeTest.scala
rename to core/src/test/scala/com/microsoft/ml/spark/nn/BallTreeTest.scala
diff --git a/src/test/scala/com/microsoft/ml/spark/nn/ConditionalBallTreeTest.scala b/core/src/test/scala/com/microsoft/ml/spark/nn/ConditionalBallTreeTest.scala
similarity index 100%
rename from src/test/scala/com/microsoft/ml/spark/nn/ConditionalBallTreeTest.scala
rename to core/src/test/scala/com/microsoft/ml/spark/nn/ConditionalBallTreeTest.scala
diff --git a/src/test/scala/com/microsoft/ml/spark/nn/KNNTest.scala b/core/src/test/scala/com/microsoft/ml/spark/nn/KNNTest.scala
similarity index 100%
rename from src/test/scala/com/microsoft/ml/spark/nn/KNNTest.scala
rename to core/src/test/scala/com/microsoft/ml/spark/nn/KNNTest.scala
diff --git a/src/test/scala/com/microsoft/ml/spark/recommendation/RankingAdapterSpec.scala b/core/src/test/scala/com/microsoft/ml/spark/recommendation/RankingAdapterSpec.scala
similarity index 100%
rename from src/test/scala/com/microsoft/ml/spark/recommendation/RankingAdapterSpec.scala
rename to core/src/test/scala/com/microsoft/ml/spark/recommendation/RankingAdapterSpec.scala
diff --git a/src/test/scala/com/microsoft/ml/spark/recommendation/RankingEvaluatorSpec.scala b/core/src/test/scala/com/microsoft/ml/spark/recommendation/RankingEvaluatorSpec.scala
similarity index 100%
rename from src/test/scala/com/microsoft/ml/spark/recommendation/RankingEvaluatorSpec.scala
rename to core/src/test/scala/com/microsoft/ml/spark/recommendation/RankingEvaluatorSpec.scala
diff --git a/src/test/scala/com/microsoft/ml/spark/recommendation/RankingTestBase.scala b/core/src/test/scala/com/microsoft/ml/spark/recommendation/RankingTestBase.scala
similarity index 100%
rename from src/test/scala/com/microsoft/ml/spark/recommendation/RankingTestBase.scala
rename to core/src/test/scala/com/microsoft/ml/spark/recommendation/RankingTestBase.scala
diff --git a/src/test/scala/com/microsoft/ml/spark/recommendation/RankingTrainValidationSpec.scala b/core/src/test/scala/com/microsoft/ml/spark/recommendation/RankingTrainValidationSpec.scala
similarity index 100%
rename from src/test/scala/com/microsoft/ml/spark/recommendation/RankingTrainValidationSpec.scala
rename to core/src/test/scala/com/microsoft/ml/spark/recommendation/RankingTrainValidationSpec.scala
diff --git a/src/test/scala/com/microsoft/ml/spark/recommendation/RecommendationIndexerSpec.scala b/core/src/test/scala/com/microsoft/ml/spark/recommendation/RecommendationIndexerSpec.scala
similarity index 100%
rename from src/test/scala/com/microsoft/ml/spark/recommendation/RecommendationIndexerSpec.scala
rename to core/src/test/scala/com/microsoft/ml/spark/recommendation/RecommendationIndexerSpec.scala
diff --git a/src/test/scala/com/microsoft/ml/spark/recommendation/SARSpec.scala b/core/src/test/scala/com/microsoft/ml/spark/recommendation/SARSpec.scala
similarity index 100%
rename from src/test/scala/com/microsoft/ml/spark/recommendation/SARSpec.scala
rename to core/src/test/scala/com/microsoft/ml/spark/recommendation/SARSpec.scala
diff --git a/src/test/scala/com/microsoft/ml/spark/stages/BatchIteratorSuite.scala b/core/src/test/scala/com/microsoft/ml/spark/stages/BatchIteratorSuite.scala
similarity index 100%
rename from src/test/scala/com/microsoft/ml/spark/stages/BatchIteratorSuite.scala
rename to core/src/test/scala/com/microsoft/ml/spark/stages/BatchIteratorSuite.scala
diff --git a/src/test/scala/com/microsoft/ml/spark/stages/CacherSuite.scala b/core/src/test/scala/com/microsoft/ml/spark/stages/CacherSuite.scala
similarity index 100%
rename from src/test/scala/com/microsoft/ml/spark/stages/CacherSuite.scala
rename to core/src/test/scala/com/microsoft/ml/spark/stages/CacherSuite.scala
diff --git a/src/test/scala/com/microsoft/ml/spark/stages/ClassBalancerSuite.scala b/core/src/test/scala/com/microsoft/ml/spark/stages/ClassBalancerSuite.scala
similarity index 100%
rename from src/test/scala/com/microsoft/ml/spark/stages/ClassBalancerSuite.scala
rename to core/src/test/scala/com/microsoft/ml/spark/stages/ClassBalancerSuite.scala
diff --git a/src/test/scala/com/microsoft/ml/spark/stages/DropColumnsSuite.scala b/core/src/test/scala/com/microsoft/ml/spark/stages/DropColumnsSuite.scala
similarity index 92%
rename from src/test/scala/com/microsoft/ml/spark/stages/DropColumnsSuite.scala
rename to core/src/test/scala/com/microsoft/ml/spark/stages/DropColumnsSuite.scala
index 1507d152500..c96764cfd29 100644
--- a/src/test/scala/com/microsoft/ml/spark/stages/DropColumnsSuite.scala
+++ b/core/src/test/scala/com/microsoft/ml/spark/stages/DropColumnsSuite.scala
@@ -3,9 +3,8 @@
package com.microsoft.ml.spark.stages
-import com.microsoft.ml.spark.codegen.Config
import com.microsoft.ml.spark.core.test.base.TestBase
-import com.microsoft.ml.spark.core.test.fuzzing.{PyTestFuzzing, TestObject, TransformerFuzzing}
+import com.microsoft.ml.spark.core.test.fuzzing.{TestObject, TransformerFuzzing}
import org.apache.spark.ml.util.MLReadable
class DropColumnsSuite extends TestBase with TransformerFuzzing[DropColumns] {
diff --git a/src/test/scala/com/microsoft/ml/spark/stages/EnsembleByKeySuite.scala b/core/src/test/scala/com/microsoft/ml/spark/stages/EnsembleByKeySuite.scala
similarity index 100%
rename from src/test/scala/com/microsoft/ml/spark/stages/EnsembleByKeySuite.scala
rename to core/src/test/scala/com/microsoft/ml/spark/stages/EnsembleByKeySuite.scala
diff --git a/src/test/scala/com/microsoft/ml/spark/stages/ExplodeSuite.scala b/core/src/test/scala/com/microsoft/ml/spark/stages/ExplodeSuite.scala
similarity index 100%
rename from src/test/scala/com/microsoft/ml/spark/stages/ExplodeSuite.scala
rename to core/src/test/scala/com/microsoft/ml/spark/stages/ExplodeSuite.scala
diff --git a/src/test/scala/com/microsoft/ml/spark/stages/LambdaSuite.scala b/core/src/test/scala/com/microsoft/ml/spark/stages/LambdaSuite.scala
similarity index 100%
rename from src/test/scala/com/microsoft/ml/spark/stages/LambdaSuite.scala
rename to core/src/test/scala/com/microsoft/ml/spark/stages/LambdaSuite.scala
diff --git a/src/test/scala/com/microsoft/ml/spark/stages/MiniBatchTransformerSuite.scala b/core/src/test/scala/com/microsoft/ml/spark/stages/MiniBatchTransformerSuite.scala
similarity index 100%
rename from src/test/scala/com/microsoft/ml/spark/stages/MiniBatchTransformerSuite.scala
rename to core/src/test/scala/com/microsoft/ml/spark/stages/MiniBatchTransformerSuite.scala
diff --git a/src/test/scala/com/microsoft/ml/spark/stages/MultiColumnAdapterSpec.scala b/core/src/test/scala/com/microsoft/ml/spark/stages/MultiColumnAdapterSpec.scala
similarity index 100%
rename from src/test/scala/com/microsoft/ml/spark/stages/MultiColumnAdapterSpec.scala
rename to core/src/test/scala/com/microsoft/ml/spark/stages/MultiColumnAdapterSpec.scala
diff --git a/src/test/scala/com/microsoft/ml/spark/stages/RenameColumnSuite.scala b/core/src/test/scala/com/microsoft/ml/spark/stages/RenameColumnSuite.scala
similarity index 100%
rename from src/test/scala/com/microsoft/ml/spark/stages/RenameColumnSuite.scala
rename to core/src/test/scala/com/microsoft/ml/spark/stages/RenameColumnSuite.scala
diff --git a/src/test/scala/com/microsoft/ml/spark/stages/RepartitionSuite.scala b/core/src/test/scala/com/microsoft/ml/spark/stages/RepartitionSuite.scala
similarity index 100%
rename from src/test/scala/com/microsoft/ml/spark/stages/RepartitionSuite.scala
rename to core/src/test/scala/com/microsoft/ml/spark/stages/RepartitionSuite.scala
diff --git a/src/test/scala/com/microsoft/ml/spark/stages/SelectColumnsSuite.scala b/core/src/test/scala/com/microsoft/ml/spark/stages/SelectColumnsSuite.scala
similarity index 100%
rename from src/test/scala/com/microsoft/ml/spark/stages/SelectColumnsSuite.scala
rename to core/src/test/scala/com/microsoft/ml/spark/stages/SelectColumnsSuite.scala
diff --git a/src/test/scala/com/microsoft/ml/spark/stages/StratifiedRepartitionSuite.scala b/core/src/test/scala/com/microsoft/ml/spark/stages/StratifiedRepartitionSuite.scala
similarity index 100%
rename from src/test/scala/com/microsoft/ml/spark/stages/StratifiedRepartitionSuite.scala
rename to core/src/test/scala/com/microsoft/ml/spark/stages/StratifiedRepartitionSuite.scala
diff --git a/src/test/scala/com/microsoft/ml/spark/stages/SummarizeDataSuite.scala b/core/src/test/scala/com/microsoft/ml/spark/stages/SummarizeDataSuite.scala
similarity index 100%
rename from src/test/scala/com/microsoft/ml/spark/stages/SummarizeDataSuite.scala
rename to core/src/test/scala/com/microsoft/ml/spark/stages/SummarizeDataSuite.scala
diff --git a/src/test/scala/com/microsoft/ml/spark/stages/TextPreprocessorSuite.scala b/core/src/test/scala/com/microsoft/ml/spark/stages/TextPreprocessorSuite.scala
similarity index 100%
rename from src/test/scala/com/microsoft/ml/spark/stages/TextPreprocessorSuite.scala
rename to core/src/test/scala/com/microsoft/ml/spark/stages/TextPreprocessorSuite.scala
diff --git a/src/test/scala/com/microsoft/ml/spark/stages/TimerSuite.scala b/core/src/test/scala/com/microsoft/ml/spark/stages/TimerSuite.scala
similarity index 100%
rename from src/test/scala/com/microsoft/ml/spark/stages/TimerSuite.scala
rename to core/src/test/scala/com/microsoft/ml/spark/stages/TimerSuite.scala
diff --git a/src/test/scala/com/microsoft/ml/spark/stages/UDFSuite.scala b/core/src/test/scala/com/microsoft/ml/spark/stages/UDFSuite.scala
similarity index 100%
rename from src/test/scala/com/microsoft/ml/spark/stages/UDFSuite.scala
rename to core/src/test/scala/com/microsoft/ml/spark/stages/UDFSuite.scala
diff --git a/src/test/scala/com/microsoft/ml/spark/stages/UDFTransformerSuite.scala b/core/src/test/scala/com/microsoft/ml/spark/stages/UDFTransformerSuite.scala
similarity index 100%
rename from src/test/scala/com/microsoft/ml/spark/stages/UDFTransformerSuite.scala
rename to core/src/test/scala/com/microsoft/ml/spark/stages/UDFTransformerSuite.scala
diff --git a/src/test/scala/com/microsoft/ml/spark/stages/UnicodeNormalizeSuite.scala b/core/src/test/scala/com/microsoft/ml/spark/stages/UnicodeNormalizeSuite.scala
similarity index 100%
rename from src/test/scala/com/microsoft/ml/spark/stages/UnicodeNormalizeSuite.scala
rename to core/src/test/scala/com/microsoft/ml/spark/stages/UnicodeNormalizeSuite.scala
diff --git a/src/test/scala/com/microsoft/ml/spark/train/VerifyComputeModelStatistics.scala b/core/src/test/scala/com/microsoft/ml/spark/train/VerifyComputeModelStatistics.scala
similarity index 100%
rename from src/test/scala/com/microsoft/ml/spark/train/VerifyComputeModelStatistics.scala
rename to core/src/test/scala/com/microsoft/ml/spark/train/VerifyComputeModelStatistics.scala
diff --git a/src/test/scala/com/microsoft/ml/spark/train/VerifyComputePerInstanceStatistics.scala b/core/src/test/scala/com/microsoft/ml/spark/train/VerifyComputePerInstanceStatistics.scala
similarity index 100%
rename from src/test/scala/com/microsoft/ml/spark/train/VerifyComputePerInstanceStatistics.scala
rename to core/src/test/scala/com/microsoft/ml/spark/train/VerifyComputePerInstanceStatistics.scala
diff --git a/src/test/scala/com/microsoft/ml/spark/train/VerifyTrainClassifier.scala b/core/src/test/scala/com/microsoft/ml/spark/train/VerifyTrainClassifier.scala
similarity index 100%
rename from src/test/scala/com/microsoft/ml/spark/train/VerifyTrainClassifier.scala
rename to core/src/test/scala/com/microsoft/ml/spark/train/VerifyTrainClassifier.scala
diff --git a/src/test/scala/com/microsoft/ml/spark/train/VerifyTrainRegressor.scala b/core/src/test/scala/com/microsoft/ml/spark/train/VerifyTrainRegressor.scala
similarity index 100%
rename from src/test/scala/com/microsoft/ml/spark/train/VerifyTrainRegressor.scala
rename to core/src/test/scala/com/microsoft/ml/spark/train/VerifyTrainRegressor.scala
diff --git a/src/main/R/model_downloader.R b/deep-learning/src/main/R/model_downloader.R
similarity index 100%
rename from src/main/R/model_downloader.R
rename to deep-learning/src/main/R/model_downloader.R
diff --git a/src/main/python/mmlspark/cntk/CNTKModel.py b/deep-learning/src/main/python/mmlspark/cntk/CNTKModel.py
similarity index 100%
rename from src/main/python/mmlspark/cntk/CNTKModel.py
rename to deep-learning/src/main/python/mmlspark/cntk/CNTKModel.py
diff --git a/src/test/python/mmlsparktest/__init__.py b/deep-learning/src/main/python/mmlspark/cntk/__init__.py
similarity index 100%
rename from src/test/python/mmlsparktest/__init__.py
rename to deep-learning/src/main/python/mmlspark/cntk/__init__.py
diff --git a/src/main/python/mmlspark/image/ImageFeaturizer.py b/deep-learning/src/main/python/mmlspark/image/ImageFeaturizer.py
similarity index 100%
rename from src/main/python/mmlspark/image/ImageFeaturizer.py
rename to deep-learning/src/main/python/mmlspark/image/ImageFeaturizer.py
diff --git a/src/test/python/mmlsparktest/cognitive/__init__.py b/deep-learning/src/main/python/mmlspark/image/__init__.py
similarity index 100%
rename from src/test/python/mmlsparktest/cognitive/__init__.py
rename to deep-learning/src/main/python/mmlspark/image/__init__.py
diff --git a/src/main/scala/com/microsoft/CNTK/SerializableFunction.scala b/deep-learning/src/main/scala/com/microsoft/CNTK/SerializableFunction.scala
similarity index 100%
rename from src/main/scala/com/microsoft/CNTK/SerializableFunction.scala
rename to deep-learning/src/main/scala/com/microsoft/CNTK/SerializableFunction.scala
diff --git a/src/main/scala/com/microsoft/ml/spark/cntk/CNTKFunctionParam.scala b/deep-learning/src/main/scala/com/microsoft/ml/spark/cntk/CNTKFunctionParam.scala
similarity index 100%
rename from src/main/scala/com/microsoft/ml/spark/cntk/CNTKFunctionParam.scala
rename to deep-learning/src/main/scala/com/microsoft/ml/spark/cntk/CNTKFunctionParam.scala
diff --git a/src/main/scala/com/microsoft/ml/spark/cntk/CNTKModel.scala b/deep-learning/src/main/scala/com/microsoft/ml/spark/cntk/CNTKModel.scala
similarity index 100%
rename from src/main/scala/com/microsoft/ml/spark/cntk/CNTKModel.scala
rename to deep-learning/src/main/scala/com/microsoft/ml/spark/cntk/CNTKModel.scala
diff --git a/src/main/scala/com/microsoft/ml/spark/cntk/ConversionUtils.scala b/deep-learning/src/main/scala/com/microsoft/ml/spark/cntk/ConversionUtils.scala
similarity index 100%
rename from src/main/scala/com/microsoft/ml/spark/cntk/ConversionUtils.scala
rename to deep-learning/src/main/scala/com/microsoft/ml/spark/cntk/ConversionUtils.scala
diff --git a/src/main/scala/com/microsoft/ml/spark/cntk/_CNTKModel.txt b/deep-learning/src/main/scala/com/microsoft/ml/spark/cntk/_CNTKModel.txt
similarity index 100%
rename from src/main/scala/com/microsoft/ml/spark/cntk/_CNTKModel.txt
rename to deep-learning/src/main/scala/com/microsoft/ml/spark/cntk/_CNTKModel.txt
diff --git a/src/main/scala/com/microsoft/ml/spark/downloader/ModelDownloader.scala b/deep-learning/src/main/scala/com/microsoft/ml/spark/downloader/ModelDownloader.scala
similarity index 91%
rename from src/main/scala/com/microsoft/ml/spark/downloader/ModelDownloader.scala
rename to deep-learning/src/main/scala/com/microsoft/ml/spark/downloader/ModelDownloader.scala
index 3b68d0ee507..54f890242b4 100644
--- a/src/main/scala/com/microsoft/ml/spark/downloader/ModelDownloader.scala
+++ b/deep-learning/src/main/scala/com/microsoft/ml/spark/downloader/ModelDownloader.scala
@@ -7,6 +7,7 @@ import java.io._
import java.net.{URI, URL}
import java.util
+import com.microsoft.ml.spark.core.utils.FaultToleranceUtils
import org.apache.commons.io.IOUtils
import org.apache.hadoop.conf.{Configuration => HadoopConf}
import org.apache.hadoop.fs.{FileSystem, LocatedFileStatus, Path}
@@ -15,10 +16,8 @@ import org.apache.log4j.LogManager
import org.apache.spark.sql.SparkSession
import spray.json._
-import scala.annotation.tailrec
import scala.collection.JavaConverters._
-import scala.concurrent.duration.{Duration, FiniteDuration}
-import scala.concurrent.{Await, ExecutionContext, Future}
+import scala.concurrent.duration.Duration
/** Abstract representation of a repository for future expansion
*
@@ -34,32 +33,6 @@ private[spark] abstract class Repository[S <: Schema] {
}
-object FaultToleranceUtils {
- def retryWithTimeout[T](times: Int, timeout: Duration)(f: => T): T ={
- try {
- Await.result(Future(f)(ExecutionContext.global), timeout)
- } catch {
- case e: Exception if times >= 1 =>
- print(s"Received exception on call, retrying: $e")
- retryWithTimeout(times-1, timeout)(f)
- }
- }
-
- val Backoffs: Seq[Int] = Seq(0, 100, 200, 500)
-
- def retryWithTimeout[T](times: Seq[Int] = Backoffs)(f: => T): T ={
- try {
- f
- } catch {
- case e: Exception if times.nonEmpty =>
- println(s"Received exception on call, retrying: $e")
- Thread.sleep(times.head)
- retryWithTimeout(times.tail)(f)
- }
- }
-
-}
-
/** Exception returned if a repo cannot find the file
*
* @param uri : location of the file
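Editor's note: FaultToleranceUtils is not deleted outright here; it moves to com.microsoft.ml.spark.core.utils (see the new import above) so that code outside the downloader, such as LightGBM's TrainUtils later in this diff, can share it. Its two signatures are unchanged, so usage stays as before; a sketch under that assumption, where fetchModel is a hypothetical flaky operation:

```scala
import com.microsoft.ml.spark.core.utils.FaultToleranceUtils
import scala.concurrent.duration._

def fetchModel(): Array[Byte] = ???  // hypothetical flaky operation

// Retry up to 3 times, timing out each attempt after 30 seconds.
val viaTimeout = FaultToleranceUtils.retryWithTimeout(3, 30.seconds)(fetchModel())

// Or retry with an explicit backoff schedule in milliseconds
// (the values below are the object's own Backoffs default).
val viaBackoff = FaultToleranceUtils.retryWithTimeout(Seq(0, 100, 200, 500))(fetchModel())
```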
diff --git a/src/main/scala/com/microsoft/ml/spark/downloader/Schema.scala b/deep-learning/src/main/scala/com/microsoft/ml/spark/downloader/Schema.scala
similarity index 100%
rename from src/main/scala/com/microsoft/ml/spark/downloader/Schema.scala
rename to deep-learning/src/main/scala/com/microsoft/ml/spark/downloader/Schema.scala
diff --git a/src/main/scala/com/microsoft/ml/spark/image/ImageFeaturizer.scala b/deep-learning/src/main/scala/com/microsoft/ml/spark/image/ImageFeaturizer.scala
similarity index 99%
rename from src/main/scala/com/microsoft/ml/spark/image/ImageFeaturizer.scala
rename to deep-learning/src/main/scala/com/microsoft/ml/spark/image/ImageFeaturizer.scala
index 2db42e83b0c..c1bb3e9e596 100644
--- a/src/main/scala/com/microsoft/ml/spark/image/ImageFeaturizer.scala
+++ b/deep-learning/src/main/scala/com/microsoft/ml/spark/image/ImageFeaturizer.scala
@@ -132,7 +132,7 @@ class ImageFeaturizer(val uid: String) extends Transformer with HasInputCol with
/** @group getParam */
def getLayerNames: Array[String] = $(layerNames)
- setDefault(cutOutputLayers -> 1, outputCol -> (uid + "_output"), dropNa->true)
+ setDefault(cutOutputLayers -> 1, outputCol -> (uid + "_output"), dropNa -> true)
override def transform(dataset: Dataset[_]): DataFrame = {
logTransform[DataFrame]({
diff --git a/src/main/scala/com/microsoft/ml/spark/image/ImageFeaturizer.txt b/deep-learning/src/main/scala/com/microsoft/ml/spark/image/ImageFeaturizer.txt
similarity index 100%
rename from src/main/scala/com/microsoft/ml/spark/image/ImageFeaturizer.txt
rename to deep-learning/src/main/scala/com/microsoft/ml/spark/image/ImageFeaturizer.txt
diff --git a/src/test/scala/com/microsoft/ml/spark/cntk/CNTKBindingSuite.scala b/deep-learning/src/test/scala/com/microsoft/ml/spark/cntk/CNTKBindingSuite.scala
similarity index 97%
rename from src/test/scala/com/microsoft/ml/spark/cntk/CNTKBindingSuite.scala
rename to deep-learning/src/test/scala/com/microsoft/ml/spark/cntk/CNTKBindingSuite.scala
index 37b4b1ad615..f8483945360 100644
--- a/src/test/scala/com/microsoft/ml/spark/cntk/CNTKBindingSuite.scala
+++ b/deep-learning/src/test/scala/com/microsoft/ml/spark/cntk/CNTKBindingSuite.scala
@@ -9,11 +9,12 @@ import com.microsoft.CNTK.CNTKExtensions._
import com.microsoft.CNTK.{SerializableFunction => CNTKFunction, _}
import com.microsoft.ml.spark.core.env.StreamUtilities._
import com.microsoft.ml.spark.core.test.base.LinuxOnly
+import com.microsoft.ml.spark.image.ImageTestUtils
import org.apache.commons.io.IOUtils
import scala.collection.JavaConverters._
-class CNTKBindingSuite extends LinuxOnly with CNTKTestUtils {
+class CNTKBindingSuite extends LinuxOnly with ImageTestUtils {
def toSeqSeq(fvv: FloatVectorVector): Seq[Seq[Float]] = {
(0 until fvv.size.toInt).map(i =>
diff --git a/src/test/scala/com/microsoft/ml/spark/cntk/CNTKModelSuite.scala b/deep-learning/src/test/scala/com/microsoft/ml/spark/cntk/CNTKModelSuite.scala
similarity index 97%
rename from src/test/scala/com/microsoft/ml/spark/cntk/CNTKModelSuite.scala
rename to deep-learning/src/test/scala/com/microsoft/ml/spark/cntk/CNTKModelSuite.scala
index 34893a7015c..8d2285be0ad 100644
--- a/src/test/scala/com/microsoft/ml/spark/cntk/CNTKModelSuite.scala
+++ b/deep-learning/src/test/scala/com/microsoft/ml/spark/cntk/CNTKModelSuite.scala
@@ -10,6 +10,7 @@ import com.microsoft.ml.spark.build.BuildInfo
import com.microsoft.ml.spark.core.env.FileUtilities
import com.microsoft.ml.spark.core.test.base.LinuxOnly
import com.microsoft.ml.spark.core.test.fuzzing.{TestObject, TransformerFuzzing}
+import com.microsoft.ml.spark.image.ImageTestUtils
import org.apache.commons.io.FileUtils
import org.apache.spark.ml.classification.LogisticRegression
import org.apache.spark.ml.linalg.DenseVector
@@ -21,7 +22,7 @@ import org.apache.spark.sql.types._
import scala.util.Random
-class CNTKModelSuite extends LinuxOnly with CNTKTestUtils with TransformerFuzzing[CNTKModel] {
+class CNTKModelSuite extends LinuxOnly with ImageTestUtils with TransformerFuzzing[CNTKModel] {
// TODO: Move away from getTempDirectoryPath and have TestBase provide one
@@ -54,7 +55,7 @@ class CNTKModelSuite extends LinuxOnly with CNTKTestUtils with TransformerFuzzin
.setOutputNodeIndex(0)
}
- lazy val images = testImages(spark)
+ override lazy val images = testImages(spark)
import spark.implicits._
diff --git a/src/test/scala/com/microsoft/ml/spark/downloader/DownloaderSuite.scala b/deep-learning/src/test/scala/com/microsoft/ml/spark/downloader/DownloaderSuite.scala
similarity index 97%
rename from src/test/scala/com/microsoft/ml/spark/downloader/DownloaderSuite.scala
rename to deep-learning/src/test/scala/com/microsoft/ml/spark/downloader/DownloaderSuite.scala
index ee6d53933a0..f67e4b82d5c 100644
--- a/src/test/scala/com/microsoft/ml/spark/downloader/DownloaderSuite.scala
+++ b/deep-learning/src/test/scala/com/microsoft/ml/spark/downloader/DownloaderSuite.scala
@@ -7,6 +7,7 @@ import java.io.File
import java.nio.file.Files
import com.microsoft.ml.spark.core.test.base.TestBase
+import com.microsoft.ml.spark.core.utils.FaultToleranceUtils
import org.apache.commons.io.FileUtils
import scala.collection.JavaConverters._
diff --git a/src/test/scala/com/microsoft/ml/spark/image/ImageFeaturizerSuite.scala b/deep-learning/src/test/scala/com/microsoft/ml/spark/image/ImageFeaturizerSuite.scala
similarity index 81%
rename from src/test/scala/com/microsoft/ml/spark/image/ImageFeaturizerSuite.scala
rename to deep-learning/src/test/scala/com/microsoft/ml/spark/image/ImageFeaturizerSuite.scala
index 247c7a421e1..6733d1fa674 100644
--- a/src/test/scala/com/microsoft/ml/spark/image/ImageFeaturizerSuite.scala
+++ b/deep-learning/src/test/scala/com/microsoft/ml/spark/image/ImageFeaturizerSuite.scala
@@ -8,24 +8,20 @@ import java.net.{URI, URL}
import com.microsoft.ml.spark.Secrets
import com.microsoft.ml.spark.build.BuildInfo
-import com.microsoft.ml.spark.cntk.CNTKTestUtils
import com.microsoft.ml.spark.core.env.FileUtilities
-import com.microsoft.ml.spark.core.test.base.TestBase
import com.microsoft.ml.spark.core.test.fuzzing.{TestObject, TransformerFuzzing}
-import com.microsoft.ml.spark.core.utils.ModelEquality
import com.microsoft.ml.spark.downloader.{ModelDownloader, ModelSchema}
import com.microsoft.ml.spark.io.IOImplicits._
import com.microsoft.ml.spark.io.powerbi.PowerBIWriter
import com.microsoft.ml.spark.io.split1.FileReaderUtils
-import org.apache.commons.io.FileUtils
import org.apache.spark.injections.UDFUtils
import org.apache.spark.ml.linalg.DenseVector
import org.apache.spark.ml.util.MLReadable
import org.apache.spark.sql.DataFrame
-import org.apache.spark.sql.functions.{col, udf}
+import org.apache.spark.sql.functions.col
import org.apache.spark.sql.types.StringType
-trait NetworkUtils extends CNTKTestUtils with FileReaderUtils {
+trait TrainedCNTKModelUtils extends ImageTestUtils with FileReaderUtils {
lazy val modelDir = new File(filesRoot, "CNTKModel")
lazy val modelDownloader = new ModelDownloader(spark, modelDir.toURI)
@@ -33,33 +29,6 @@ trait NetworkUtils extends CNTKTestUtils with FileReaderUtils {
lazy val resNetUri: URI = new File(modelDir, "ResNet50_ImageNet.model").toURI
lazy val resNet: ModelSchema = modelDownloader.downloadByName("ResNet50")
- lazy val images: DataFrame = spark.read.image.load(imagePath)
- .withColumnRenamed("image", inputCol)
- lazy val binaryImages: DataFrame = spark.read.binary.load(imagePath)
- .select(col("value.bytes").alias(inputCol))
-
- lazy val groceriesPath = FileUtilities.join(BuildInfo.datasetDir, "Images","Grocery")
- lazy val groceryImages: DataFrame = spark.read.image
- .option("dropInvalid", true)
- .load(groceriesPath + "**")
- .withColumnRenamed("image", inputCol)
-
- lazy val greyscaleImageLocation: String = {
- val loc = "/tmp/greyscale.jpg"
- val f = new File(loc)
- if (f.exists()) {f.delete()}
- FileUtils.copyURLToFile(new URL("https://mmlspark.blob.core.windows.net/datasets/LIME/greyscale.jpg"), f)
- loc
- }
-
- lazy val greyscaleImage: DataFrame = spark
- .read.image.load(greyscaleImageLocation)
- .select(col("image").alias(inputCol))
-
- lazy val greyscaleBinary: DataFrame = spark
- .read.binary.load(greyscaleImageLocation)
- .select(col("value.bytes").alias(inputCol))
-
def resNetModel(): ImageFeaturizer = new ImageFeaturizer()
.setInputCol(inputCol)
.setOutputCol(outputCol)
@@ -68,7 +37,7 @@ trait NetworkUtils extends CNTKTestUtils with FileReaderUtils {
}
class ImageFeaturizerSuite extends TransformerFuzzing[ImageFeaturizer]
- with NetworkUtils {
+ with TrainedCNTKModelUtils {
test("Image featurizer should reproduce the CIFAR10 experiment") {
print(spark)
diff --git a/src/test/scala/com/microsoft/ml/spark/lime/LIMESuite.scala b/deep-learning/src/test/scala/com/microsoft/ml/spark/lime/ImageLIMESuite.scala
similarity index 65%
rename from src/test/scala/com/microsoft/ml/spark/lime/LIMESuite.scala
rename to deep-learning/src/test/scala/com/microsoft/ml/spark/lime/ImageLIMESuite.scala
index e83f910e377..892ba9823d8 100644
--- a/src/test/scala/com/microsoft/ml/spark/lime/LIMESuite.scala
+++ b/deep-learning/src/test/scala/com/microsoft/ml/spark/lime/ImageLIMESuite.scala
@@ -7,82 +7,23 @@ import java.awt.image.BufferedImage
import java.io.File
import java.net.URL
-import breeze.linalg.{*, DenseMatrix}
-import breeze.stats.distributions.Rand
-import com.microsoft.ml.spark.core.test.base.TestBase
-import com.microsoft.ml.spark.core.test.fuzzing.{EstimatorFuzzing, TestObject, TransformerFuzzing}
-import com.microsoft.ml.spark.image.{ImageFeaturizer, NetworkUtils}
+import com.microsoft.ml.spark.core.test.fuzzing.{TestObject, TransformerFuzzing}
+import com.microsoft.ml.spark.image.{ImageFeaturizer, TrainedCNTKModelUtils}
import com.microsoft.ml.spark.io.IOImplicits._
import com.microsoft.ml.spark.io.image.ImageUtils
import com.microsoft.ml.spark.io.split1.FileReaderUtils
import com.microsoft.ml.spark.stages.UDFTransformer
import com.microsoft.ml.spark.stages.udfs.get_value_udf
import org.apache.commons.io.FileUtils
-import org.apache.spark.injections.UDFUtils
import org.apache.spark.ml.linalg.DenseVector
import org.apache.spark.ml.param.DataFrameEquality
-import org.apache.spark.ml.regression.LinearRegression
import org.apache.spark.ml.util.MLReadable
import org.apache.spark.ml.{NamespaceInjections, PipelineModel}
import org.apache.spark.sql.functions.col
-import org.apache.spark.sql.types.DoubleType
import org.apache.spark.sql.{DataFrame, Row}
-trait LimeTestBase extends TestBase {
-
- import spark.implicits._
-
- lazy val nRows = 100
- lazy val d1 = 3
- lazy val d2 = 1
-
- lazy val m: DenseMatrix[Double] = new DenseMatrix(d1, d2, Array(1.0, -1.0, 2.0))
- lazy val x: DenseMatrix[Double] = DenseMatrix.rand(nRows, d1, Rand.gaussian)
- lazy val noise: DenseMatrix[Double] = DenseMatrix.rand(nRows, d2, Rand.gaussian) * 0.1
- lazy val y = x * m //+ noise
-
- lazy val xRows = x(*, ::).iterator.toSeq.map(dv => new DenseVector(dv.toArray))
- lazy val yRows = y(*, ::).iterator.toSeq.map(dv => dv(0))
- lazy val df = xRows.zip(yRows).toDF("features", "label")
-
- lazy val model = new LinearRegression().fit(df)
-
- lazy val lime = new TabularLIME()
- .setModel(model)
- .setInputCol("features")
- .setPredictionCol(model.getPredictionCol)
- .setOutputCol("out")
- .setNSamples(1000)
-
- lazy val limeModel = lime.fit(df)
-}
-
-class TabularLIMESuite extends EstimatorFuzzing[TabularLIME] with
- DataFrameEquality with LimeTestBase {
-
- test("text lime usage test check") {
- val results = limeModel.transform(df).select("out")
- .collect().map(_.getAs[DenseVector](0))
- results.foreach(result => assert(result === new DenseVector(m.data)))
- }
-
- override def testObjects(): Seq[TestObject[TabularLIME]] = Seq(new TestObject(lime, df))
-
- override def reader: MLReadable[_] = TabularLIME
-
- override def modelReader: MLReadable[_] = TabularLIMEModel
-}
-
-class TabularLIMEModelSuite extends TransformerFuzzing[TabularLIMEModel] with
- DataFrameEquality with LimeTestBase {
-
- override def testObjects(): Seq[TestObject[TabularLIMEModel]] = Seq(new TestObject(limeModel, df))
-
- override def reader: MLReadable[_] = TabularLIMEModel
-}
-
class ImageLIMESuite extends TransformerFuzzing[ImageLIME] with
- DataFrameEquality with NetworkUtils with FileReaderUtils {
+ DataFrameEquality with TrainedCNTKModelUtils with FileReaderUtils {
lazy val greyhoundImageLocation: String = {
val loc = "/tmp/greyhound.jpg"
diff --git a/docs/cogsvc.md b/docs/cogsvc.md
index edec95f3751..949ae14c96a 100644
--- a/docs/cogsvc.md
+++ b/docs/cogsvc.md
@@ -9,7 +9,7 @@
Azure Cognitive Services on Spark enable working with Azure’s Intelligent Services at massive scales with the Apache Spark™ distributed computing ecosystem. Cognitive Services on Spark allows users to embed general purpose and continuously improving intelligent models directly into their Apache Spark™ and SQL computations. This liberates developers from low-level networking details, so they can focus on creating intelligent, distributed applications. Each Cognitive Service acts as a SparkML transformer, so users can add services to existing SparkML pipelines. This is a great example of our [HTTP-on-Spark](http.md) capability that lets you interact with HTTP services from Apache Spark.
## Usage
-To see an example of Cognitive Services on Spark in action, take a look at [this sample notebook](../notebooks/samples/CognitiveServices%20-%20Celebrity%20Quote%20Analysis.ipynb).
+To see an example of Cognitive Services on Spark in action, take a look at [this sample notebook](../notebooks/CognitiveServices%20-%20Celebrity%20Quote%20Analysis.ipynb).
## Cognitive Services on Apache Spark™
Currently, the following Cognitive Services are available on Apache Spark™ through MMLSpark:
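Editor's note: since each Cognitive Service is exposed as a SparkML transformer, it is configured with setters and composed like any other pipeline stage. A minimal sketch, assuming the TextSentiment transformer from com.microsoft.ml.spark.cognitive and its documented setters; the key, region, and DataFrame are placeholders:

```scala
import com.microsoft.ml.spark.cognitive.TextSentiment
import org.apache.spark.sql.DataFrame

val reviews: DataFrame = ???  // placeholder: any DataFrame with a "text" column

// Configured like any other SparkML transformer (placeholder key and region).
val sentiment = new TextSentiment()
  .setSubscriptionKey(sys.env("COGNITIVE_KEY"))
  .setLocation("eastus")
  .setTextCol("text")
  .setOutputCol("sentiment")

val scored = sentiment.transform(reviews)  // adds a "sentiment" column
```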
diff --git a/docs/datasets.md b/docs/datasets.md
index 8376027f4f4..595ae3d4098 100644
--- a/docs/datasets.md
+++ b/docs/datasets.md
@@ -24,7 +24,7 @@ tab-separated file with 2 columns (`rating`, `text`) and 10000 rows. The
contains free-form text strings in the English language. You can use
`mmlspark.TextFeaturizer` to convert the text into feature vectors for machine
learning models ([see
-example](../notebooks/samples/201%20-%20Amazon%20Book%20Reviews%20-%20TextFeaturizer.ipynb)).
+example](../notebooks/201%20-%20Amazon%20Book%20Reviews%20-%20TextFeaturizer.ipynb)).
The example dataset is available
[here](https://mmlspark.azureedge.net/datasets/BookReviewsFromAmazon10K.tsv);
@@ -48,7 +48,7 @@ The example dataset is available
the original dataset is available at [Krizhevsky's
page](https://www.cs.toronto.edu/~kriz/cifar.html). The dataset has been
packaged into a gzipped tar archive. See notebook [301 - CIFAR10 CNTK CNN
-Evaluation](../notebooks/samples/301%20-%20CIFAR10%20CNTK%20CNN%20Evaluation.ipynb)
+Evaluation](../notebooks/301%20-%20CIFAR10%20CNTK%20CNN%20Evaluation.ipynb)
for an example of how to extract the image data.
Reference: [_Learning Multiple Layers of Features from Tiny
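Editor's note: the paragraph above points to `mmlspark.TextFeaturizer` for converting the review text into feature vectors (its spec file appears among the renames earlier in this diff). A hedged sketch of that conversion, assuming the standard setInputCol/setOutputCol setters; the reviews DataFrame is a placeholder for the TSV described above:

```scala
import com.microsoft.ml.spark.featurize.text.TextFeaturizer
import org.apache.spark.sql.DataFrame

val reviews: DataFrame = ???  // placeholder: the (rating, text) TSV as a DataFrame

// TextFeaturizer is an Estimator: fit learns tokenization/IDF state first.
val featurized = new TextFeaturizer()
  .setInputCol("text")
  .setOutputCol("features")
  .fit(reviews)
  .transform(reviews)
```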
diff --git a/docs/lightgbm.md b/docs/lightgbm.md
index fed5bc34131..87d5c366f2e 100644
--- a/docs/lightgbm.md
+++ b/docs/lightgbm.md
@@ -49,7 +49,7 @@ model = LightGBMRegressor(application='quantile',
```
For an end-to-end application, check out the LightGBM [notebook
-example](../notebooks/samples/LightGBM%20-%20Quantile%20Regression%20for%20Drug%20Discovery.ipynb).
+example](../notebooks/LightGBM%20-%20Quantile%20Regression%20for%20Drug%20Discovery.ipynb).
### Architecture
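Editor's note: the hunk header above shows the doc's Python snippet configuring quantile regression (`application='quantile'`). A hedged Scala equivalent, assuming the Scala setters mirror the Python keyword arguments via the usual param generation (e.g. `application` -> `setApplication`); the training DataFrame is a placeholder:

```scala
import com.microsoft.ml.spark.lightgbm.LightGBMRegressor
import org.apache.spark.sql.DataFrame

val train: DataFrame = ???  // placeholder with label/features columns

// Quantile objective, mirroring the Python doc example above.
val model = new LightGBMRegressor()
  .setApplication("quantile")
  .setLabelCol("label")
  .setFeaturesCol("features")
  .fit(train)
```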
diff --git a/docs/mmlspark-serving.md b/docs/mmlspark-serving.md
index d59e3e0c58a..9471644805f 100644
--- a/docs/mmlspark-serving.md
+++ b/docs/mmlspark-serving.md
@@ -25,7 +25,7 @@
### Jupyter Notebook Examples
-- [Deploy a classifier trained on the Adult Census Dataset](../notebooks/samples/SparkServing%20-%20Deploying%20a%20Classifier.ipynb)
+- [Deploy a classifier trained on the Adult Census Dataset](../notebooks/SparkServing%20-%20Deploying%20a%20Classifier.ipynb)
- More coming soon!
### Spark Serving Hello World
diff --git a/docs/vw.md b/docs/vw.md
index 6deaeedf089..ddb0b7f6920 100644
--- a/docs/vw.md
+++ b/docs/vw.md
@@ -58,7 +58,7 @@ model = (VowpalWabbitRegressor(args="--holdout_off --loss_function quantile -q :
Through the args parameter you can pass command line parameters to VW as documented in the [VW Wiki](https://github.com/vowpalWabbit/vowpal_wabbit/wiki/Command-Line-Arguments).
For an end-to-end application, check out the VowpalWabbit [notebook
-example](../notebooks/samples/Vowpal%20Wabbit%20-%20Quantile%20Regression%20for%20Drug%20Discovery.ipynb]).
+example](../notebooks/Vowpal%20Wabbit%20-%20Quantile%20Regression%20for%20Drug%20Discovery.ipynb).
### Hyper-parameter tuning
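Editor's note: as the paragraph above says, the args string is handed to the native VW command line verbatim, so any flag from the VW wiki can be supplied. A hedged Scala sketch, assuming a setArgs setter mirroring the Python `args` parameter shown in the hunk header; the training DataFrame is a placeholder:

```scala
import com.microsoft.ml.spark.vw.VowpalWabbitRegressor
import org.apache.spark.sql.DataFrame

val train: DataFrame = ???  // placeholder with label/features columns

// Raw VW command-line flags passed through, as in the Python example above.
val model = new VowpalWabbitRegressor()
  .setArgs("--holdout_off --loss_function quantile -q :: --power_t 0.3")
  .setLabelCol("label")
  .setFeaturesCol("features")
  .fit(train)
```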
diff --git a/environment.yaml b/environment.yaml
index 779ac784b2b..6e004d8e774 100644
--- a/environment.yaml
+++ b/environment.yaml
@@ -6,6 +6,7 @@ dependencies:
- python=3.6
- pyspark=3.0.1
- requests
+ - pip
- r-base
- r-dplyr
- r-sparklyr
diff --git a/src/main/python/mmlspark/lightgbm/LightGBMClassificationModel.py b/lightgbm/src/main/python/mmlspark/lightgbm/LightGBMClassificationModel.py
similarity index 100%
rename from src/main/python/mmlspark/lightgbm/LightGBMClassificationModel.py
rename to lightgbm/src/main/python/mmlspark/lightgbm/LightGBMClassificationModel.py
diff --git a/src/main/python/mmlspark/lightgbm/LightGBMRankerModel.py b/lightgbm/src/main/python/mmlspark/lightgbm/LightGBMRankerModel.py
similarity index 100%
rename from src/main/python/mmlspark/lightgbm/LightGBMRankerModel.py
rename to lightgbm/src/main/python/mmlspark/lightgbm/LightGBMRankerModel.py
diff --git a/src/main/python/mmlspark/lightgbm/LightGBMRegressionModel.py b/lightgbm/src/main/python/mmlspark/lightgbm/LightGBMRegressionModel.py
similarity index 100%
rename from src/main/python/mmlspark/lightgbm/LightGBMRegressionModel.py
rename to lightgbm/src/main/python/mmlspark/lightgbm/LightGBMRegressionModel.py
diff --git a/src/test/python/mmlsparktest/cyber/__init__.py b/lightgbm/src/main/python/mmlspark/lightgbm/__init__.py
similarity index 100%
rename from src/test/python/mmlsparktest/cyber/__init__.py
rename to lightgbm/src/main/python/mmlspark/lightgbm/__init__.py
diff --git a/src/main/python/mmlspark/lightgbm/mixin.py b/lightgbm/src/main/python/mmlspark/lightgbm/mixin.py
similarity index 100%
rename from src/main/python/mmlspark/lightgbm/mixin.py
rename to lightgbm/src/main/python/mmlspark/lightgbm/mixin.py
diff --git a/src/main/scala/com/microsoft/lightgbm/SWIG.scala b/lightgbm/src/main/scala/com/microsoft/lightgbm/SWIG.scala
similarity index 100%
rename from src/main/scala/com/microsoft/lightgbm/SWIG.scala
rename to lightgbm/src/main/scala/com/microsoft/lightgbm/SWIG.scala
diff --git a/src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMBase.scala b/lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMBase.scala
similarity index 100%
rename from src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMBase.scala
rename to lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMBase.scala
diff --git a/src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMClassifier.scala b/lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMClassifier.scala
similarity index 100%
rename from src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMClassifier.scala
rename to lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMClassifier.scala
diff --git a/src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMClassifier.txt b/lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMClassifier.txt
similarity index 100%
rename from src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMClassifier.txt
rename to lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMClassifier.txt
diff --git a/src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMConstants.scala b/lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMConstants.scala
similarity index 100%
rename from src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMConstants.scala
rename to lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMConstants.scala
diff --git a/src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMDelegate.scala b/lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMDelegate.scala
similarity index 100%
rename from src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMDelegate.scala
rename to lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMDelegate.scala
diff --git a/src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMModelMethods.scala b/lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMModelMethods.scala
similarity index 100%
rename from src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMModelMethods.scala
rename to lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMModelMethods.scala
diff --git a/src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMRanker.scala b/lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMRanker.scala
similarity index 100%
rename from src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMRanker.scala
rename to lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMRanker.scala
diff --git a/src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMRanker.txt b/lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMRanker.txt
similarity index 100%
rename from src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMRanker.txt
rename to lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMRanker.txt
diff --git a/src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMRegressor.scala b/lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMRegressor.scala
similarity index 100%
rename from src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMRegressor.scala
rename to lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMRegressor.scala
diff --git a/src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMRegressor.txt b/lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMRegressor.txt
similarity index 100%
rename from src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMRegressor.txt
rename to lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMRegressor.txt
diff --git a/src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMUtils.scala b/lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMUtils.scala
similarity index 100%
rename from src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMUtils.scala
rename to lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMUtils.scala
diff --git a/src/main/scala/com/microsoft/ml/spark/lightgbm/TrainUtils.scala b/lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/TrainUtils.scala
similarity index 99%
rename from src/main/scala/com/microsoft/ml/spark/lightgbm/TrainUtils.scala
rename to lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/TrainUtils.scala
index 6fc82765b8e..ccecaae33ec 100644
--- a/src/main/scala/com/microsoft/ml/spark/lightgbm/TrainUtils.scala
+++ b/lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/TrainUtils.scala
@@ -8,7 +8,7 @@ import java.net._
import com.microsoft.ml.lightgbm._
import com.microsoft.ml.spark.core.env.StreamUtilities._
-import com.microsoft.ml.spark.downloader.FaultToleranceUtils
+import com.microsoft.ml.spark.core.utils.FaultToleranceUtils
import com.microsoft.ml.spark.lightgbm.booster.LightGBMBooster
import com.microsoft.ml.spark.lightgbm.dataset.LightGBMDataset
import com.microsoft.ml.spark.lightgbm.params.{ClassifierTrainParams, TrainParams}
diff --git a/src/main/scala/com/microsoft/ml/spark/lightgbm/booster/LightGBMBooster.scala b/lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/booster/LightGBMBooster.scala
similarity index 100%
rename from src/main/scala/com/microsoft/ml/spark/lightgbm/booster/LightGBMBooster.scala
rename to lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/booster/LightGBMBooster.scala
diff --git a/src/main/scala/com/microsoft/ml/spark/lightgbm/dataset/LightGBMDataset.scala b/lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/dataset/LightGBMDataset.scala
similarity index 100%
rename from src/main/scala/com/microsoft/ml/spark/lightgbm/dataset/LightGBMDataset.scala
rename to lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/dataset/LightGBMDataset.scala
diff --git a/src/main/scala/com/microsoft/ml/spark/lightgbm/params/FObjParam.scala b/lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/params/FObjParam.scala
similarity index 100%
rename from src/main/scala/com/microsoft/ml/spark/lightgbm/params/FObjParam.scala
rename to lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/params/FObjParam.scala
diff --git a/src/main/scala/com/microsoft/ml/spark/lightgbm/params/FObjTrait.scala b/lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/params/FObjTrait.scala
similarity index 100%
rename from src/main/scala/com/microsoft/ml/spark/lightgbm/params/FObjTrait.scala
rename to lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/params/FObjTrait.scala
diff --git a/src/main/scala/com/microsoft/ml/spark/lightgbm/params/LightGBMBoosterParam.scala b/lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/params/LightGBMBoosterParam.scala
similarity index 100%
rename from src/main/scala/com/microsoft/ml/spark/lightgbm/params/LightGBMBoosterParam.scala
rename to lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/params/LightGBMBoosterParam.scala
diff --git a/src/main/scala/com/microsoft/ml/spark/lightgbm/params/LightGBMParams.scala b/lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/params/LightGBMParams.scala
similarity index 100%
rename from src/main/scala/com/microsoft/ml/spark/lightgbm/params/LightGBMParams.scala
rename to lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/params/LightGBMParams.scala
diff --git a/src/main/scala/com/microsoft/ml/spark/lightgbm/params/TrainParams.scala b/lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/params/TrainParams.scala
similarity index 100%
rename from src/main/scala/com/microsoft/ml/spark/lightgbm/params/TrainParams.scala
rename to lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/params/TrainParams.scala
diff --git a/src/main/scala/com/microsoft/ml/spark/lightgbm/swig/SwigUtils.scala b/lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/swig/SwigUtils.scala
similarity index 100%
rename from src/main/scala/com/microsoft/ml/spark/lightgbm/swig/SwigUtils.scala
rename to lightgbm/src/main/scala/com/microsoft/ml/spark/lightgbm/swig/SwigUtils.scala
diff --git a/src/test/resources/benchmarks/benchmarks_VerifyLightGBMClassifier.csv b/lightgbm/src/test/resources/benchmarks/benchmarks_VerifyLightGBMClassifier.csv
similarity index 100%
rename from src/test/resources/benchmarks/benchmarks_VerifyLightGBMClassifier.csv
rename to lightgbm/src/test/resources/benchmarks/benchmarks_VerifyLightGBMClassifier.csv
diff --git a/src/test/resources/benchmarks/benchmarks_VerifyLightGBMRegressor.csv b/lightgbm/src/test/resources/benchmarks/benchmarks_VerifyLightGBMRegressor.csv
similarity index 100%
rename from src/test/resources/benchmarks/benchmarks_VerifyLightGBMRegressor.csv
rename to lightgbm/src/test/resources/benchmarks/benchmarks_VerifyLightGBMRegressor.csv
diff --git a/src/test/scala/com/microsoft/ml/spark/lightgbm/split1/VerifyLightGBMClassifier.scala b/lightgbm/src/test/scala/com/microsoft/ml/spark/lightgbm/split1/VerifyLightGBMClassifier.scala
similarity index 100%
rename from src/test/scala/com/microsoft/ml/spark/lightgbm/split1/VerifyLightGBMClassifier.scala
rename to lightgbm/src/test/scala/com/microsoft/ml/spark/lightgbm/split1/VerifyLightGBMClassifier.scala
diff --git a/src/test/scala/com/microsoft/ml/spark/lightgbm/split2/VerifyLightGBMRanker.scala b/lightgbm/src/test/scala/com/microsoft/ml/spark/lightgbm/split2/VerifyLightGBMRanker.scala
similarity index 100%
rename from src/test/scala/com/microsoft/ml/spark/lightgbm/split2/VerifyLightGBMRanker.scala
rename to lightgbm/src/test/scala/com/microsoft/ml/spark/lightgbm/split2/VerifyLightGBMRanker.scala
diff --git a/src/test/scala/com/microsoft/ml/spark/lightgbm/split2/VerifyLightGBMRegressor.scala b/lightgbm/src/test/scala/com/microsoft/ml/spark/lightgbm/split2/VerifyLightGBMRegressor.scala
similarity index 100%
rename from src/test/scala/com/microsoft/ml/spark/lightgbm/split2/VerifyLightGBMRegressor.scala
rename to lightgbm/src/test/scala/com/microsoft/ml/spark/lightgbm/split2/VerifyLightGBMRegressor.scala
diff --git a/notebooks/samples/AzureSearchIndex - Met Artworks.ipynb b/notebooks/AzureSearchIndex - Met Artworks.ipynb
similarity index 100%
rename from notebooks/samples/AzureSearchIndex - Met Artworks.ipynb
rename to notebooks/AzureSearchIndex - Met Artworks.ipynb
diff --git a/notebooks/samples/Classification - Adult Census with Vowpal Wabbit.ipynb b/notebooks/Classification - Adult Census with Vowpal Wabbit.ipynb
similarity index 98%
rename from notebooks/samples/Classification - Adult Census with Vowpal Wabbit.ipynb
rename to notebooks/Classification - Adult Census with Vowpal Wabbit.ipynb
index e7098605ccc..4608bce764e 100644
--- a/notebooks/samples/Classification - Adult Census with Vowpal Wabbit.ipynb
+++ b/notebooks/Classification - Adult Census with Vowpal Wabbit.ipynb
@@ -8,7 +8,7 @@
"# Classification - Adult Census using Vowpal Wabbit in MMLSpark\n",
"\n",
"In this example, we predict incomes from the *Adult Census* dataset using Vowpal Wabbit (VW) classifier in MMLSpark.\n",
- "First, we read the data and split it into train and test sets as in this [example](https://github.com/Azure/mmlspark/blob/master/notebooks/samples/Classification%20-%20Adult%20Census.ipynb\n",
+ "First, we read the data and split it into train and test sets as in this [example](https://github.com/Azure/mmlspark/blob/master/notebooks/Classification%20-%20Adult%20Census.ipynb\n",
")."
]
},
diff --git a/notebooks/samples/Classification - Adult Census.ipynb b/notebooks/Classification - Adult Census.ipynb
similarity index 100%
rename from notebooks/samples/Classification - Adult Census.ipynb
rename to notebooks/Classification - Adult Census.ipynb
diff --git a/notebooks/samples/Classification - Before and After MMLSpark.ipynb b/notebooks/Classification - Before and After MMLSpark.ipynb
similarity index 100%
rename from notebooks/samples/Classification - Before and After MMLSpark.ipynb
rename to notebooks/Classification - Before and After MMLSpark.ipynb
diff --git a/notebooks/samples/Classification - Twitter Sentiment with Vowpal Wabbit.ipynb b/notebooks/Classification - Twitter Sentiment with Vowpal Wabbit.ipynb
similarity index 100%
rename from notebooks/samples/Classification - Twitter Sentiment with Vowpal Wabbit.ipynb
rename to notebooks/Classification - Twitter Sentiment with Vowpal Wabbit.ipynb
diff --git a/notebooks/samples/Cognitive Services - Overview.ipynb b/notebooks/Cognitive Services - Overview.ipynb
similarity index 100%
rename from notebooks/samples/Cognitive Services - Overview.ipynb
rename to notebooks/Cognitive Services - Overview.ipynb
diff --git a/notebooks/samples/CognitiveServices - Celebrity Quote Analysis.ipynb b/notebooks/CognitiveServices - Celebrity Quote Analysis.ipynb
similarity index 100%
rename from notebooks/samples/CognitiveServices - Celebrity Quote Analysis.ipynb
rename to notebooks/CognitiveServices - Celebrity Quote Analysis.ipynb
diff --git a/notebooks/samples/ConditionalKNN - Exploring Art Across Cultures.ipynb b/notebooks/ConditionalKNN - Exploring Art Across Cultures.ipynb
similarity index 100%
rename from notebooks/samples/ConditionalKNN - Exploring Art Across Cultures.ipynb
rename to notebooks/ConditionalKNN - Exploring Art Across Cultures.ipynb
diff --git a/notebooks/samples/CyberML - Anomalous Access Detection.ipynb b/notebooks/CyberML - Anomalous Access Detection.ipynb
similarity index 100%
rename from notebooks/samples/CyberML - Anomalous Access Detection.ipynb
rename to notebooks/CyberML - Anomalous Access Detection.ipynb
diff --git a/notebooks/samples/DeepLearning - BiLSTM Medical Entity Extraction.ipynb b/notebooks/DeepLearning - BiLSTM Medical Entity Extraction.ipynb
similarity index 100%
rename from notebooks/samples/DeepLearning - BiLSTM Medical Entity Extraction.ipynb
rename to notebooks/DeepLearning - BiLSTM Medical Entity Extraction.ipynb
diff --git a/notebooks/samples/DeepLearning - CIFAR10 Convolutional Network.ipynb b/notebooks/DeepLearning - CIFAR10 Convolutional Network.ipynb
similarity index 100%
rename from notebooks/samples/DeepLearning - CIFAR10 Convolutional Network.ipynb
rename to notebooks/DeepLearning - CIFAR10 Convolutional Network.ipynb
diff --git a/notebooks/samples/DeepLearning - Flower Image Classification.ipynb b/notebooks/DeepLearning - Flower Image Classification.ipynb
similarity index 100%
rename from notebooks/samples/DeepLearning - Flower Image Classification.ipynb
rename to notebooks/DeepLearning - Flower Image Classification.ipynb
diff --git a/notebooks/samples/DeepLearning - Transfer Learning.ipynb b/notebooks/DeepLearning - Transfer Learning.ipynb
similarity index 100%
rename from notebooks/samples/DeepLearning - Transfer Learning.ipynb
rename to notebooks/DeepLearning - Transfer Learning.ipynb
diff --git a/notebooks/samples/HttpOnSpark - Working with Arbitrary Web APIs.ipynb b/notebooks/HttpOnSpark - Working with Arbitrary Web APIs.ipynb
similarity index 100%
rename from notebooks/samples/HttpOnSpark - Working with Arbitrary Web APIs.ipynb
rename to notebooks/HttpOnSpark - Working with Arbitrary Web APIs.ipynb
diff --git a/notebooks/samples/HyperParameterTuning - Fighting Breast Cancer.ipynb b/notebooks/HyperParameterTuning - Fighting Breast Cancer.ipynb
similarity index 100%
rename from notebooks/samples/HyperParameterTuning - Fighting Breast Cancer.ipynb
rename to notebooks/HyperParameterTuning - Fighting Breast Cancer.ipynb
diff --git a/notebooks/samples/LightGBM - Overview.ipynb b/notebooks/LightGBM - Overview.ipynb
similarity index 100%
rename from notebooks/samples/LightGBM - Overview.ipynb
rename to notebooks/LightGBM - Overview.ipynb
diff --git a/notebooks/samples/ModelInterpretation - Snow Leopard Detection.ipynb b/notebooks/ModelInterpretation - Snow Leopard Detection.ipynb
similarity index 100%
rename from notebooks/samples/ModelInterpretation - Snow Leopard Detection.ipynb
rename to notebooks/ModelInterpretation - Snow Leopard Detection.ipynb
diff --git a/notebooks/samples/OpenCV - Pipeline Image Transformations.ipynb b/notebooks/OpenCV - Pipeline Image Transformations.ipynb
similarity index 100%
rename from notebooks/samples/OpenCV - Pipeline Image Transformations.ipynb
rename to notebooks/OpenCV - Pipeline Image Transformations.ipynb
diff --git a/notebooks/samples/Regression - Flight Delays with DataCleaning.ipynb b/notebooks/Regression - Flight Delays with DataCleaning.ipynb
similarity index 100%
rename from notebooks/samples/Regression - Flight Delays with DataCleaning.ipynb
rename to notebooks/Regression - Flight Delays with DataCleaning.ipynb
diff --git a/notebooks/samples/Regression - Auto Imports.ipynb b/notebooks/Regression - Auto Imports.ipynb
similarity index 100%
rename from notebooks/samples/Regression - Auto Imports.ipynb
rename to notebooks/Regression - Auto Imports.ipynb
diff --git a/notebooks/samples/Regression - Flight Delays.ipynb b/notebooks/Regression - Flight Delays.ipynb
similarity index 100%
rename from notebooks/samples/Regression - Flight Delays.ipynb
rename to notebooks/Regression - Flight Delays.ipynb
diff --git a/notebooks/samples/Regression - Vowpal Wabbit vs. LightGBM vs. Linear Regressor.ipynb b/notebooks/Regression - Vowpal Wabbit vs. LightGBM vs. Linear Regressor.ipynb
similarity index 100%
rename from notebooks/samples/Regression - Vowpal Wabbit vs. LightGBM vs. Linear Regressor.ipynb
rename to notebooks/Regression - Vowpal Wabbit vs. LightGBM vs. Linear Regressor.ipynb
diff --git a/notebooks/samples/SparkServing - Deploying a Classifier.ipynb b/notebooks/SparkServing - Deploying a Classifier.ipynb
similarity index 100%
rename from notebooks/samples/SparkServing - Deploying a Classifier.ipynb
rename to notebooks/SparkServing - Deploying a Classifier.ipynb
diff --git a/notebooks/samples/TextAnalytics - Amazon Book Reviews with Word2Vec.ipynb b/notebooks/TextAnalytics - Amazon Book Reviews with Word2Vec.ipynb
similarity index 100%
rename from notebooks/samples/TextAnalytics - Amazon Book Reviews with Word2Vec.ipynb
rename to notebooks/TextAnalytics - Amazon Book Reviews with Word2Vec.ipynb
diff --git a/notebooks/samples/TextAnalytics - Amazon Book Reviews.ipynb b/notebooks/TextAnalytics - Amazon Book Reviews.ipynb
similarity index 100%
rename from notebooks/samples/TextAnalytics - Amazon Book Reviews.ipynb
rename to notebooks/TextAnalytics - Amazon Book Reviews.ipynb
diff --git a/notebooks/samples/Vowpal Wabbit - Overview.ipynb b/notebooks/Vowpal Wabbit - Overview.ipynb
similarity index 100%
rename from notebooks/samples/Vowpal Wabbit - Overview.ipynb
rename to notebooks/Vowpal Wabbit - Overview.ipynb
diff --git a/src/main/python/mmlspark/opencv/ImageTransformer.py b/opencv/src/main/python/mmlspark/opencv/ImageTransformer.py
similarity index 100%
rename from src/main/python/mmlspark/opencv/ImageTransformer.py
rename to opencv/src/main/python/mmlspark/opencv/ImageTransformer.py
diff --git a/src/test/python/mmlsparktest/cyber/anamoly/__init__.py b/opencv/src/main/python/mmlspark/opencv/__init__.py
similarity index 100%
rename from src/test/python/mmlsparktest/cyber/anamoly/__init__.py
rename to opencv/src/main/python/mmlspark/opencv/__init__.py
diff --git a/src/main/scala/com/microsoft/ml/spark/image/ImageSetAugmenter.scala b/opencv/src/main/scala/com/microsoft/ml/spark/image/ImageSetAugmenter.scala
similarity index 100%
rename from src/main/scala/com/microsoft/ml/spark/image/ImageSetAugmenter.scala
rename to opencv/src/main/scala/com/microsoft/ml/spark/image/ImageSetAugmenter.scala
diff --git a/src/main/scala/com/microsoft/ml/spark/opencv/ImageTransformer.scala b/opencv/src/main/scala/com/microsoft/ml/spark/opencv/ImageTransformer.scala
similarity index 100%
rename from src/main/scala/com/microsoft/ml/spark/opencv/ImageTransformer.scala
rename to opencv/src/main/scala/com/microsoft/ml/spark/opencv/ImageTransformer.scala
diff --git a/src/main/scala/com/microsoft/ml/spark/opencv/OpenCVUtils.scala b/opencv/src/main/scala/com/microsoft/ml/spark/opencv/OpenCVUtils.scala
similarity index 100%
rename from src/main/scala/com/microsoft/ml/spark/opencv/OpenCVUtils.scala
rename to opencv/src/main/scala/com/microsoft/ml/spark/opencv/OpenCVUtils.scala
diff --git a/src/test/scala/com/microsoft/ml/spark/image/ImageSetAugmenterSuite.scala b/opencv/src/test/scala/com/microsoft/ml/spark/image/ImageSetAugmenterSuite.scala
similarity index 100%
rename from src/test/scala/com/microsoft/ml/spark/image/ImageSetAugmenterSuite.scala
rename to opencv/src/test/scala/com/microsoft/ml/spark/image/ImageSetAugmenterSuite.scala
diff --git a/src/test/scala/com/microsoft/ml/spark/image/ResizeImageTransformerSuite.scala b/opencv/src/test/scala/com/microsoft/ml/spark/image/ResizeImageTransformerSuite.scala
similarity index 97%
rename from src/test/scala/com/microsoft/ml/spark/image/ResizeImageTransformerSuite.scala
rename to opencv/src/test/scala/com/microsoft/ml/spark/image/ResizeImageTransformerSuite.scala
index 5d05a243ccf..b20b309bb05 100644
--- a/src/test/scala/com/microsoft/ml/spark/image/ResizeImageTransformerSuite.scala
+++ b/opencv/src/test/scala/com/microsoft/ml/spark/image/ResizeImageTransformerSuite.scala
@@ -8,15 +8,15 @@ import java.net.URL
import com.microsoft.ml.spark.core.env.FileUtilities
import com.microsoft.ml.spark.core.test.fuzzing.{TestObject, TransformerFuzzing}
-import com.microsoft.ml.spark.opencv.{ImageTestUtils, ImageTransformer}
+import com.microsoft.ml.spark.io.IOImplicits._
+import com.microsoft.ml.spark.opencv.{ImageTransformer, OpenCVTestUtils}
+import org.apache.commons.io.FileUtils
import org.apache.spark.ml.linalg.DenseVector
import org.apache.spark.ml.util.MLReadable
import org.apache.spark.sql.{DataFrame, Row}
-import com.microsoft.ml.spark.io.IOImplicits._
-import org.apache.commons.io.FileUtils
class ResizeImageTransformerSuite extends TransformerFuzzing[ResizeImageTransformer]
- with ImageTestUtils {
+ with OpenCVTestUtils {
lazy val images: DataFrame = spark.read.image
.option("dropInvalid", true).load(FileUtilities.join(fileLocation, "**").toString)
diff --git a/src/test/scala/com/microsoft/ml/spark/opencv/ImageTransformerSuite.scala b/opencv/src/test/scala/com/microsoft/ml/spark/opencv/ImageTransformerSuite.scala
similarity index 98%
rename from src/test/scala/com/microsoft/ml/spark/opencv/ImageTransformerSuite.scala
rename to opencv/src/test/scala/com/microsoft/ml/spark/opencv/ImageTransformerSuite.scala
index 6c7ab6dfe53..62a43aa5e93 100644
--- a/src/test/scala/com/microsoft/ml/spark/opencv/ImageTransformerSuite.scala
+++ b/opencv/src/test/scala/com/microsoft/ml/spark/opencv/ImageTransformerSuite.scala
@@ -23,7 +23,7 @@ import org.opencv.imgproc.Imgproc
import org.scalactic.Equality
import org.scalatest.Assertion
-trait ImageTestUtils {
+trait OpenCVTestUtils {
lazy protected val fileLocation = FileUtilities.join(BuildInfo.datasetDir, "Images", "Grocery")
protected def selectTestImageBytes(images: DataFrame): Array[Byte] = {
@@ -81,7 +81,7 @@ trait ImageTestUtils {
}
-class UnrollImageSuite extends TransformerFuzzing[UnrollImage] with ImageTestUtils with DataFrameEquality {
+class UnrollImageSuite extends TransformerFuzzing[UnrollImage] with OpenCVTestUtils with DataFrameEquality {
lazy val filesRoot = BuildInfo.datasetDir
lazy val imagePath = FileUtilities.join(filesRoot,"Images", "CIFAR").toString
@@ -128,7 +128,7 @@ class UnrollImageSuite extends TransformerFuzzing[UnrollImage] with ImageTestUti
}
class UnrollBinaryImageSuite extends TransformerFuzzing[UnrollBinaryImage]
- with ImageTestUtils with DataFrameEquality {
+ with OpenCVTestUtils with DataFrameEquality {
lazy val filesRoot = BuildInfo.datasetDir
lazy val imagePath = FileUtilities.join(filesRoot, "Images", "CIFAR").toString
@@ -163,7 +163,7 @@ class UnrollBinaryImageSuite extends TransformerFuzzing[UnrollBinaryImage]
override def reader: UnrollBinaryImage.type = UnrollBinaryImage
}
-class ImageTransformerSuite extends TransformerFuzzing[ImageTransformer] with ImageTestUtils {
+class ImageTransformerSuite extends TransformerFuzzing[ImageTransformer] with OpenCVTestUtils {
//TODO this is needed to stop the build from freezing
override def assertDFEq(df1: DataFrame, df2: DataFrame)(implicit eq: Equality[DataFrame]): Unit = {
diff --git a/pipeline.yaml b/pipeline.yaml
index 658419f1640..bf8e8fd3779 100644
--- a/pipeline.yaml
+++ b/pipeline.yaml
@@ -33,7 +33,6 @@ jobs:
pool:
vmImage: ubuntu-18.04
steps:
- - template: templates/ivy_cache.yml
- task: AzureCLI@1
displayName: 'Style Check'
inputs:
@@ -128,7 +127,6 @@ jobs:
pool:
vmImage: ubuntu-18.04
steps:
- - template: templates/ivy_cache.yml
- task: AzureCLI@1
displayName: 'Get Docker Tag + Version'
inputs:
diff --git a/project/BlobMavenPlugin.scala b/project/BlobMavenPlugin.scala
new file mode 100644
index 00000000000..de8114172e0
--- /dev/null
+++ b/project/BlobMavenPlugin.scala
@@ -0,0 +1,48 @@
+import java.io.File
+
+import BlobMavenPlugin.autoImport.publishBlob
+import BuildUtils.{join, uploadToBlob}
+import sbt._
+import Keys._
+import org.apache.ivy.core.IvyPatternHelper
+
+//noinspection ScalaStyle
+object BlobMavenPlugin extends AutoPlugin {
+ override def trigger = allRequirements
+
+ object autoImport {
+ val publishBlob = TaskKey[Unit]("publishBlob", "publish the library to mmlspark blob")
+ val blobArtifactInfo = SettingKey[String]("blobArtifactInfo")
+ }
+
+ import autoImport._
+
+ override def requires: Plugins = sbt.Plugins.empty
+
+ override lazy val projectSettings: Seq[Setting[_]] = Seq(
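+    // publishBlob: publish to the local Maven repository first (publishM2), then mirror
+    // the resulting org/artifact/version folder into the "maven" blob container.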
+ publishBlob := {
+ publishM2.value
+      //TODO make this more general - the "1.0" suffix is a hack, and there is no obvious sbt key that exposes it
+      val sourceArtifactName = s"${moduleName.value}_${scalaBinaryVersion.value}_1.0"
+      val destArtifactName = moduleName.value
+      val repositoryDir = new File(new URI(Resolver.mavenLocal.root))
+      val orgDirs = organization.value.split('.')
+ val localPackageFolder = join(repositoryDir, orgDirs ++ Seq(sourceArtifactName, version.value):_*).toString
+ val blobMavenFolder = (orgDirs ++ Seq(destArtifactName, version.value)).mkString("/")
+ uploadToBlob(localPackageFolder, blobMavenFolder, "maven")
+ println(blobArtifactInfo.value)
+ },
+ blobArtifactInfo := {
+ s"""
+ |MMLSpark Build and Release Information
+ |---------------
+ |
+ |### Maven Coordinates
+ | `${organization.value}:${moduleName.value}:${version.value}`
+ |
+ |### Maven Resolver
+ | `https://mmlspark.azureedge.net/maven`
+ |""".stripMargin
+ }
+ )
+}
\ No newline at end of file
diff --git a/project/CodegenPlugin.scala b/project/CodegenPlugin.scala
new file mode 100644
index 00000000000..59bd294aca0
--- /dev/null
+++ b/project/CodegenPlugin.scala
@@ -0,0 +1,211 @@
+import java.io.File
+
+import BuildUtils.{join, runCmd, singleUploadToBlob, zipFolder}
+import CondaPlugin.autoImport.{activateCondaEnv, condaEnvLocation, createCondaEnvTask}
+import org.apache.commons.io.FileUtils
+import sbt.Keys._
+import sbt._
+import spray.json._
+
+object CodegenConfigProtocol extends DefaultJsonProtocol {
+ implicit val CCFormat: RootJsonFormat[CodegenConfig] = jsonFormat8(CodegenConfig.apply)
+}
+
+import CodegenConfigProtocol._
+
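+// Configuration handed to the codegen entry points as a single JSON-serialized argument.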
+case class CodegenConfig(name: String,
+ jarName: Option[String],
+ topDir: String,
+ targetDir: String,
+ version: String,
+ pythonizedVersion: String,
+ rVersion: String,
+ packageName: String)
+
+//noinspection ScalaStyle
+object CodegenPlugin extends AutoPlugin {
+ override def trigger = allRequirements
+
+ override def requires: Plugins = CondaPlugin
+
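+  // Run an R command inside the conda environment, pointing R's library paths at libPath.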
+ def rCmd(activateCondaEnv: Seq[String], cmd: Seq[String], wd: File, libPath: String): Unit = {
+ runCmd(activateCondaEnv ++ cmd, wd, Map("R_LIBS" -> libPath, "R_USER_LIBS" -> libPath))
+ }
+
+ object autoImport {
+ val pythonizedVersion = settingKey[String]("Pythonized version")
+ val rVersion = settingKey[String]("R version")
+    val genPackageNamespace = settingKey[String]("root package namespace for generated code")
+    val genTestPackageNamespace = settingKey[String]("root package namespace for generated tests")
+    val genJarName = settingKey[Option[String]]("jar name handed to codegen (defaults to the artifact name)")
+
+    val targetDir = settingKey[File]("directory containing the compiled artifact")
+    val codegenDir = settingKey[File]("directory that holds generated sources and packages")
+
+ val codegen = TaskKey[Unit]("codegen", "Generate Code")
+ val testgen = TaskKey[Unit]("testgen", "Generate Tests")
+
+ val packageR = TaskKey[Unit]("packageR", "Generate roxygen docs and zip R package")
+ val publishR = TaskKey[Unit]("publishR", "publish R package to blob")
+ val testR = TaskKey[Unit]("testR", "Run testthat on R tests")
+
+ val packagePython = TaskKey[Unit]("packagePython", "Package python sdk")
+ val installPipPackage = TaskKey[Unit]("installPipPackage", "install python sdk")
+ val publishPython = TaskKey[Unit]("publishPython", "publish python wheel")
+ val testPython = TaskKey[Unit]("testPython", "test python sdk")
+
+ val mergePyCodeDir = SettingKey[File]("mergePyCodeDir")
+ val mergePyCode = TaskKey[Unit]("mergePyCode", "copy python code to a destination")
+
+ val codegenArgs = settingKey[String]("codegenArgs")
+ }
+
+ import autoImport._
+
+ override lazy val projectSettings: Seq[Setting[_]] = Seq(
+ publishMavenStyle := true,
+ codegenArgs := {
+ CodegenConfig(
+ name.value,
+ genJarName.value,
+ baseDirectory.value.getAbsolutePath,
+ targetDir.value.getAbsolutePath,
+ version.value,
+ pythonizedVersion.value,
+ rVersion.value,
+ genPackageNamespace.value
+ ).toJson.compactPrint
+ },
+ genJarName := {
+ Some(artifactName.value(
+ ScalaVersion(scalaVersion.value, scalaBinaryVersion.value),
+ projectID.value,
+ artifact.value))
+ },
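+    // codegen/testgen compile main and test first, then invoke the CodeGen/TestGen
+    // entry points with the JSON-serialized CodegenConfig as their only argument.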
+ codegen := (Def.taskDyn {
+ (Compile / compile).value
+ (Test / compile).value
+ val arg = codegenArgs.value
+ Def.task {
+ (Compile / runMain).toTask(s" com.microsoft.ml.spark.codegen.CodeGen $arg").value
+ }
+ }.value),
+ testgen := (Def.taskDyn {
+ (Compile / compile).value
+ (Test / compile).value
+ val arg = codegenArgs.value
+ Def.task {
+ (Test / runMain).toTask(s" com.microsoft.ml.spark.codegen.TestGen $arg").value
+ }
+ }.value),
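+    // Map SemVer prerelease versions to PEP 440 dev versions, e.g. "1.0.0-rc3" -> "1.0.0.dev1".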
+ pythonizedVersion := {
+ if (version.value.contains("-")) {
+        version.value.split('-').head + ".dev1"
+ } else {
+ version.value
+ }
+ },
+ rVersion := {
+ if (version.value.contains("-")) {
+        version.value.split('-').head
+ } else {
+ version.value
+ }
+ },
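+    // packageR: generate roxygen docs inside the conda env, then zip the R sources
+    // into package/R as <name>-<version>.zip.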
+ packageR := {
+ createCondaEnvTask.value
+ codegen.value
+ val rSrcDir = join(codegenDir.value, "src", "R", genPackageNamespace.value)
+ val rPackageDir = join(codegenDir.value, "package", "R")
+ val libPath = join(condaEnvLocation.value, "Lib", "R", "library").toString
+ rCmd(activateCondaEnv.value, Seq("R", "-q", "-e", "roxygen2::roxygenise()"), rSrcDir, libPath)
+ rPackageDir.mkdirs()
+ zipFolder(rSrcDir, new File(rPackageDir, s"${name.value}-${version.value}.zip"))
+ },
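+    // testR: install the generated R package into the conda env's R library, then
+    // run the testthat suite via tools/tests/run_r_tests.R.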
+ testR := {
+ packageR.value
+ publishLocal.value
+ val libPath = join(condaEnvLocation.value, "Lib", "R", "library").toString
+ val rSrcDir = join(codegenDir.value, "src", "R", genPackageNamespace.value)
+ rCmd(activateCondaEnv.value,
+ Seq("R", "CMD", "INSTALL", "--no-multiarch", "--with-keep.source", genPackageNamespace.value),
+ rSrcDir.getParentFile, libPath)
+ val testRunner = join("tools", "tests", "run_r_tests.R").getAbsolutePath
+ rCmd(activateCondaEnv.value,
+ Seq("Rscript", testRunner), rSrcDir, libPath)
+ },
+ publishR := {
+ codegen.value
+ packageR.value
+ val rPackageDir = join(codegenDir.value, "package", "R")
+ val rPackage = rPackageDir.listFiles().head
+ singleUploadToBlob(rPackage.toString, rPackage.getName, "rrr")
+ },
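+    // packagePython: copy the generated python sources next to the compiled classes
+    // (so they ship inside the jar), then build a universal wheel with bdist_wheel.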
+ packagePython := {
+ codegen.value
+ createCondaEnvTask.value
+ val destPyDir = join(targetDir.value, "classes", genPackageNamespace.value)
+ val packageDir = join(codegenDir.value, "package", "python").absolutePath
+ val pythonSrcDir = join(codegenDir.value, "src", "python")
+ if (destPyDir.exists()) FileUtils.forceDelete(destPyDir)
+ val sourcePyDir = join(pythonSrcDir.getAbsolutePath, genPackageNamespace.value)
+ FileUtils.copyDirectory(sourcePyDir, destPyDir)
+ runCmd(
+ activateCondaEnv.value ++
+ Seq(s"python", "setup.py", "bdist_wheel", "--universal", "-d", packageDir),
+ pythonSrcDir)
+ },
+ installPipPackage := {
+ packagePython.value
+ publishLocal.value
+ runCmd(
+ activateCondaEnv.value ++ Seq("pip", "install", "-I",
+ s"${name.value.replace("-", "_")}-${pythonizedVersion.value}-py2.py3-none-any.whl"),
+ join(codegenDir.value, "package", "python"))
+ },
+ publishPython := {
+ publishLocal.value
+ packagePython.value
+ val fn = s"${name.value.replace("-", "_")}-${pythonizedVersion.value}-py2.py3-none-any.whl"
+ singleUploadToBlob(
+ join(codegenDir.value, "package", "python", fn).toString,
+ version.value + "/" + fn, "pip")
+ },
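+    // mergePyCode: copy this module's generated python namespace into a shared
+    // "generated" folder, presumably so the python sources of all modules can be merged.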
+ mergePyCode := {
+ val srcDir = join(codegenDir.value, "src", "python", genPackageNamespace.value)
+ val destDir = join(mergePyCodeDir.value, "src", "python", genPackageNamespace.value)
+ FileUtils.copyDirectory(srcDir, destDir)
+ },
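+    // testPython: install the wheel, generate the python tests, then run pytest
+    // with coverage from the generated test directory.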
+ testPython := {
+ installPipPackage.value
+ testgen.value
+ runCmd(
+ activateCondaEnv.value ++ Seq("python",
+ "-m",
+ "pytest",
+ s"--cov=${genPackageNamespace.value}",
+ "--junitxml=../../../../python-test-results.xml",
+ "--cov-report=xml",
+ genTestPackageNamespace.value
+ ),
+ new File(codegenDir.value, "test/python/")
+ )
+ },
+ targetDir := {
+ artifactPath.in(packageBin).in(Compile).value.getParentFile
+ },
+ mergePyCodeDir := {
+ join(baseDirectory.value.getParent, "target", "scala-2.12", "sbt-1.0", "generated")
+ },
+ codegenDir := {
+ join(targetDir.value, "generated")
+ },
+ genPackageNamespace := {
+ "mmlspark"
+ },
+ genTestPackageNamespace := {
+ "mmlspark-test"
+ }
+
+ )
+}
\ No newline at end of file
diff --git a/project/CondaPlugin.scala b/project/CondaPlugin.scala
new file mode 100644
index 00000000000..4e3e3ce005b
--- /dev/null
+++ b/project/CondaPlugin.scala
@@ -0,0 +1,56 @@
+import BuildUtils.{osPrefix, runCmd}
+import sbt._
+import Keys._
+
+import scala.sys.process.Process
+
+//noinspection ScalaStyle
+object CondaPlugin extends AutoPlugin {
+ override def trigger = allRequirements
+
+ object autoImport {
+ val condaEnvName = settingKey[String]("Name of conda environment")
+    val cleanCondaEnvTask = TaskKey[Unit]("cleanCondaEnv", "remove conda env")
+ val condaEnvLocation = TaskKey[File]("condaEnvLocation", "get install location of conda env")
+ val createCondaEnvTask = TaskKey[Unit]("createCondaEnv", "create conda env")
+ val activateCondaEnv = settingKey[Seq[String]]("commands to activate conda environment")
+ }
+
+ import autoImport._
+ override lazy val globalSettings: Seq[Setting[_]] = Seq(
+ condaEnvName := "mmlspark",
+ cleanCondaEnvTask := {
+ runCmd(Seq("conda", "env", "remove", "--name", condaEnvName.value, "-y"))
+ },
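+    // Locate the env by parsing `conda env list`: each row is "<name> ... <path>",
+    // so pair the first and last whitespace-separated tokens and match on the name.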
+ condaEnvLocation := {
+ createCondaEnvTask.value
+ new File(Process("conda env list").lineStream.toList
+ .map(_.split("\\s+"))
+        .map(l => (l.head, l.last))
+ .filter(p => p._1 == condaEnvName.value)
+ .head._2)
+ },
+ createCondaEnvTask := {
+ val hasEnv = Process("conda env list").lineStream.toList
+ .map(_.split("\\s+").head).contains(condaEnvName.value)
+ if (!hasEnv) {
+ runCmd(Seq("conda", "env", "create", "-f", "environment.yaml"))
+ } else {
+ println("Found conda env " + condaEnvName.value)
+ }
+ },
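+    // On Windows the commands get an "activate <env> &&" prefix; elsewhere the
+    // environment's executables are assumed to already be on PATH (see TODO below).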
+ activateCondaEnv := {
+ if (sys.props("os.name").toLowerCase.contains("windows")) {
+ osPrefix ++ Seq("activate", condaEnvName.value, "&&")
+ } else {
+ Seq()
+        //TODO figure out why this doesn't work
+ //Seq("/bin/bash", "-l", "-c", "source activate " + condaEnvName, "&&")
+ }
+ }
+ )
+
+ override def requires: Plugins = sbt.Plugins.empty
+
+ override lazy val projectSettings: Seq[Setting[_]] = Seq()
+}
\ No newline at end of file
diff --git a/project/build.scala b/project/build.scala
index f7816cd5d48..06a930e33d1 100644
--- a/project/build.scala
+++ b/project/build.scala
@@ -2,8 +2,12 @@ import java.io.File
import java.lang.ProcessBuilder.Redirect
object BuildUtils {
+ def join(root: File, folders: String*): File = {
+ folders.foldLeft(root) { case (f, s) => new File(f, s) }
+ }
+
def join(folders: String*): File = {
- folders.tail.foldLeft(new File(folders.head)) { case (f, s) => new File(f, s) }
+ join(new File(folders.head), folders.tail: _*)
}
def isWindows: Boolean = {
@@ -27,7 +31,7 @@ object BuildUtils {
.redirectError(Redirect.INHERIT)
.redirectOutput(Redirect.INHERIT)
val env = pb.environment()
- envVars.foreach(p =>env.put(p._1,p._2))
+ envVars.foreach(p => env.put(p._1, p._2))
assert(pb.start().waitFor() == 0)
}
@@ -56,6 +60,7 @@ object BuildUtils {
"--account-key", Secrets.storageKey)
runCmd(osPrefix ++ command)
}
+
def singleUploadToBlob(source: String,
dest: String,
container: String,
@@ -76,6 +81,7 @@ object BuildUtils {
val (dirs, files) = dir.listFiles.sorted.partition(_.isDirectory)
(if (pred == null) files else files.filter(pred)) ++ dirs.flatMap(loop)
}
+
loop(dir)
}
@@ -91,7 +97,9 @@ object BuildUtils {
zip.putNextEntry(new ZipEntry(file.toString.substring(prefixLen).replace(java.io.File.separator, "/")))
val in = new BufferedInputStream(new FileInputStream(file), bufferSize)
var b = 0
- while (b >= 0) { zip.write(data, 0, b); b = in.read(data, 0, bufferSize) }
+ while (b >= 0) {
+ zip.write(data, 0, b); b = in.read(data, 0, bufferSize)
+ }
in.close()
zip.closeEntry()
}
diff --git a/project/plugins.sbt b/project/plugins.sbt
index cc082cf59b0..6f4bd427f23 100644
--- a/project/plugins.sbt
+++ b/project/plugins.sbt
@@ -4,4 +4,4 @@ addSbtPlugin("com.eed3si9n" % "sbt-buildinfo" % "0.9.0")
addSbtPlugin("org.xerial.sbt" % "sbt-sonatype" % "3.8")
addSbtPlugin("com.jsuereth" % "sbt-pgp" % "1.1.1")
addSbtPlugin("com.dwijnand" % "sbt-dynver" % "4.0.0")
-addSbtPlugin("org.scoverage" % "sbt-scoverage" % "1.6.0")
\ No newline at end of file
+addSbtPlugin("org.scoverage" % "sbt-scoverage" % "1.6.0")
diff --git a/src/main/python/setup.py b/src/main/python/setup.py
deleted file mode 100644
index 3ba8474be22..00000000000
--- a/src/main/python/setup.py
+++ /dev/null
@@ -1,46 +0,0 @@
-# Copyright (C) Microsoft Corporation. All rights reserved.
-# Licensed under the MIT License. See LICENSE in project root for information.
-
-import os
-from setuptools import setup, find_packages
-import codecs
-import os.path
-
-
-def read(rel_path):
- here = os.path.abspath(os.path.dirname(__file__))
- with codecs.open(os.path.join(here, rel_path), "r") as fp:
- return fp.read()
-
-
-def get_version(rel_path):
- for line in read(rel_path).splitlines():
- if line.startswith("__version__"):
- delim = '"' if '"' in line else "'"
- return line.split(delim)[1]
- return "0.0.0"
-
-
-setup(
- name="mmlspark",
- version=get_version("mmlspark/__init__.py"),
- description="Microsoft ML for Spark",
- long_description="Microsoft ML for Apache Spark contains Microsoft's open source "
- + "contributions to the Apache Spark ecosystem",
- license="MIT",
- packages=find_packages(),
- url="https://github.com/Azure/mmlspark",
- author="Microsoft",
- author_email="mmlspark-support@microsoft.com",
- classifiers=[
- "Development Status :: 3 - Alpha",
- "Intended Audience :: Developers",
- "Intended Audience :: Data Scientists",
- "Topic :: Software Development :: Datascience Tools",
- "License :: OSI Approved :: MIT License",
- "Programming Language :: Python :: 2",
- "Programming Language :: Python :: 3",
- ],
- zip_safe=True,
- package_data={"mmlspark": ["../LICENSE.txt", "../README.txt"]},
-)
diff --git a/src/main/scala/com/microsoft/ml/spark/codegen/CodegenConfig.scala b/src/main/scala/com/microsoft/ml/spark/codegen/CodegenConfig.scala
deleted file mode 100644
index 03785cbd8c9..00000000000
--- a/src/main/scala/com/microsoft/ml/spark/codegen/CodegenConfig.scala
+++ /dev/null
@@ -1,72 +0,0 @@
-// Copyright (C) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License. See LICENSE in project root for information.
-
-package com.microsoft.ml.spark.codegen
-
-import java.io.File
-
-import com.microsoft.ml.spark.build.BuildInfo
-
-object Config {
- val DebugMode = sys.env.getOrElse("DEBUGMODE", "").trim.toLowerCase == "true"
-
- val TopDir = BuildInfo.baseDirectory
- val Version = BuildInfo.version
- val PackageName = BuildInfo.name
- val TargetDir = new File(TopDir, s"target/scala-${BuildInfo.scalaVersion.slice(0,4)}")
- val ScalaSrcDir = "src/main/scala"
-
- val GeneratedDir = new File(TargetDir, "generated")
- val PackageDir = new File(GeneratedDir, "package")
- val SrcDir = new File(GeneratedDir, "src")
- val TestDir = new File(GeneratedDir, "test")
- val DocDir = new File(GeneratedDir, "doc")
- val TestDataDir = new File(GeneratedDir, "test-data")
-
- //Python Codegen Constant
- val PySrcDir = new File(SrcDir, "python")
- val PyPackageDir = new File(PackageDir, "python")
- val PyTestDir = new File(TestDir, "python")
- val PySrcOverrideDir = new File(TopDir, "src/main/python")
- val PyTestOverrideDir = new File(TopDir, "src/test/python")
-
- //R Codegen Constants
- val RSrcRoot = new File(SrcDir, "R")
- val RSrcDir = new File(RSrcRoot, "mmlspark/R")
- val RPackageDir = new File(PackageDir, "R")
- val RTestDir = new File(RSrcRoot, "mmlspark/tests")
-
- val RTestOverrideDir = new File(TopDir, "src/test/R")
- val RSrcOverrideDir = new File(TopDir, "src/main/R")
-
- //val rPackageFile = new File(rPackageDir, s"mmlspark-$mmlVer.zip")
-
- val InternalPrefix = "_"
- val ScopeDepth = " " * 4
-
- val CopyrightLines =
- s"""|# Copyright (C) Microsoft Corporation. All rights reserved.
- |# Licensed under the MIT License. See LICENSE in project root for information.
- |""".stripMargin
-
- // The __init__.py file
- def packageHelp(importString: String): String = {
- s"""|$CopyrightLines
- |
- |"\""
- |MicrosoftML is a library of Python classes to interface with the
- |Microsoft scala APIs to utilize Apache Spark to create distibuted
- |machine learning models.
- |
- |MicrosoftML simplifies training and scoring classifiers and
- |regressors, as well as facilitating the creation of models using the
- |CNTK library, images, and text.
- |"\""
- |
- |__version__ = "${BuildInfo.pythonizedVersion}"
- |__spark_package_version__ = "${BuildInfo.version}"
- |
- |$importString
- |""".stripMargin
- }
-}
diff --git a/src/test/python/mmlsparktest/nn/__init__.py b/src/test/python/mmlsparktest/nn/__init__.py
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/src/test/python/mmlsparktest/recommendation/__init__.py b/src/test/python/mmlsparktest/recommendation/__init__.py
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/src/test/python/mmlsparktest/vw/__init__.py b/src/test/python/mmlsparktest/vw/__init__.py
deleted file mode 100644
index e69de29bb2d..00000000000
diff --git a/src/test/scala/com/microsoft/ml/spark/cntk/CNTKTestUtils.scala b/src/test/scala/com/microsoft/ml/spark/cntk/CNTKTestUtils.scala
deleted file mode 100644
index 4981013301c..00000000000
--- a/src/test/scala/com/microsoft/ml/spark/cntk/CNTKTestUtils.scala
+++ /dev/null
@@ -1,80 +0,0 @@
-// Copyright (C) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License. See LICENSE in project root for information.
-
-package com.microsoft.ml.spark.cntk
-
-import java.io.File
-
-import com.microsoft.ml.spark.build.BuildInfo
-import com.microsoft.ml.spark.core.env.FileUtilities
-import com.microsoft.ml.spark.core.test.base.TestBase
-import com.microsoft.ml.spark.image.UnrollImage
-import org.apache.spark.ml.linalg.DenseVector
-import org.apache.spark.sql._
-import com.microsoft.ml.spark.io.IOImplicits._
-
-trait CNTKTestUtils extends TestBase {
-
- val filesRoot = BuildInfo.datasetDir.toString
- val imagePath = FileUtilities.join(filesRoot, "Images", "CIFAR").toString
- val modelPath = FileUtilities.join(filesRoot, "CNTKModel", "ConvNet_CIFAR10.model").toString
- val inputCol = "cntk_images"
- val outputCol = "out"
- val labelCol = "labels"
-
- val featureVectorLength = 3 * 32 * 32
- lazy val saveFile = new File(tmpDir.toFile, "spark-z.model").toString
-
- def testModelDF(spark: SparkSession): DataFrame = {
- import spark.implicits._
- spark.sparkContext.parallelize(Seq(
- Array(1.32165250, -2.1215112, 0.63150704, 0.77315974, -1.28163720,
- -0.20210080, -2.2839167, -2.08691480, 5.08418200, -1.33741090),
- Array(3.44079640, 1.4877119, -0.74059330, -0.34381202, -2.48724990,
- -2.62866950, -3.1693816, -3.14182600, 4.76314800, 0.68712880),
- Array(-1.88747900, -4.7685330, 0.15169683, 6.80547570, -0.38405967,
- 3.41065170, 1.3302778, -0.87714905, -2.18046050, -4.16661830),
- Array(5.01010300, 3.9860306, -1.36795600, -0.89830830, -4.49545430,
- -4.19537070, -4.4045380, -5.81759450, 6.93805700, 1.49001510),
- Array(-4.70754600, -6.0414960, 1.20658250, 5.40738300, 1.07661690,
- 4.71566440, 4.3834330, -1.57187440, -2.96569730, -5.43208270),
- Array(-1.23873880, -3.2042341, 2.54533000, 5.51954800, 2.89042470,
- 0.12380804, 3.8639085, -4.79466800, -2.41463420, -5.17418430))).toDF
- }
-
- def testImages(spark: SparkSession): DataFrame = {
- val images = spark.read.image.load(imagePath)
-
- val unroll = new UnrollImage().setInputCol("image").setOutputCol(inputCol)
-
- unroll.transform(images).select(inputCol)
- }
-
- def makeFakeData(spark: SparkSession, rows: Int, size: Int, outputDouble: Boolean = false): DataFrame = {
- import spark.implicits._
- if (outputDouble) {
- List
- .fill(rows)(List.fill(size)(0.0).toArray)
- .zip(List.fill(rows)(0.0))
- .toDF(inputCol, labelCol)
- } else {
- List
- .fill(rows)(List.fill(size)(0.0.toFloat).toArray)
- .zip(List.fill(rows)(0.0))
- .toDF(inputCol, labelCol)
- }
- }
-
- protected def compareToTestModel(result: DataFrame) = {
- //TODO improve checks
- assert(result.columns.toSet == Set(inputCol, outputCol))
- assert(result.count() == testModelDF(result.sparkSession).count())
- val max = result
- .select(outputCol)
- .collect()
- .map(row => row.getAs[DenseVector](0).toArray.max)
- .max
- assert(max < 10 & max > -10)
- }
-
-}
diff --git a/src/test/scala/com/microsoft/ml/spark/codegen/CodeGen.scala b/src/test/scala/com/microsoft/ml/spark/codegen/CodeGen.scala
deleted file mode 100644
index 67d667e339e..00000000000
--- a/src/test/scala/com/microsoft/ml/spark/codegen/CodeGen.scala
+++ /dev/null
@@ -1,176 +0,0 @@
-// Copyright (C) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License. See LICENSE in project root for information.
-
-package com.microsoft.ml.spark.codegen
-
-import java.io.File
-import com.microsoft.ml.spark.build.BuildInfo
-import com.microsoft.ml.spark.codegen.Config._
-import com.microsoft.ml.spark.core.env.FileUtilities._
-import com.microsoft.ml.spark.core.test.base.TestBase
-import com.microsoft.ml.spark.core.test.fuzzing.PyTestFuzzing
-import com.microsoft.ml.spark.core.utils.JarLoadingUtils.instantiateServices
-import org.apache.commons.io.FileUtils
-import org.apache.commons.io.FilenameUtils._
-
-object CodeGenUtils {
- def clean(dir: File): Unit = if (dir.exists()) FileUtils.forceDelete(dir)
-
- def toDir(f: File): File = new File(f, File.separator)
-}
-
-object CodeGen {
-
- import CodeGenUtils._
-
- def generatePythonClasses(): Unit = {
- instantiateServices[PythonWrappable].foreach { w =>
- w.makePyFile()
- }
- }
-
- def generateRClasses(): Unit = {
- instantiateServices[RWrappable].foreach { w =>
- w.makeRFile()
- }
- }
-
- private def makeInitFiles(packageFolder: String = ""): Unit = {
- val dir = new File(new File(PySrcDir, "mmlspark"), packageFolder)
- val packageString = if (packageFolder != "") packageFolder.replace("/", ".") else ""
- val importStrings =
- dir.listFiles.filter(_.isFile).sorted
- .map(_.getName)
- .filter(name => name.endsWith(".py") && !name.startsWith("_") && !name.startsWith("test"))
- .map(name => s"from mmlspark$packageString.${getBaseName(name)} import *\n").mkString("")
- writeFile(new File(dir, "__init__.py"), packageHelp(importStrings))
- dir.listFiles().filter(_.isDirectory).foreach(f =>
- makeInitFiles(packageFolder + "/" + f.getName)
- )
- }
-
- //noinspection ScalaStyle
- def generateRPackageData(): Unit = {
- // description file; need to encode version as decimal
- val today = new java.text.SimpleDateFormat("yyyy-MM-dd")
- .format(new java.util.Date())
-
- RSrcDir.mkdirs()
- writeFile(new File(RSrcDir.getParentFile, "DESCRIPTION"),
- s"""|Package: mmlspark
- |Title: Access to MMLSpark via R
- |Description: Provides an interface to MMLSpark.
- |Version: ${BuildInfo.rVersion}
- |Date: $today
- |Author: Microsoft Corporation
- |Maintainer: MMLSpark Team