Skip to content

Commit

Permalink
feat: support direct pip install (#1223)
Browse files Browse the repository at this point in the history
* feat: add publish python packages to pypi task

* add task to pack all projects into one wheel file and test in testpypi

* address comments

* fix sbt task execution order

* fix job env variable

* test

* fix env variable name

* add twineAuthenticate task

* update task

* update inputs

* update pipeline

* add pypiApiToken into Secrets -- this is needed to fetch it in sbt

* test pipeline job

* fixing..

* update pipeline

* address comments

* factor out packagePythonWheel and pyVersion

* fix .value usage error

Co-authored-by: Mark Hamilton <mhamilton723@gmail.com>
  • Loading branch information
serena-ruan and mhamilton723 authored Oct 27, 2021
1 parent 2771853 commit 3d92dd7
Show file tree
Hide file tree
Showing 8 changed files with 155 additions and 54 deletions.
93 changes: 80 additions & 13 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -107,22 +107,89 @@ rootGenDir := {
join(targetDir, "generated")
}

val generatePythonDoc = TaskKey[Unit]("generatePythonDoc", "Generate sphinx docs for python")
generatePythonDoc := {
installPipPackage.all(ScopeFilter(
inProjects(core, deepLearning, cognitive, vw, lightgbm, opencv),
inConfigurations(Compile))).value
mergePyCode.all(ScopeFilter(
def runTaskForAllInCompile(task: TaskKey[Unit]): Def.Initialize[Task[Seq[Unit]]] = {
task.all(ScopeFilter(
inProjects(core, deepLearning, cognitive, vw, lightgbm, opencv),
inConfigurations(Compile))
).value
val targetDir = artifactPath.in(packageBin).in(Compile).in(root).value.getParentFile
val codegenDir = join(targetDir, "generated")
val dir = join(codegenDir, "src", "python", "synapse")
)
}

val generatePythonDoc = TaskKey[Unit]("generatePythonDoc", "Generate sphinx docs for python")
generatePythonDoc := {
runTaskForAllInCompile(installPipPackage).value
runTaskForAllInCompile(mergePyCode).value
val dir = join(rootGenDir.value, "src", "python", "synapse")
join(dir, "__init__.py").createNewFile()
join(dir,"ml", "__init__.py").createNewFile()
runCmd(activateCondaEnv.value ++ Seq("sphinx-apidoc", "-f", "-o", "doc", "."), dir)
runCmd(activateCondaEnv.value ++ Seq("sphinx-build", "-b", "html", "doc", "../../../doc/pyspark"), dir)
join(dir, "ml", "__init__.py").createNewFile()
runCmd(activateCondaEnv ++ Seq("sphinx-apidoc", "-f", "-o", "doc", "."), dir)
runCmd(activateCondaEnv ++ Seq("sphinx-build", "-b", "html", "doc", "../../../doc/pyspark"), dir)
}

val packageSynapseML = TaskKey[Unit]("packageSynapseML", "package all projects into SynapseML")
packageSynapseML := {
  // Python-compatible version string, resolved once so the setup.py template
  // below only has to interpolate a plain value.
  val pyVersion = pythonizedVersion(version.value)

  // Writes a setup.py describing the combined `synapseml` distribution into `dir`.
  def writeSetupFileToTarget(dir: File): Unit = {
    // mkdirs() rather than mkdir(): mkdir() returns false without creating
    // anything when a parent directory is missing.
    if (!dir.exists()) {
      dir.mkdirs()
    }
    val content =
      s"""
         |# Copyright (C) Microsoft Corporation. All rights reserved.
         |# Licensed under the MIT License. See LICENSE in project root for information.
         |
         |import os
         |from setuptools import setup, find_namespace_packages
         |import codecs
         |import os.path
         |
         |setup(
         | name="synapseml",
         | version="${pyVersion}",
         | description="Synapse Machine Learning",
         | long_description="SynapseML contains Microsoft's open source "
         | + "contributions to the Apache Spark ecosystem",
         | license="MIT",
         | packages=find_namespace_packages(include=['synapse.ml.*']),
         | url="https://github.com/Microsoft/SynapseML",
         | author="Microsoft",
         | author_email="mmlspark-support@microsoft.com",
         | classifiers=[
         | "Development Status :: 4 - Beta",
         | "Intended Audience :: Developers",
         | "Intended Audience :: Science/Research",
         | "Topic :: Software Development :: Libraries",
         | "License :: OSI Approved :: MIT License",
         | "Programming Language :: Python :: 2",
         | "Programming Language :: Python :: 3",
         | ],
         | zip_safe=True,
         | package_data={"synapseml": ["../LICENSE.txt", "../README.txt"]},
         |)
         |
         |""".stripMargin
    IO.write(join(dir, "setup.py"), content)
  }

  // Run packagePython and then mergePyCode across the sub-projects;
  // Def.sequential fixes that order between the two.
  Def.sequential(
    runTaskForAllInCompile(packagePython),
    runTaskForAllInCompile(mergePyCode)
  ).value

  val targetDir = rootGenDir.value
  val dir = join(targetDir, "src", "python")                         // where setup.py is written and run
  val packageDir = join(targetDir, "package", "python").absolutePath // wheel output directory
  writeSetupFileToTarget(dir)
  packagePythonWheelCmd(packageDir, dir)
}

val publishPypi = TaskKey[Unit]("publishPypi", "publish synapseml python wheel to pypi")
publishPypi := {
  // The combined wheel must be built before it can be uploaded.
  packageSynapseML.value
  // NOTE(review): the wheel's file name comes from the `name=` written into
  // setup.py by packageSynapseML ("synapseml"); this assumes the sbt project
  // name here is the same string - confirm.
  val fn = s"${name.value}-${pythonizedVersion(version.value)}-py2.py3-none-any.whl"
  val wheel = join(rootGenDir.value, "package", "python", fn).toString
  // Hand the credentials to twine through its TWINE_USERNAME / TWINE_PASSWORD
  // environment variables instead of --username/--password arguments, so the
  // API token never appears on the command line (process listings, echoed commands).
  runCmd(
    activateCondaEnv ++ Seq("twine", "upload", "--skip-existing", wheel, "--verbose"),
    envVars = Map(
      "TWINE_USERNAME" -> "__token__",
      "TWINE_PASSWORD" -> Secrets.pypiApiToken)
  )
}

val publishDocs = TaskKey[Unit]("publishDocs", "publish docs for scala and python")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -158,8 +158,8 @@ object CodeGen {
| classifiers=[
| "Development Status :: 4 - Beta",
| "Intended Audience :: Developers",
| "Intended Audience :: Data Scientists",
| "Topic :: Software Development :: Datascience Tools",
| "Intended Audience :: Science/Research",
| "Topic :: Software Development :: Libraries",
| "License :: OSI Approved :: MIT License",
| "Programming Language :: Python :: 2",
| "Programming Language :: Python :: 3",
Expand Down
1 change: 1 addition & 0 deletions environment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,4 @@ dependencies:
- ipython
- pytest-codeblocks
- azure-storage-blob
- twine
26 changes: 26 additions & 0 deletions pipeline.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,32 @@ jobs:
tagSource: 'auto'
releaseNotesFile: 'CHANGELOG.md'
isDraft: true
- bash: echo "##vso[task.prependpath]$CONDA/bin"
condition: startsWith(variables['tag'], 'v')
displayName: Add conda to PATH
- bash: conda info
condition: startsWith(variables['tag'], 'v')
- bash: conda env create -f environment.yaml
condition: startsWith(variables['tag'], 'v')
displayName: Create Anaconda environment
- task: AzureKeyVault@1
condition: startsWith(variables['tag'], 'v')
inputs:
azureSubscription: 'MMLSpark Build'
keyVaultName: mmlspark-keys
- bash: |
source activate synapseml
sbt publishPypi
condition: startsWith(variables['tag'], 'v')
env:
STORAGE_KEY: $(storage-key)
NEXUS-UN: $(nexus-un)
NEXUS-PW: $(nexus-pw)
PGP-PRIVATE: $(pgp-private)
PGP-PUBLIC: $(pgp-public)
PGP-PW: $(pgp-pw)
PYPI-API-TOKEN: $(pypi-api-token)
displayName: 'publish python package to pypi'
- job: PythonTests
cancelTimeoutInMinutes: 0
Expand Down
34 changes: 12 additions & 22 deletions project/CodegenPlugin.scala
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
import java.io.File
import BuildUtils.{join, runCmd, singleUploadToBlob, zipFolder}
import CondaPlugin.autoImport.{activateCondaEnv, condaEnvLocation, createCondaEnvTask}
import CondaPlugin.autoImport.{condaEnvLocation, createCondaEnvTask}
import org.apache.commons.io.FileUtils
import sbt.Keys._
import sbt.{Def, Global, Tags, _}
import spray.json._
import BuildUtils._

object CodegenConfigProtocol extends DefaultJsonProtocol {
implicit val CCFormat: RootJsonFormat[CodegenConfig] = jsonFormat8(CodegenConfig.apply)
Expand Down Expand Up @@ -35,7 +36,6 @@ object CodegenPlugin extends AutoPlugin {
val TestGenTag = Tags.Tag("testGen")

object autoImport {
val pythonizedVersion = settingKey[String]("Pythonized version")
val rVersion = settingKey[String]("R version")
val genPyPackageNamespace = settingKey[String]("genPyPackageNamespace")
val genRPackageNamespace = settingKey[String]("genRPackageNamespace")
Expand Down Expand Up @@ -79,12 +79,12 @@ object CodegenPlugin extends AutoPlugin {
publishLocal.value
val libPath = join(condaEnvLocation.value, "Lib", "R", "library").toString
val rSrcDir = join(codegenDir.value, "src", "R", genRPackageNamespace.value)
rCmd(activateCondaEnv.value,
rCmd(activateCondaEnv,
Seq("R", "CMD", "INSTALL", "--no-multiarch", "--with-keep.source", genRPackageNamespace.value),
rSrcDir.getParentFile, libPath)
val testRunner = join("tools", "tests", "run_r_tests.R")
if (join(rSrcDir,"tests").exists()){
rCmd(activateCondaEnv.value,
rCmd(activateCondaEnv,
Seq("Rscript", testRunner.getAbsolutePath), rSrcDir, libPath)
}
} tag(RInstallTag)
Expand All @@ -107,7 +107,7 @@ object CodegenPlugin extends AutoPlugin {
baseDirectory.value.getAbsolutePath,
targetDir.value.getAbsolutePath,
version.value,
pythonizedVersion.value,
pythonizedVersion(version.value),
rVersion.value,
genPyPackageNamespace.value
).toJson.compactPrint
Expand All @@ -119,7 +119,7 @@ object CodegenPlugin extends AutoPlugin {
baseDirectory.value.getAbsolutePath,
targetDir.value.getAbsolutePath,
version.value,
pythonizedVersion.value,
pythonizedVersion(version.value),
rVersion.value,
genPyPackageNamespace.value
).toJson.compactPrint
Expand Down Expand Up @@ -147,13 +147,6 @@ object CodegenPlugin extends AutoPlugin {
}
}.value),
testgen := testGenImpl.value,
pythonizedVersion := {
if (version.value.contains("-")) {
version.value.split("-".head).head + ".dev1"
} else {
version.value
}
},
rVersion := {
if (version.value.contains("-")) {
version.value.split("-".head).head
Expand All @@ -167,7 +160,7 @@ object CodegenPlugin extends AutoPlugin {
val rSrcDir = join(codegenDir.value, "src", "R", genRPackageNamespace.value)
val rPackageDir = join(codegenDir.value, "package", "R")
val libPath = join(condaEnvLocation.value, "Lib", "R", "library").toString
rCmd(activateCondaEnv.value, Seq("R", "-q", "-e", "roxygen2::roxygenise()"), rSrcDir, libPath)
rCmd(activateCondaEnv, Seq("R", "-q", "-e", "roxygen2::roxygenise()"), rSrcDir, libPath)
rPackageDir.mkdirs()
zipFolder(rSrcDir, new File(rPackageDir, s"${name.value}-${version.value}.zip"))
},
Expand All @@ -188,23 +181,20 @@ object CodegenPlugin extends AutoPlugin {
if (destPyDir.exists()) FileUtils.forceDelete(destPyDir)
val sourcePyDir = join(pythonSrcDir.getAbsolutePath, genPyPackageNamespace.value)
FileUtils.copyDirectory(sourcePyDir, destPyDir)
runCmd(
activateCondaEnv.value ++
Seq(s"python", "setup.py", "bdist_wheel", "--universal", "-d", packageDir),
pythonSrcDir)
packagePythonWheelCmd(packageDir, pythonSrcDir)
},
installPipPackage := {
packagePython.value
publishLocal.value
runCmd(
activateCondaEnv.value ++ Seq("pip", "install", "-I",
s"${name.value.replace("-", "_")}-${pythonizedVersion.value}-py2.py3-none-any.whl"),
activateCondaEnv ++ Seq("pip", "install", "-I",
s"${name.value.replace("-", "_")}-${pythonizedVersion(version.value)}-py2.py3-none-any.whl"),
join(codegenDir.value, "package", "python"))
},
publishPython := {
publishLocal.value
packagePython.value
val fn = s"${name.value.replace("-", "_")}-${pythonizedVersion.value}-py2.py3-none-any.whl"
val fn = s"${name.value.replace("-", "_")}-${pythonizedVersion(version.value)}-py2.py3-none-any.whl"
singleUploadToBlob(
join(codegenDir.value, "package", "python", fn).toString,
version.value + "/" + fn, "pip")
Expand All @@ -219,7 +209,7 @@ object CodegenPlugin extends AutoPlugin {
testgen.value
val mainTargetDir = join(baseDirectory.value.getParent, "target")
runCmd(
activateCondaEnv.value ++ Seq("python",
activateCondaEnv ++ Seq("python",
"-m",
"pytest",
s"--cov=${genPyPackageNamespace.value}",
Expand Down
22 changes: 5 additions & 17 deletions project/CondaPlugin.scala
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import BuildUtils.{osPrefix, runCmd}
import BuildUtils._
import sbt._
import Keys._

Expand All @@ -9,43 +9,31 @@ object CondaPlugin extends AutoPlugin {
override def trigger = allRequirements

object autoImport {
val condaEnvName = settingKey[String]("Name of conda environment")
val cleanCondaEnvTask = TaskKey[Unit]("cleanCondaEnv", "create conda env")
val condaEnvLocation = TaskKey[File]("condaEnvLocation", "get install location of conda env")
val createCondaEnvTask = TaskKey[Unit]("createCondaEnv", "create conda env")
val activateCondaEnv = settingKey[Seq[String]]("commands to activate conda environment")
}

import autoImport._
override lazy val globalSettings: Seq[Setting[_]] = Seq(
condaEnvName := "synapseml",
cleanCondaEnvTask := {
runCmd(Seq("conda", "env", "remove", "--name", condaEnvName.value, "-y"))
runCmd(Seq("conda", "env", "remove", "--name", condaEnvName, "-y"))
},
condaEnvLocation := {
createCondaEnvTask.value
new File(Process("conda env list").lineStream.toList
.map(_.split("\\s+"))
.map(l => (l.head, l.reverse.head))
.filter(p => p._1 == condaEnvName.value)
.filter(p => p._1 == condaEnvName)
.head._2)
},
createCondaEnvTask := {
val hasEnv = Process("conda env list").lineStream.toList
.map(_.split("\\s+").head).contains(condaEnvName.value)
.map(_.split("\\s+").head).contains(condaEnvName)
if (!hasEnv) {
runCmd(Seq("conda", "env", "create", "-f", "environment.yaml"))
} else {
println("Found conda env " + condaEnvName.value)
}
},
activateCondaEnv := {
if (sys.props("os.name").toLowerCase.contains("windows")) {
osPrefix ++ Seq("activate", condaEnvName.value, "&&")
} else {
Seq()
//TODO figure out why this doesent work
//Seq("/bin/bash", "-l", "-c", "source activate " + condaEnvName, "&&")
println("Found conda env " + condaEnvName)
}
}
)
Expand Down
1 change: 1 addition & 0 deletions project/Secrets.scala
Original file line number Diff line number Diff line change
Expand Up @@ -59,5 +59,6 @@ object Secrets {
sys.env.getOrElse("PGP-PRIVATE", getSecret("pgp-private")).getBytes("UTF-8")))
lazy val pgpPassword: String = sys.env.getOrElse("PGP-PW", getSecret("pgp-pw"))
lazy val storageKey: String = sys.env.getOrElse("STORAGE_KEY", getSecret("storage-key"))
// PyPI API token. Accepts both the underscore spelling and the hyphenated
// spelling used by the other secrets in this object (e.g. PGP-PW), and only
// falls back to getSecret when neither environment variable is set.
lazy val pypiApiToken: String = sys.env.get("PYPI_API_TOKEN")
  .orElse(sys.env.get("PYPI-API-TOKEN"))
  .getOrElse(getSecret("pypi-api-token"))

}
28 changes: 28 additions & 0 deletions project/build.scala
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@

import java.io.File
import java.lang.ProcessBuilder.Redirect

Expand All @@ -22,6 +23,13 @@ object BuildUtils {
}
}

/** Converts a build version into the form used for the Python packages.
  *
  * A version containing a '-' is cut at the first '-' and suffixed with
  * ".dev1"; any other version is returned unchanged.
  *
  * @param version the build version string
  * @return the version string to use for Python artifacts
  */
def pythonizedVersion(version: String): String = {
  if (version.contains("-")) {
    version.split('-').head + ".dev1"
  } else {
    version
  }
}

def runCmd(cmd: Seq[String],
wd: File = new File("."),
envVars: Map[String, String] = Map()): Unit = {
Expand All @@ -35,6 +43,26 @@ object BuildUtils {
assert(pb.start().waitFor() == 0)
}

/** Name of the conda environment used by the build's conda commands. */
def condaEnvName: String = "synapseml"

/** Command prefix that activates the conda environment before another command.
  *
  * On Windows this is `osPrefix` followed by `activate <env> &&`; on every
  * other OS it is empty, so the command runs in whatever environment is
  * already active.
  */
def activateCondaEnv: Seq[String] = {
  val onWindows = sys.props("os.name").toLowerCase.contains("windows")
  if (!onWindows) {
    // TODO figure out why this doesn't work
    // Seq("/bin/bash", "-l", "-c", "source activate " + condaEnvName, "&&")
    Seq.empty[String]
  } else {
    osPrefix ++ Seq("activate", condaEnvName, "&&")
  }
}

/** Runs `python setup.py bdist_wheel --universal` in `workDir`.
  *
  * @param packageDir directory passed to `-d`, where the wheel is written
  * @param workDir    directory the command is run from; defaults to "."
  */
def packagePythonWheelCmd(packageDir: String,
                          workDir: File = new File(".")): Unit = {
  val buildWheel = Seq("python", "setup.py", "bdist_wheel", "--universal", "-d", packageDir)
  runCmd(activateCondaEnv ++ buildWheel, workDir)
}

def uploadToBlob(source: String,
dest: String,
container: String,
Expand Down

0 comments on commit 3d92dd7

Please sign in to comment.