Skip to content

Commit

Permalink
[ML-26] Build for different spark version by -Pprofile (#39)
Browse files Browse the repository at this point in the history
* Support Spark-3.0.0, 3.0.2, 3.1.1 by profile

reorganize folders for build by profile

update KMeans DAL for profile spark-3.0.2

update KMeans DAL for profile spark-3.1.1; update PCA DAL for spark-3.0.2 and spark-3.1.1.

Update oap-mllib-ci.yml

Add tests for building by profile

Update pom.xml

remove duplicated <spark.version>3.0.0</spark.version> as it is set in the default activated profile

updated README.md

* add spark v3.0.1 support

* update ci-test to support profile

* work around the CI test environment setup issue

* enable CI test for different profile

* fix a bug in ci-test.sh

* minor update based on review comments

* use argument instead of env var to pass profile to build.sh and ci-build.sh

* use argument instead of env var to pass profile to test.sh

* minor update based on review comments
  • Loading branch information
bobjiang82 authored Apr 30, 2021
1 parent b710f5d commit a2df040
Showing 25 changed files with 9,690 additions and 34 deletions.
6 changes: 5 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -161,7 +161,11 @@ To build, run the following commands:
$ cd mllib-dal
$ ./build.sh
```

The target can be built against different Spark versions by specifying a Maven profile of the form `spark-x.x.x`. E.g.
```
$ ./build.sh spark-3.1.1
```
If no profile parameter is given, the Spark version 3.0.0 will be activated by default.
The built JAR package will be placed in `target` directory with the name `oap-mllib-x.x.x-with-spark-x.x.x.jar`.

## Examples
13 changes: 12 additions & 1 deletion dev/ci-build.sh
Original file line number Diff line number Diff line change
@@ -30,7 +30,14 @@ if [[ -z $CCL_ROOT ]]; then
exit 1
fi

# Optional first argument selects the Maven profile to build with
# (e.g. "spark-3.1.1"); when omitted, pom.xml's default profile applies.
if [[ -z "${1:-}" ]]; then
  # Diagnostics belong on stderr so they don't pollute captured stdout.
  echo "SPARK_VER not defined, using default." >&2
else
  SPARK_VER=$1
fi

echo === Building Environments ===
echo SPARK_VER=$SPARK_VER
echo JAVA_HOME=$JAVA_HOME
echo DAALROOT=$DAALROOT
echo TBBROOT=$TBBROOT
@@ -40,4 +47,8 @@ echo Clang Version: $(clang -dumpversion)
echo =============================

cd $GITHUB_WORKSPACE/mllib-dal
mvn --no-transfer-progress -DskipTests clean package
# Run the package build, adding -P<profile> only when a Spark profile was
# requested; otherwise the default profile activated in pom.xml is used.
if [[ -n "${SPARK_VER:-}" ]]; then
  mvn "-P${SPARK_VER}" --no-transfer-progress -DskipTests clean package
else
  mvn --no-transfer-progress -DskipTests clean package
fi
28 changes: 14 additions & 14 deletions dev/ci-test.sh
Original file line number Diff line number Diff line change
@@ -39,23 +39,23 @@ echo Maven Version: $(mvn -v | head -n 1 | cut -f3 -d" ")
echo Clang Version: $(clang -dumpversion)
echo =============================

cd $GITHUB_WORKSPACE/mllib-dal

# Build test
$GITHUB_WORKSPACE/dev/ci-build.sh
# Spark profiles declared in mllib-dal/pom.xml that CI must build and test.
SupportedSparkVersions=("spark-3.0.0" "spark-3.0.1" "spark-3.0.2" "spark-3.1.1")

# Iterate with "${arr[@]}" (quoted, @) so each profile stays one word even if
# an entry ever contains whitespace; unquoted ${arr[*]} would word-split.
for SparkVer in "${SupportedSparkVersions[@]}"; do
  echo ""
  echo "========================================"
  echo "Profile: $SparkVer"
  echo "========================================"

  cd "$GITHUB_WORKSPACE/mllib-dal"
  # Build test with profile
  "$GITHUB_WORKSPACE/dev/ci-build.sh" "$SparkVer"

  # Test all
  # mvn -Dtest=none -Dmaven.test.skip=false test
  mvn --no-transfer-progress -P"$SparkVer" -Dtest=none -DwildcardSuites=org.apache.spark.ml.clustering.IntelKMeansSuite test
  mvn --no-transfer-progress -P"$SparkVer" -Dtest=none -DwildcardSuites=org.apache.spark.ml.feature.IntelPCASuite test
  # mvn -P$SparkVer -Dtest=none -DwildcardSuites=org.apache.spark.ml.recommendation.IntelALSSuite test
done

# Individual test
mvn --no-transfer-progress -Dtest=none -DwildcardSuites=org.apache.spark.ml.clustering.IntelKMeansSuite test
mvn --no-transfer-progress -Dtest=none -DwildcardSuites=org.apache.spark.ml.feature.IntelPCASuite test
# mvn -Dtest=none -DwildcardSuites=org.apache.spark.ml.recommendation.IntelALSSuite test

# Yarn cluster test
# Yarn cluster test without profile
$GITHUB_WORKSPACE/dev/test-cluster/ci-test-cluster.sh
5 changes: 0 additions & 5 deletions dev/setup-all.sh
Original file line number Diff line number Diff line change
@@ -1,10 +1,5 @@
#!/usr/bin/env bash

# Setup hosts
# Use second internal IP, use first IP will be SSH timeout
HOST_IP=$(hostname -I | cut -f2 -d" ")
echo $HOST_IP $(hostname) | sudo tee -a /etc/hosts

# Install dependencies for building
$GITHUB_WORKSPACE/dev/install-build-deps-ubuntu.sh

3 changes: 1 addition & 2 deletions dev/test-cluster/setup-cluster.sh
Original file line number Diff line number Diff line change
@@ -15,8 +15,7 @@ cd ~/opt

cd $WORK_DIR

# Use second internal IP, use first IP will be SSH timeout
HOST_IP=$(hostname -I | cut -f2 -d" ")
HOST_IP=$(hostname -f)

sed -i "s/localhost/$HOST_IP/g" core-site.xml
sed -i "s/localhost/$HOST_IP/g" yarn-site.xml
13 changes: 12 additions & 1 deletion mllib-dal/build.sh
Original file line number Diff line number Diff line change
@@ -26,7 +26,14 @@ if [[ -z $CCL_ROOT ]]; then
exit 1
fi

# Optional first argument names the Spark profile to build against; when
# absent, fall through to the default profile configured in pom.xml.
if [[ -n "${1:-}" ]]; then
  SPARK_VER=$1
else
  echo "SPARK_VER not defined, using default."
fi

echo === Building Environments ===
echo SPARK_VER=$SPARK_VER
echo JAVA_HOME=$JAVA_HOME
echo DAALROOT=$DAALROOT
echo TBBROOT=$TBBROOT
@@ -35,4 +42,8 @@ echo Maven Version: $(mvn -v | head -n 1 | cut -f3 -d" ")
echo Clang Version: $(clang -dumpversion)
echo =============================

mvn -DskipTests clean package
# Invoke Maven, passing -P<profile> only when a Spark profile was requested;
# otherwise pom.xml's default (spark-3.0.0) profile is activated.
if [[ -n "${SPARK_VER:-}" ]]; then
  mvn "-P${SPARK_VER}" -DskipTests clean package
else
  mvn -DskipTests clean package
fi
97 changes: 95 additions & 2 deletions mllib-dal/pom.xml
Original file line number Diff line number Diff line change
@@ -78,8 +78,8 @@

<dependency>
<groupId>org.scalatest</groupId>
<artifactId>scalatest_2.12</artifactId>
<version>3.0.8</version>
<artifactId>scalatest_${scala.binary.version}</artifactId>
<version>${scalatest.version}</version>
<scope>test</scope>
</dependency>

@@ -146,16 +146,108 @@

</dependencies>

<!-- Spark-version build profiles: select one with `mvn -Pspark-x.x.x`.
     Each profile pins spark.version and a scalatest.version compatible
     with that Spark release. The empty dependencyManagement/pluginManagement
     stanzas are placeholders for per-profile overrides. -->
<profiles>

<!-- Default profile (activeByDefault): builds against Spark 3.0.0. -->
<profile>
<id>spark-3.0.0</id>
<activation>
<activeByDefault>true</activeByDefault>
</activation>
<properties>
<spark.version>3.0.0</spark.version>
<scalatest.version>3.0.8</scalatest.version>
</properties>
<dependencyManagement>
<dependencies>
</dependencies>
</dependencyManagement>
<build>
<pluginManagement>
<plugins>
</plugins>
</pluginManagement>
</build>
</profile>

<!-- Spark 3.0.1 -->
<profile>
<id>spark-3.0.1</id>
<properties>
<spark.version>3.0.1</spark.version>
<scalatest.version>3.0.8</scalatest.version>
</properties>
<dependencyManagement>
<dependencies>
</dependencies>
</dependencyManagement>
<build>
<pluginManagement>
<plugins>
</plugins>
</pluginManagement>
</build>
</profile>

<!-- Spark 3.0.2 -->
<profile>
<id>spark-3.0.2</id>
<properties>
<spark.version>3.0.2</spark.version>
<scalatest.version>3.0.8</scalatest.version>
</properties>
<dependencyManagement>
<dependencies>
</dependencies>
</dependencyManagement>
<build>
<pluginManagement>
<plugins>
</plugins>
</pluginManagement>
</build>
</profile>

<!-- Spark 3.1.1: requires a newer scalatest than the 3.0.x line. -->
<profile>
<id>spark-3.1.1</id>
<properties>
<spark.version>3.1.1</spark.version>
<scalatest.version>3.2.3</scalatest.version>
</properties>
<dependencyManagement>
<dependencies>
</dependencies>
</dependencyManagement>
<build>
<pluginManagement>
<plugins>
</plugins>
</pluginManagement>
</build>
</profile>
</profiles>

<build>
<plugins>

<plugin>
<groupId>org.scala-tools</groupId>
<artifactId>maven-scala-plugin</artifactId>
<version>2.15.2</version>
<executions>
<execution>
<id>compile</id>
<goals>
<goal>compile</goal>
</goals>
<configuration>
<includes>
<include>**/*.java</include>
<include>**/spark/**</include>
<include>**/spark-${spark.version}/**</include>
</includes>
</configuration>
</execution>
<execution>
<id>testCompile</id>
<goals>
<goal>testCompile</goal>
</goals>
</execution>
@@ -253,6 +345,7 @@
</resources>
</configuration>
</plugin>

<plugin>
<groupId>com.coderplus.maven.plugins</groupId>
<artifactId>copy-rename-maven-plugin</artifactId>
Loading

0 comments on commit a2df040

Please sign in to comment.