diff --git a/.github/workflows/oap-mllib-ci.yml b/.github/workflows/oap-mllib-ci.yml
index 2c6973321..4f567086f 100644
--- a/.github/workflows/oap-mllib-ci.yml
+++ b/.github/workflows/oap-mllib-ci.yml
@@ -11,32 +11,21 @@ jobs:
- name: Set up JDK 1.8
uses: actions/setup-java@v1
with:
- java-version: 1.8
+ java-version: 1.8
- name: Restore cached dependencies
uses: actions/cache@v2
with:
path: |
+ # /var/cache/apt/archives/*.deb
~/.m2/repository
- ~/downloads
- /opt/intel/inteloneapi
/opt/intel/oneapi
+ ~/opt
key: ${{ runner.os }}-${{ hashFiles('**/pom.xml', '{{github.workspace}}/dev/install-build-deps-ubuntu.sh') }}
restore-keys: |
${{ runner.os }}-
- - name: Set up dependencies
- run: |
- [ -d ~/downloads ] || mkdir ~/downloads
- cd ~/downloads
- [ -f spark-3.0.0-bin-hadoop2.7.tgz ] || wget http://archive.apache.org/dist/spark/spark-3.0.0/spark-3.0.0-bin-hadoop2.7.tgz
- [ -d spark-3.0.0-bin-hadoop2.7 ] || cd ~ && tar -zxf downloads/spark-3.0.0-bin-hadoop2.7.tgz
- export SPARK_HOME=~/spark-3.0.0-bin-hadoop2.7
- ${{github.workspace}}/dev/install-build-deps-ubuntu.sh
+ - name: Set up environments
+ run: |
+ source ${{github.workspace}}/dev/setup-all.sh
- name: Build and Test
- run: |
- cd ${{github.workspace}}/mllib-dal
- export ONEAPI_ROOT=/opt/intel/oneapi
- source /opt/intel/oneapi/dal/latest/env/vars.sh
- source /opt/intel/oneapi/tbb/latest/env/vars.sh
- source /tmp/oneCCL/build/_install/env/setvars.sh
- # temp disable and will enable for new release of oneCCL
- #./build.sh
+ run: |
+ ${{github.workspace}}/dev/ci-test.sh
diff --git a/README.md b/README.md
index 477c74b1f..c9e003ba5 100644
--- a/README.md
+++ b/README.md
@@ -11,13 +11,13 @@ For those algorithms that are not accelerated by OAP MLlib, the original Spark M
## Online Documentation
-You can find the all the OAP MLlib documents on the [project web page](https://oap-project.github.io/oap-mllib/).
+You can find all the OAP MLlib documentation on the [project web page](https://oap-project.github.io/oap-mllib).
## Getting Started
### Java/Scala Users Preferred
-Use a pre-built OAP MLlib JAR to get started. You can firstly download OAP package from [OAP-JARs-Tarball](https://github.com/Intel-bigdata/OAP/releases/download/v1.0.0-spark-3.0.0/oap-1.0.0-bin-spark-3.0.0.tar.gz) and extract this Tarball to get `oap-mllib-x.x.x-with-spark-x.x.x.jar` under `oap-1.0.0-bin-spark-3.0.0/jars`.
+Use a pre-built OAP MLlib JAR to get started. You can first download the OAP package from [OAP-JARs-Tarball](https://github.com/Intel-bigdata/OAP/releases/download/v1.1.0-spark-3.0.0/oap-1.1.0-bin-spark-3.0.0.tar.gz) and extract the tarball to get `oap-mllib-x.x.x-with-spark-x.x.x.jar` under `oap-1.1.0-bin-spark-3.0.0/jars`.
Then you can refer to the following [Running](#running) section to try out.
@@ -58,24 +58,31 @@ spark.executor.extraClassPath ./oap-mllib-x.x.x-with-spark-x.x.x.jar
### Sanity Check
-To use K-means example for sanity check, you need to upload a data file to your HDFS and change related variables in `run.sh` of kmeans example. Then run the following commands:
+#### Set up `env.sh`
```
- $ cd oap-mllib/examples/kmeans
- $ ./build.sh
- $ ./run.sh
+ $ cd conf
+ $ cp env.sh.template env.sh
```
+Edit the related variables in the "`Minimum Settings`" section of `env.sh`.
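+
+For example, a minimal `env.sh` might look like the following (the values are illustrative and should match your own installation):
+```
+OAP_MLLIB_VERSION=1.1.0
+SPARK_MASTER=yarn
+export HADOOP_HOME=~/opt/hadoop-2.7.7
+export SPARK_HOME=~/opt/spark-3.0.0-bin-hadoop2.7
+export HDFS_ROOT=hdfs://localhost:8020
+export OAP_MLLIB_ROOT=~/oap-mllib
+```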
-### Benchmark with HiBench
-Use [Hibench](https://github.com/Intel-bigdata/HiBench) to generate dataset with various profiles, and change related variables in `run-XXX.sh` script when applicable. Then run the following commands:
+#### Upload example data files to HDFS
```
- $ cd oap-mllib/examples/kmeans-hibench
+ $ cd examples
+ $ hadoop fs -mkdir -p /user/$USER
+ $ hadoop fs -copyFromLocal data
+ $ hadoop fs -ls data
+```
+#### Run K-means
+
+```
+ $ cd examples/kmeans
$ ./build.sh
- $ ./run-hibench-oap-mllib.sh
+ $ ./run.sh
```
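+
+The other Scala examples (e.g. ALS) follow the same pattern:
+```
+ $ cd examples/als
+ $ ./build.sh
+ $ ./run.sh
+```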
### PySpark Support
-As PySpark-based applications call their Scala couterparts, they shall be supported out-of-box. An example can be found in the [Examples](#examples) section.
+As PySpark-based applications call their Scala counterparts, they are supported out of the box. Examples can be found in the [Examples](#examples) section.
## Building
@@ -86,7 +93,8 @@ We use [Apache Maven](https://maven.apache.org/) to manage and build source code
* JDK 8.0+
* Apache Maven 3.6.2+
* GNU GCC 4.8.5+
-* Intel® oneAPI Toolkits 2021.1.1 Components:
+* Intel® oneAPI Toolkits 2021.2+ Components:
+ - DPC++/C++ Compiler (dpcpp/clang++)
- Data Analytics Library (oneDAL)
- Threading Building Blocks (oneTBB)
* [Open Source Intel® oneAPI Collective Communications Library (oneCCL)](https://github.com/oneapi-src/oneCCL)
@@ -95,7 +103,7 @@ Intel® oneAPI Toolkits and its components can be downloaded and install from [h
More details about oneAPI can be found [here](https://software.intel.com/content/www/us/en/develop/tools/oneapi.html).
-You can also refer to [this script and comments in it](https://github.com/Intel-bigdata/OAP/blob/branch-1.0-spark-3.x/oap-mllib/dev/install-build-deps-centos.sh) to install correct oneAPI version and manually setup the environments.
+You can refer to [this script](dev/install-build-deps-centos.sh) to install the correct dependencies.
Scala and Java dependency descriptions are already included in Maven POM file.
@@ -107,7 +115,7 @@ To clone and build from open source oneCCL, run the following commands:
```
$ git clone https://github.com/oneapi-src/oneCCL
$ cd oneCCL
- $ git checkout beta08
+ $ git checkout 2021.2
$ mkdir build && cd build
$ cmake ..
$ make -j install
@@ -138,17 +146,19 @@ CCL_ROOT | Path to oneCCL home directory
We suggest you to source `setvars.sh` script into current shell to setup building environments as following:
```
- $ source /opt/intel/inteloneapi/setvars.sh
+ $ source /opt/intel/oneapi/setvars.sh
$ source /your/oneCCL_source_code/build/_install/env/setvars.sh
```
__Be noticed we are using our own built oneCCL instead, we should source oneCCL's `setvars.sh` to overwrite oneAPI one.__
+You can also refer to [this CI script](dev/ci-build.sh) to set up the build environment.
+
If you prefer to buid your own open source [oneDAL](https://github.com/oneapi-src/oneDAL), [oneTBB](https://github.com/oneapi-src/oneTBB) versions rather than use the ones included in oneAPI TookKits, you can refer to the related build instructions and manually source `setvars.sh` accordingly.
To build, run the following commands:
```
- $ cd oap-mllib/mllib-dal
+ $ cd mllib-dal
$ ./build.sh
```
@@ -156,12 +166,19 @@ The built JAR package will be placed in `target` directory with the name `oap-ml
## Examples
-Example | Description
+Example | Description
----------------|---------------------------
kmeans | K-means example for Scala
kmeans-pyspark | K-means example for PySpark
-kmeans-hibench | Use HiBench-generated input dataset to benchmark K-means performance
+pca | PCA example for Scala
+pca-pyspark | PCA example for PySpark
+als | ALS example for Scala
+als-pyspark | ALS example for PySpark
## List of Accelerated Algorithms
-* K-Means (CPU, Experimental)
+Algorithm | Category | Maturity
+----------|----------|-------------
+K-Means | CPU | Experimental
+PCA | CPU | Experimental
+ALS | CPU | Experimental
diff --git a/conf/env.sh.template b/conf/env.sh.template
new file mode 100644
index 000000000..ddcc84665
--- /dev/null
+++ b/conf/env.sh.template
@@ -0,0 +1,46 @@
+# == OAP MLlib users: customize the following environments for running the examples == #
+
+# ============== Minimum Settings ============= #
+
+# Set OAP MLlib version (e.g. 1.1.0)
+OAP_MLLIB_VERSION=x.x.x
+# Set Spark master
+SPARK_MASTER=yarn
+# Set Hadoop home path
+export HADOOP_HOME=/path/to/your/hadoop/home
+# Set Spark home path
+export SPARK_HOME=/path/to/your/spark/home
+# Set HDFS Root, should be hdfs://xxx or file://xxx
+export HDFS_ROOT=hdfs://localhost:8020
+# Set OAP MLlib source code root directory
+export OAP_MLLIB_ROOT=/path/to/oap-mllib/home
+
+# ============================================= #
+
+# Set HADOOP_CONF_DIR for Spark
+export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
+
+# Set JAR name & path
+OAP_MLLIB_JAR_NAME=oap-mllib-$OAP_MLLIB_VERSION.jar
+OAP_MLLIB_JAR=$OAP_MLLIB_ROOT/mllib-dal/target/$OAP_MLLIB_JAR_NAME
+# Set Spark driver & executor classpaths,
+# absolute path for driver, relative path for executor
+SPARK_DRIVER_CLASSPATH=$OAP_MLLIB_JAR
+SPARK_EXECUTOR_CLASSPATH=./$OAP_MLLIB_JAR_NAME
+
+# Set Spark resources, can be overwritten in example
+SPARK_DRIVER_MEMORY=1G
+SPARK_NUM_EXECUTORS=2
+SPARK_EXECUTOR_CORES=1
+SPARK_EXECUTOR_MEMORY=1G
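+# Default parallelism: 2 tasks per executor core (executors x cores x 2)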
+SPARK_DEFAULT_PARALLELISM=$(expr $SPARK_NUM_EXECUTORS '*' $SPARK_EXECUTOR_CORES '*' 2)
+
+# Checks
+
+for dir in $SPARK_HOME $HADOOP_HOME $OAP_MLLIB_JAR
+do
+ if [[ ! -e $dir ]]; then
+ echo $dir does not exist!
+ exit 1
+ fi
+done
diff --git a/dev/ci-build.sh b/dev/ci-build.sh
new file mode 100755
index 000000000..6f70ea372
--- /dev/null
+++ b/dev/ci-build.sh
@@ -0,0 +1,43 @@
+#!/usr/bin/env bash
+
+# Setup building envs
+source /opt/intel/oneapi/setvars.sh
+source /tmp/oneCCL/build/_install/env/setvars.sh
+
+# Check envs for building
+if [[ -z $JAVA_HOME ]]; then
+ echo JAVA_HOME not defined!
+ exit 1
+fi
+
+if [[ -z $(which mvn) ]]; then
+ echo Maven not found!
+ exit 1
+fi
+
+if [[ -z $DAALROOT ]]; then
+ echo DAALROOT not defined!
+ exit 1
+fi
+
+if [[ -z $TBBROOT ]]; then
+ echo TBBROOT not defined!
+ exit 1
+fi
+
+if [[ -z $CCL_ROOT ]]; then
+ echo CCL_ROOT not defined!
+ exit 1
+fi
+
+echo === Building Environments ===
+echo JAVA_HOME=$JAVA_HOME
+echo DAALROOT=$DAALROOT
+echo TBBROOT=$TBBROOT
+echo CCL_ROOT=$CCL_ROOT
+echo Maven Version: $(mvn -v | head -n 1 | cut -f3 -d" ")
+echo Clang Version: $(clang -dumpversion)
+echo =============================
+
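+# Build the OAP MLlib JAR; unit tests are skipped here and run separately by ci-test.sh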
+cd $GITHUB_WORKSPACE/mllib-dal
+mvn --no-transfer-progress -DskipTests clean package
diff --git a/dev/ci-test.sh b/dev/ci-test.sh
new file mode 100755
index 000000000..7ce665089
--- /dev/null
+++ b/dev/ci-test.sh
@@ -0,0 +1,61 @@
+#!/usr/bin/env bash
+
+# Setup building envs
+source /opt/intel/oneapi/setvars.sh
+source /tmp/oneCCL/build/_install/env/setvars.sh
+
+# Check envs for building
+if [[ -z $JAVA_HOME ]]; then
+ echo JAVA_HOME not defined!
+ exit 1
+fi
+
+if [[ -z $(which mvn) ]]; then
+ echo Maven not found!
+ exit 1
+fi
+
+if [[ -z $DAALROOT ]]; then
+ echo DAALROOT not defined!
+ exit 1
+fi
+
+if [[ -z $TBBROOT ]]; then
+ echo TBBROOT not defined!
+ exit 1
+fi
+
+if [[ -z $CCL_ROOT ]]; then
+ echo CCL_ROOT not defined!
+ exit 1
+fi
+
+echo === Testing Environments ===
+echo JAVA_HOME=$JAVA_HOME
+echo DAALROOT=$DAALROOT
+echo TBBROOT=$TBBROOT
+echo CCL_ROOT=$CCL_ROOT
+echo Maven Version: $(mvn -v | head -n 1 | cut -f3 -d" ")
+echo Clang Version: $(clang -dumpversion)
+echo =============================
+
+cd $GITHUB_WORKSPACE/mllib-dal
+
+# Build test
+$GITHUB_WORKSPACE/dev/ci-build.sh
+
+# Enable signal chaining support for JNI
+# export LD_PRELOAD=$JAVA_HOME/jre/lib/amd64/libjsig.so
+
+# -Dtest=none to turn off the Java tests
+
+# Test all
+# mvn -Dtest=none -Dmaven.test.skip=false test
+
+# Individual test
+mvn --no-transfer-progress -Dtest=none -DwildcardSuites=org.apache.spark.ml.clustering.IntelKMeansSuite test
+mvn --no-transfer-progress -Dtest=none -DwildcardSuites=org.apache.spark.ml.feature.IntelPCASuite test
+# mvn -Dtest=none -DwildcardSuites=org.apache.spark.ml.recommendation.IntelALSSuite test
+
+# Yarn cluster test
+$GITHUB_WORKSPACE/dev/test-cluster/ci-test-cluster.sh
\ No newline at end of file
diff --git a/dev/install-build-deps-centos.sh b/dev/install-build-deps-centos.sh
index 8a347fdef..5f8a6e5c8 100755
--- a/dev/install-build-deps-centos.sh
+++ b/dev/install-build-deps-centos.sh
@@ -12,8 +12,10 @@ gpgcheck=1
repo_gpgcheck=1
gpgkey=https://yum.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2023.PUB
EOF
- sudo mv /tmp/oneAPI.repo /etc/yum.repos.d
- sudo yum install -y intel-oneapi-dal-devel-2021.1.1 intel-oneapi-tbb-devel-2021.1.1
+ sudo mv /tmp/oneAPI.repo /etc/yum.repos.d
+ # sudo yum groupinstall -y "Development Tools"
+ # sudo yum install -y cmake
+ sudo yum install -y intel-oneapi-dpcpp-cpp-2021.2.0 intel-oneapi-dal-devel-2021.2.0 intel-oneapi-tbb-devel-2021.2.0
else
echo "oneAPI components already installed!"
fi
@@ -23,16 +25,7 @@ cd /tmp
rm -rf oneCCL
git clone https://github.com/oneapi-src/oneCCL
cd oneCCL
-git checkout 2021.1
-mkdir -p build && cd build
+git checkout 2021.2
+mkdir build && cd build
cmake ..
-make -j 2 install
-
-#
-# Setup building environments manually:
-#
-# export ONEAPI_ROOT=/opt/intel/oneapi
-# source /opt/intel/oneapi/dal/latest/env/vars.sh
-# source /opt/intel/oneapi/tbb/latest/env/vars.sh
-# source /tmp/oneCCL/build/_install/env/setvars.sh
-#
+make -j 2 install
\ No newline at end of file
diff --git a/dev/install-build-deps-ubuntu.sh b/dev/install-build-deps-ubuntu.sh
index d43e35b89..36fafc1f6 100755
--- a/dev/install-build-deps-ubuntu.sh
+++ b/dev/install-build-deps-ubuntu.sh
@@ -1,6 +1,6 @@
#!/usr/bin/env bash
-if [ ! -f /opt/intel/oneapi ]; then
+if [ ! -d /opt/intel/oneapi ]; then
echo "Installing oneAPI components ..."
cd /tmp
wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2023.PUB
@@ -8,25 +8,18 @@ if [ ! -f /opt/intel/oneapi ]; then
rm GPG-PUB-KEY-INTEL-SW-PRODUCTS-2023.PUB
echo "deb https://apt.repos.intel.com/oneapi all main" | sudo tee /etc/apt/sources.list.d/oneAPI.list
sudo apt-get update
- sudo apt-get install intel-oneapi-dal-devel-2021.1.1 intel-oneapi-tbb-devel-2021.1.1
+ # sudo apt-get install -y build-essential cmake
+ sudo apt-get install -y intel-oneapi-dpcpp-cpp-2021.2.0 intel-oneapi-dal-devel-2021.2.0 intel-oneapi-tbb-devel-2021.2.0
else
echo "oneAPI components already installed!"
-fi
+fi
echo "Building oneCCL ..."
cd /tmp
+rm -rf oneCCL
git clone https://github.com/oneapi-src/oneCCL
cd oneCCL
-git checkout 2021.1
+git checkout 2021.2
mkdir build && cd build
cmake ..
make -j 2 install
-
-#
-# Setup building environments manually:
-#
-# export ONEAPI_ROOT=/opt/intel/oneapi
-# source /opt/intel/oneapi/dal/latest/env/vars.sh
-# source /opt/intel/oneapi/tbb/latest/env/vars.sh
-# source /tmp/oneCCL/build/_install/env/setvars.sh
-#
diff --git a/dev/setup-all.sh b/dev/setup-all.sh
new file mode 100755
index 000000000..75defbe5a
--- /dev/null
+++ b/dev/setup-all.sh
@@ -0,0 +1,16 @@
+#!/usr/bin/env bash
+
+# Setup hosts
+# Use the second internal IP; using the first IP causes SSH timeouts
+HOST_IP=$(hostname -I | cut -f2 -d" ")
+echo $HOST_IP $(hostname) | sudo tee -a /etc/hosts
+
+# Install dependencies for building
+$GITHUB_WORKSPACE/dev/install-build-deps-ubuntu.sh
+
+# Set up password-less SSH and Python 3
+$GITHUB_WORKSPACE/dev/test-cluster/config-ssh.sh
+$GITHUB_WORKSPACE/dev/test-cluster/setup-python3.sh
+
+# Setup cluster and envs
+source $GITHUB_WORKSPACE/dev/test-cluster/setup-cluster.sh
diff --git a/dev/test-cluster/ci-test-cluster.sh b/dev/test-cluster/ci-test-cluster.sh
new file mode 100755
index 000000000..7a4600267
--- /dev/null
+++ b/dev/test-cluster/ci-test-cluster.sh
@@ -0,0 +1,19 @@
+#!/usr/bin/env bash
+
+# Setup Spark envs
+source $GITHUB_WORKSPACE/dev/test-cluster/setup-spark-envs.sh
+
+# Setup OAP MLlib envs
+cp $GITHUB_WORKSPACE/dev/test-cluster/env.sh $GITHUB_WORKSPACE/conf
+
+cd $GITHUB_WORKSPACE/examples
+
+# Copy examples data to HDFS
+hadoop fs -mkdir -p /user/$USER
+hadoop fs -copyFromLocal data
+hadoop fs -ls data
+
+# Build and run all examples
+./build-all.sh
+./run-all-scala.sh
+./run-all-pyspark.sh
diff --git a/dev/test-cluster/config-ssh.sh b/dev/test-cluster/config-ssh.sh
index d093fa17a..a6fc2699e 100755
--- a/dev/test-cluster/config-ssh.sh
+++ b/dev/test-cluster/config-ssh.sh
@@ -2,5 +2,14 @@
ssh-keygen -q -N "" -t rsa -f ~/.ssh/id_rsa
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
+chmod 600 ~/.ssh/authorized_keys
+
+# Disable strict host key checking
echo " StrictHostKeyChecking no " | sudo tee -a /etc/ssh/ssh_config
-sudo service ssh restart
+# Disable StrictModes to relax permission checks on SSH files
+echo "StrictModes no" | sudo tee -a /etc/ssh/sshd_config
+
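+# Show .ssh ownership and permissions for debugging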
+ls -ld ~/.ssh
+ls -l ~/.ssh
+
+sudo systemctl restart ssh
diff --git a/dev/test-cluster/env.sh b/dev/test-cluster/env.sh
new file mode 100644
index 000000000..225db0b7b
--- /dev/null
+++ b/dev/test-cluster/env.sh
@@ -0,0 +1,46 @@
+# == OAP MLlib users: customize the following environments for running the examples == #
+
+# ============== Minimum Settings ============= #
+
+# Set OAP MLlib version (e.g. 1.1.0)
+OAP_MLLIB_VERSION=1.1.0
+# Set Spark master
+SPARK_MASTER=yarn
+# Set Hadoop home path
+export HADOOP_HOME=$HADOOP_HOME
+# Set Spark home path
+export SPARK_HOME=$SPARK_HOME
+# Set HDFS Root, should be hdfs://xxx or file://xxx
+export HDFS_ROOT=hdfs://localhost:8020
+# Set OAP MLlib source code root directory
+export OAP_MLLIB_ROOT=$GITHUB_WORKSPACE
+
+# ============================================= #
+
+# Set HADOOP_CONF_DIR for Spark
+export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
+
+# Set JAR name & path
+OAP_MLLIB_JAR_NAME=oap-mllib-$OAP_MLLIB_VERSION.jar
+OAP_MLLIB_JAR=$OAP_MLLIB_ROOT/mllib-dal/target/$OAP_MLLIB_JAR_NAME
+# Set Spark driver & executor classpaths,
+# absolute path for driver, relative path for executor
+SPARK_DRIVER_CLASSPATH=$OAP_MLLIB_JAR
+SPARK_EXECUTOR_CLASSPATH=./$OAP_MLLIB_JAR_NAME
+
+# Set Spark resources, can be overwritten in example
+SPARK_DRIVER_MEMORY=1G
+SPARK_NUM_EXECUTORS=2
+SPARK_EXECUTOR_CORES=1
+SPARK_EXECUTOR_MEMORY=1G
+SPARK_DEFAULT_PARALLELISM=$(expr $SPARK_NUM_EXECUTORS '*' $SPARK_EXECUTOR_CORES '*' 2)
+
+# Checks
+
+for dir in $SPARK_HOME $HADOOP_HOME $OAP_MLLIB_JAR
+do
+ if [[ ! -e $dir ]]; then
+ echo $dir does not exist!
+ exit 1
+ fi
+done
diff --git a/dev/test-cluster/envs.sh b/dev/test-cluster/envs.sh
deleted file mode 100644
index 71e8506e6..000000000
--- a/dev/test-cluster/envs.sh
+++ /dev/null
@@ -1,22 +0,0 @@
-# Set user Spark and Hadoop home directory
-export HADOOP_HOME=~/opt/hadoop-2.7.7
-export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
-export SPARK_HOME=~/opt/spark-3.0.0-bin-hadoop2.7
-
-export PYTHONPATH=$SPARK_HOME/python:$PYTHONPATH
-export PYSPARK_PYTHON=python3
-
-# Set user HDFS Root
-export HDFS_ROOT=hdfs://localhost:8020
-export OAP_MLLIB_DATA_ROOT=OAPMLlib/Data
-# Set user Intel MLlib Root directory
-export OAP_MLLIB_ROOT=${GITHUB_WORKSPACE}
-
-# Target jar built
-OAP_MLLIB_JAR_NAME=oap-mllib-1.1.0.jar
-OAP_MLLIB_JAR=$OAP_MLLIB_ROOT/mllib-dal/target/$OAP_MLLIB_JAR_NAME
-
-# Use absolute path
-SPARK_DRIVER_CLASSPATH=$OAP_MLLIB_JAR
-# Use relative path
-SPARK_EXECUTOR_CLASSPATH=./$OAP_MLLIB_JAR_NAME
diff --git a/dev/test-cluster/hadoop-env.sh b/dev/test-cluster/hadoop-env.sh
index bee6c1f69..f60b65a0b 100755
--- a/dev/test-cluster/hadoop-env.sh
+++ b/dev/test-cluster/hadoop-env.sh
@@ -22,8 +22,7 @@
# remote nodes.
# The java implementation to use.
-# export JAVA_HOME=${JAVA_HOME}
-export JAVA_HOME=/usr/local/lib/jvm/openjdk8
+export JAVA_HOME=${JAVA_HOME}
# The jsvc implementation to use. Jsvc is required to run secure datanodes
# that bind to privileged ports to provide authentication of data transfer
diff --git a/dev/test-cluster/setup-cluster.sh b/dev/test-cluster/setup-cluster.sh
index eea058f80..b58c676ab 100755
--- a/dev/test-cluster/setup-cluster.sh
+++ b/dev/test-cluster/setup-cluster.sh
@@ -6,33 +6,38 @@ cd $WORK_DIR
echo JAVA_HOME is $JAVA_HOME
-mkdir ~/opt
+[ -d ~/opt ] || mkdir ~/opt
cd ~/opt
-wget https://archive.apache.org/dist/spark/spark-3.0.0/spark-3.0.0-bin-hadoop2.7.tgz
-tar -xzf spark-3.0.0-bin-hadoop2.7.tgz
-wget https://archive.apache.org/dist/hadoop/core/hadoop-2.7.7/hadoop-2.7.7.tar.gz
-tar -xzf hadoop-2.7.7.tar.gz
+[ -f spark-3.0.0-bin-hadoop2.7.tgz ] || wget --no-verbose https://archive.apache.org/dist/spark/spark-3.0.0/spark-3.0.0-bin-hadoop2.7.tgz
+[ -d spark-3.0.0-bin-hadoop2.7 ] || tar -xzf spark-3.0.0-bin-hadoop2.7.tgz
+[ -f hadoop-2.7.7.tar.gz ] || wget --no-verbose https://archive.apache.org/dist/hadoop/core/hadoop-2.7.7/hadoop-2.7.7.tar.gz
+[ -d hadoop-2.7.7 ] || tar -xzf hadoop-2.7.7.tar.gz
cd $WORK_DIR
+# Use the second internal IP; using the first IP causes SSH timeouts
+HOST_IP=$(hostname -I | cut -f2 -d" ")
+
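+# Point the Hadoop core-site.xml and yarn-site.xml at this host instead of localhost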
+sed -i "s/localhost/$HOST_IP/g" core-site.xml
+sed -i "s/localhost/$HOST_IP/g" yarn-site.xml
+
cp ./core-site.xml ~/opt/hadoop-2.7.7/etc/hadoop/
cp ./hdfs-site.xml ~/opt/hadoop-2.7.7/etc/hadoop/
cp ./yarn-site.xml ~/opt/hadoop-2.7.7/etc/hadoop/
cp ./hadoop-env.sh ~/opt/hadoop-2.7.7/etc/hadoop/
cp ./spark-defaults.conf ~/opt/spark-3.0.0-bin-hadoop2.7/conf
+source ./setup-spark-envs.sh
+
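+# List this host as the only worker in the Hadoop and Spark slaves files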
+echo $HOST_IP > $HADOOP_HOME/etc/hadoop/slaves
+echo $HOST_IP > $SPARK_HOME/conf/slaves
+
# create directories
mkdir -p /tmp/run/hdfs/namenode
mkdir -p /tmp/run/hdfs/datanode
# hdfs format
-~/opt/hadoop-2.7.7/bin/hdfs namenode -format
-
-export HADOOP_HOME=~/opt/hadoop-2.7.7
-export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
-export SPARK_HOME=~/opt/spark-3.0.0-bin-hadoop2.7
-
-export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PATH
+$HADOOP_HOME/bin/hdfs namenode -format
# start hdfs and yarn
$HADOOP_HOME/sbin/start-dfs.sh
diff --git a/dev/test-cluster/setup-python3-env.sh b/dev/test-cluster/setup-python3.sh
similarity index 100%
rename from dev/test-cluster/setup-python3-env.sh
rename to dev/test-cluster/setup-python3.sh
diff --git a/dev/test-cluster/setup-spark-envs.sh b/dev/test-cluster/setup-spark-envs.sh
new file mode 100755
index 000000000..3819f6ee8
--- /dev/null
+++ b/dev/test-cluster/setup-spark-envs.sh
@@ -0,0 +1,11 @@
+#!/usr/bin/env bash
+
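+# Hadoop and Spark are installed under ~/opt by setup-cluster.sh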
+export HADOOP_HOME=~/opt/hadoop-2.7.7
+export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
+export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$HADOOP_HOME/lib/native
+
+export SPARK_HOME=~/opt/spark-3.0.0-bin-hadoop2.7
+export PYTHONPATH=$SPARK_HOME/python:$PYTHONPATH
+export PYSPARK_PYTHON=python3
+
+export PATH=$HADOOP_HOME/bin:$SPARK_HOME/bin:$PATH
\ No newline at end of file
diff --git a/dev/test-cluster/spark-defaults.conf b/dev/test-cluster/spark-defaults.conf
index 1c25bb2ec..05f1c31e3 100644
--- a/dev/test-cluster/spark-defaults.conf
+++ b/dev/test-cluster/spark-defaults.conf
@@ -28,7 +28,7 @@
spark.master yarn
spark.serializer org.apache.spark.serializer.KryoSerializer
-spark.driver.memory 3g
+spark.driver.memory 1g
spark.executor.num 2
spark.executor.cores 1
-spark.executor.memory 4g
+spark.executor.memory 2g
diff --git a/dev/test-cluster/workloads/kmeans-pyspark.py b/dev/test-cluster/workloads/kmeans-pyspark.py
deleted file mode 100644
index cf93e6034..000000000
--- a/dev/test-cluster/workloads/kmeans-pyspark.py
+++ /dev/null
@@ -1,70 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-"""
-An example demonstrating k-means clustering.
-Run with:
- bin/spark-submit examples/src/main/python/ml/kmeans_example.py
-
-This example requires NumPy (http://www.numpy.org/).
-"""
-from __future__ import print_function
-import sys
-
-# $example on$
-from pyspark.ml.clustering import KMeans
-from pyspark.ml.evaluation import ClusteringEvaluator
-# $example off$
-
-from pyspark.sql import SparkSession
-
-if __name__ == "__main__":
- spark = SparkSession\
- .builder\
- .appName("KMeansExample")\
- .getOrCreate()
-
- if (len(sys.argv) != 2) :
- println("Require data file path as input parameter")
- sys.exit(1)
-
- # $example on$
- # Loads data.
- dataset = spark.read.format("libsvm").load(sys.argv[1])
-
- # Trains a k-means model.
- kmeans = KMeans().setK(2).setSeed(1)
- model = kmeans.fit(dataset)
-
- # Make predictions
- predictions = model.transform(dataset)
-
- # Evaluate clustering by computing Silhouette score
- evaluator = ClusteringEvaluator()
-
- silhouette = evaluator.evaluate(predictions)
- print("Silhouette with squared euclidean distance = " + str(silhouette))
-
- # Shows the result.
- centers = model.clusterCenters()
- print("Cluster Centers: ")
- for center in centers:
- print(center)
- # $example off$
-
- spark.stop()
-
diff --git a/dev/test-cluster/workloads/run-kmeans-pyspark.sh b/dev/test-cluster/workloads/run-kmeans-pyspark.sh
deleted file mode 100755
index e07f3f7b6..000000000
--- a/dev/test-cluster/workloads/run-kmeans-pyspark.sh
+++ /dev/null
@@ -1,48 +0,0 @@
-#!/usr/bin/env bash
-
-source ../envs.sh
-
-# Data file is from Spark Examples (data/mllib/sample_kmeans_data.txt), the data file should be copied to HDFS
-$HADOOP_HOME/bin/hadoop fs -mkdir -p $OAP_MLLIB_DATA_ROOT
-$HADOOP_HOME/bin/hadoop fs -copyFromLocal $SPARK_HOME/data/mllib/sample_kmeans_data.txt $OAP_MLLIB_DATA_ROOT
-
-# User should check the requested resources are acturally allocated by cluster manager or Intel MLlib will behave incorrectly
-SPARK_MASTER=yarn
-SPARK_DRIVER_MEMORY=1G
-SPARK_NUM_EXECUTORS=2
-SPARK_EXECUTOR_CORES=1
-SPARK_EXECUTOR_MEMORY=1G
-
-SPARK_DEFAULT_PARALLELISM=$(expr $SPARK_NUM_EXECUTORS '*' $SPARK_EXECUTOR_CORES '*' 2)
-
-# ======================================================= #
-
-# Check env
-if [[ -z $SPARK_HOME ]]; then
- echo SPARK_HOME not defined!
- exit 1
-fi
-
-if [[ -z $HADOOP_HOME ]]; then
- echo HADOOP_HOME not defined!
- exit 1
-fi
-
-APP_PY="$OAP_MLLIB_ROOT/dev/test-cluster/workloads/kmeans-pyspark.py"
-DATA_FILE=$OAP_MLLIB_DATA_ROOT/sample_kmeans_data.txt
-
-$SPARK_HOME/bin/spark-submit --master $SPARK_MASTER -v \
- --num-executors $SPARK_NUM_EXECUTORS \
- --driver-memory $SPARK_DRIVER_MEMORY \
- --executor-cores $SPARK_EXECUTOR_CORES \
- --executor-memory $SPARK_EXECUTOR_MEMORY \
- --conf "spark.serializer=org.apache.spark.serializer.KryoSerializer" \
- --conf "spark.default.parallelism=$SPARK_DEFAULT_PARALLELISM" \
- --conf "spark.sql.shuffle.partitions=$SPARK_DEFAULT_PARALLELISM" \
- --conf "spark.driver.extraClassPath=$SPARK_DRIVER_CLASSPATH" \
- --conf "spark.executor.extraClassPath=$SPARK_EXECUTOR_CLASSPATH" \
- --conf "spark.shuffle.reduceLocality.enabled=false" \
- --conf "spark.network.timeout=1200s" \
- --conf "spark.task.maxFailures=1" \
- --jars $OAP_MLLIB_JAR \
- $APP_PY $DATA_FILE
diff --git a/examples/als-hibench/pom.xml b/examples/als-hibench/pom.xml
deleted file mode 100644
index 68e02c256..000000000
--- a/examples/als-hibench/pom.xml
+++ /dev/null
@@ -1,100 +0,0 @@
-
- 4.0.0
-
- com.intel.oap
- oap-mllib-examples
- 0.9.0-with-spark-3.0.0
- jar
-
- ALSHiBenchExample
- https://github.com/Intel-bigdata/OAP
-
-
- UTF-8
- 2.12.10
- 2.12
- 3.0.0
-
-
-
-
-
- org.scala-lang
- scala-library
- 2.12.10
-
-
-
- com.github.scopt
- scopt_2.12
- 3.7.0
-
-
-
-
-
-
-
-
-
-
- org.apache.spark
- spark-sql_2.12
- ${spark.version}
- provided
-
-
-
- org.apache.spark
- spark-mllib_2.12
- ${spark.version}
- provided
-
-
-
-
-
-
-
- org.scala-tools
- maven-scala-plugin
- 2.15.2
-
-
-
- compile
- testCompile
-
-
-
-
- ${scala.version}
-
- -target:jvm-1.8
-
-
-
-
- maven-assembly-plugin
- 3.0.0
-
- false
-
- jar-with-dependencies
-
-
-
-
- assembly
- package
-
- single
-
-
-
-
-
-
-
-
diff --git a/examples/als-hibench/run-hibench-oap-mllib.sh b/examples/als-hibench/run-hibench-oap-mllib.sh
deleted file mode 100755
index 050b80558..000000000
--- a/examples/als-hibench/run-hibench-oap-mllib.sh
+++ /dev/null
@@ -1,73 +0,0 @@
-#!/usr/bin/env bash
-
-export HDFS_ROOT=hdfs://sr591:8020
-export OAP_MLLIB_ROOT=/home/xiaochang/Works/OAP-xwu99-als/oap-mllib
-
-SPARK_MASTER=yarn
-SPARK_DRIVER_MEMORY=16G
-SPARK_NUM_EXECUTORS=6
-SPARK_EXECUTOR_CORES=28
-SPARK_EXECUTOR_MEMORY_OVERHEAD=25G
-SPARK_EXECUTOR_MEMORY=100G
-
-SPARK_DEFAULT_PARALLELISM=$(expr $SPARK_NUM_EXECUTORS '*' $SPARK_EXECUTOR_CORES '*' 2)
-#SPARK_DEFAULT_PARALLELISM=$(expr $SPARK_NUM_EXECUTORS '*' $SPARK_EXECUTOR_CORES)
-
-# ======================================================= #
-
-# for log suffix
-SUFFIX=$( basename -s .sh "${BASH_SOURCE[0]}" )
-
-# Check envs
-if [[ -z $SPARK_HOME ]]; then
- echo SPARK_HOME not defined!
- exit 1
-fi
-
-if [[ -z $HADOOP_HOME ]]; then
- echo HADOOP_HOME not defined!
- exit 1
-fi
-
-export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
-
-# Target jar built
-OAP_MLLIB_JAR_NAME=oap-mllib-0.9.0-with-spark-3.0.0.jar
-OAP_MLLIB_JAR=$OAP_MLLIB_ROOT/mllib-dal/target/$OAP_MLLIB_JAR_NAME
-
-# Use absolute path
-SPARK_DRIVER_CLASSPATH=$OAP_MLLIB_JAR
-# Use relative path
-SPARK_EXECUTOR_CLASSPATH=./$OAP_MLLIB_JAR_NAME
-
-APP_JAR=target/oap-mllib-examples-0.9.0-with-spark-3.0.0.jar
-APP_CLASS=com.intel.hibench.sparkbench.ml.ALSExample
-
-HDFS_INPUT=hdfs://sr591:8020/HiBench/ALS/Input
-RANK=10
-NUM_ITERATIONS=1
-LAMBDA=0.1
-IMPLICIT=true
-
-/usr/bin/time -p $SPARK_HOME/bin/spark-submit --master $SPARK_MASTER -v \
- --num-executors $SPARK_NUM_EXECUTORS \
- --driver-memory $SPARK_DRIVER_MEMORY \
- --executor-cores $SPARK_EXECUTOR_CORES \
- --executor-memory $SPARK_EXECUTOR_MEMORY \
- --conf "spark.serializer=org.apache.spark.serializer.KryoSerializer" \
- --conf "spark.default.parallelism=$SPARK_DEFAULT_PARALLELISM" \
- --conf "spark.sql.shuffle.partitions=$SPARK_DEFAULT_PARALLELISM" \
- --conf "spark.driver.extraClassPath=$SPARK_DRIVER_CLASSPATH" \
- --conf "spark.executor.extraClassPath=$SPARK_EXECUTOR_CLASSPATH" \
- --conf "spark.shuffle.reduceLocality.enabled=false" \
- --conf "spark.executor.memoryOverhead=$SPARK_EXECUTOR_MEMORY_OVERHEAD" \
- --conf "spark.network.timeout=1200s" \
- --conf "spark.task.maxFailures=1" \
- --jars $OAP_MLLIB_JAR \
- --class $APP_CLASS \
- $APP_JAR \
- --rank $RANK --numIterations $NUM_ITERATIONS --implicitPrefs $IMPLICIT --lambda $LAMBDA \
- --numProductBlocks $SPARK_DEFAULT_PARALLELISM --numUserBlocks $SPARK_DEFAULT_PARALLELISM \
- $HDFS_INPUT \
- 2>&1 | tee ALS-$SUFFIX-$(date +%m%d_%H_%M_%S).log
-
diff --git a/examples/als-hibench/run-hibench-vanilla.sh b/examples/als-hibench/run-hibench-vanilla.sh
deleted file mode 100755
index 6cb6b3ae7..000000000
--- a/examples/als-hibench/run-hibench-vanilla.sh
+++ /dev/null
@@ -1,61 +0,0 @@
-#!/usr/bin/env bash
-
-export HDFS_ROOT=hdfs://sr591:8020
-
-SPARK_MASTER=yarn
-SPARK_DRIVER_MEMORY=16G
-SPARK_NUM_EXECUTORS=6
-SPARK_EXECUTOR_CORES=28
-SPARK_EXECUTOR_MEMORY_OVERHEAD=25G
-SPARK_EXECUTOR_MEMORY=100G
-
-SPARK_DEFAULT_PARALLELISM=$(expr $SPARK_NUM_EXECUTORS '*' $SPARK_EXECUTOR_CORES '*' 2)
-
-# ======================================================= #
-
-# for log suffix
-SUFFIX=$( basename -s .sh "${BASH_SOURCE[0]}" )
-
-# Check envs
-if [[ -z $SPARK_HOME ]]; then
- echo SPARK_HOME not defined!
- exit 1
-fi
-
-if [[ -z $HADOOP_HOME ]]; then
- echo HADOOP_HOME not defined!
- exit 1
-fi
-
-export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
-
-APP_JAR=target/oap-mllib-examples-0.9.0-with-spark-3.0.0.jar
-APP_CLASS=com.intel.hibench.sparkbench.ml.ALSExample
-
-HDFS_INPUT=hdfs://sr591:8020/HiBench/ALS/Input
-RANK=10
-NUM_ITERATIONS=1
-LAMBDA=0.1
-IMPLICIT=true
-
-/usr/bin/time -p $SPARK_HOME/bin/spark-submit --master $SPARK_MASTER -v \
- --num-executors $SPARK_NUM_EXECUTORS \
- --driver-memory $SPARK_DRIVER_MEMORY \
- --executor-cores $SPARK_EXECUTOR_CORES \
- --executor-memory $SPARK_EXECUTOR_MEMORY \
- --conf "spark.serializer=org.apache.spark.serializer.KryoSerializer" \
- --conf "spark.default.parallelism=$SPARK_DEFAULT_PARALLELISM" \
- --conf "spark.sql.shuffle.partitions=$SPARK_DEFAULT_PARALLELISM" \
- --conf "spark.driver.extraClassPath=$SPARK_DRIVER_CLASSPATH" \
- --conf "spark.executor.extraClassPath=$SPARK_EXECUTOR_CLASSPATH" \
- --conf "spark.shuffle.reduceLocality.enabled=false" \
- --conf "spark.executor.memoryOverhead=$SPARK_EXECUTOR_MEMORY_OVERHEAD" \
- --conf "spark.network.timeout=1200s" \
- --conf "spark.task.maxFailures=1" \
- --class $APP_CLASS \
- $APP_JAR \
- --rank $RANK --numIterations $NUM_ITERATIONS --implicitPrefs $IMPLICIT --lambda $LAMBDA \
- --numProductBlocks $SPARK_DEFAULT_PARALLELISM --numUserBlocks $SPARK_DEFAULT_PARALLELISM \
- $HDFS_INPUT \
- 2>&1 | tee ALS-$SUFFIX-$(date +%m%d_%H_%M_%S).log
-
diff --git a/examples/als-hibench/src/main/scala/com/intel/hibench/sparkbench/ml/ALSExample.scala b/examples/als-hibench/src/main/scala/com/intel/hibench/sparkbench/ml/ALSExample.scala
deleted file mode 100644
index 5a29bcc80..000000000
--- a/examples/als-hibench/src/main/scala/com/intel/hibench/sparkbench/ml/ALSExample.scala
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.intel.hibench.sparkbench.ml
-
-import org.apache.spark.ml.evaluation.RegressionEvaluator
-import org.apache.spark.ml.recommendation.ALS
-import org.apache.spark.ml.recommendation.ALS.Rating
-import org.apache.spark.sql.SparkSession
-import scopt.OptionParser
-
-object ALSExample {
-
- case class Params(
- dataPath: String = null,
- numIterations: Int = 10,
- lambda: Double = 0.1,
- rank: Int = 10,
- numUserBlocks: Int = 10,
- numItemBlocks: Int = 10,
- implicitPrefs: Boolean = false)
-
- def main(args: Array[String]) {
- val defaultParams = Params()
-
- val parser = new OptionParser[Params]("ALS") {
- head("ALS: an example app for ALS on User-Item data.")
- opt[Int]("rank")
- .text(s"rank, default: ${defaultParams.rank}")
- .action((x, c) => c.copy(rank = x))
- opt[Int]("numIterations")
- .text(s"number of iterations, default: ${defaultParams.numIterations}")
- .action((x, c) => c.copy(numIterations = x))
- opt[Double]("lambda")
- .text(s"regularization parameter, default: ${defaultParams.lambda}")
- .action((x, c) => c.copy(lambda = x))
- opt[Int]("numUserBlocks")
- .text(s"number of user blocks, default: ${defaultParams.numUserBlocks}")
- .action((x, c) => c.copy(numUserBlocks = x))
- opt[Int]("numProductBlocks")
- .text(s"number of product blocks, default: ${defaultParams.numItemBlocks}")
- .action((x, c) => c.copy(numItemBlocks = x))
- opt[Boolean]("implicitPrefs")
- .text("implicit preference, default: ${defaultParams.implicitPrefs}")
- .action((x, c) => c.copy(implicitPrefs = x))
- arg[String]("")
- .required()
- .text("Input paths to a User-Product dataset of ratings")
- .action((x, c) => c.copy(dataPath = x))
- }
- parser.parse(args, defaultParams) match {
- case Some(params) => run(params)
- case _ => sys.exit(1)
- }
- }
-
- def run(params: Params): Unit = {
- val spark = SparkSession
- .builder
- .appName(s"ALS with $params")
- .getOrCreate()
- val sc = spark.sparkContext
-
- import spark.implicits._
-
- val ratings = sc.objectFile[Rating[Int]](params.dataPath).toDF()
-
- val Array(training, test) = ratings.randomSplit(Array(0.8, 0.2), 1L)
-
- // Build the recommendation model using ALS on the training data
- val als = new ALS()
- .setRank(params.rank)
- .setMaxIter(params.numIterations)
- .setRegParam(params.lambda)
- .setImplicitPrefs(params.implicitPrefs)
- .setNumUserBlocks(params.numUserBlocks)
- .setNumItemBlocks(params.numItemBlocks)
- .setUserCol("user")
- .setItemCol("item")
- .setRatingCol("rating")
- val model = als.fit(training)
-
- // Evaluate the model by computing the RMSE on the test data
- // Note we set cold start strategy to 'drop' to ensure we don't get NaN evaluation metrics
- model.setColdStartStrategy("drop")
- val predictions = model.transform(test)
-
- val evaluator = new RegressionEvaluator()
- .setMetricName("rmse")
- .setLabelCol("rating")
- .setPredictionCol("prediction")
- val rmse = evaluator.evaluate(predictions)
- println(s"Root-mean-square error = $rmse")
-
- spark.stop()
- }
-}
diff --git a/examples/als-pyspark/als-pyspark.py b/examples/als-pyspark/als-pyspark.py
index 8847ca2b9..12622a4fd 100644
--- a/examples/als-pyspark/als-pyspark.py
+++ b/examples/als-pyspark/als-pyspark.py
@@ -44,8 +44,8 @@
parts = lines.map(lambda row: row.value.split("::"))
ratingsRDD = parts.map(lambda p: Row(userId=int(p[0]), movieId=int(p[1]),
rating=float(p[2])))
- ratings = spark.createDataFrame(ratingsRDD)
- # (training, test) = ratings.randomSplit([0.8, 0.2])
+ ratings = spark.createDataFrame(ratingsRDD)
+ (training, test) = ratings.randomSplit([0.8, 0.2])
# Build the recommendation model using ALS on the training data
# Note we set cold start strategy to 'drop' to ensure we don't get NaN evaluation metrics
@@ -55,13 +55,13 @@
print("\nALS training with implicitPrefs={}, rank={}, maxIter={}, regParam={}, alpha={}, seed={}\n".format(
als.getImplicitPrefs(), als.getRank(), als.getMaxIter(), als.getRegParam(), als.getAlpha(), als.getSeed()
))
- model = als.fit(ratings)
+ model = als.fit(training)
- # Evaluate the model by computing the RMSE on the test data
- # predictions = model.transform(test)
- # evaluator = RegressionEvaluator(metricName="rmse", labelCol="rating",
- # predictionCol="prediction")
- # rmse = evaluator.evaluate(predictions)
- # print("Root-mean-square error = " + str(rmse))
+ # Evaluate the model by computing the RMSE on the test data
+ predictions = model.transform(test)
+ evaluator = RegressionEvaluator(metricName="rmse", labelCol="rating",
+ predictionCol="prediction")
+ rmse = evaluator.evaluate(predictions)
+ print("Root-mean-square error = " + str(rmse))
spark.stop()
diff --git a/examples/als-pyspark/run.sh b/examples/als-pyspark/run.sh
index b3ba1b6d2..044c857f6 100755
--- a/examples/als-pyspark/run.sh
+++ b/examples/als-pyspark/run.sh
@@ -1,62 +1,14 @@
#!/usr/bin/env bash
-# == User to customize the following environments ======= #
+source ../../conf/env.sh
-# Set user Spark and Hadoop home directory
-#export SPARK_HOME=/path/to/your/spark/home
-#export HADOOP_HOME=/path/to/your/hadoop/home
-# Set user HDFS Root
-export HDFS_ROOT=hdfs://sr549:8020
-# Set user Intel MLlib Root directory
-export OAP_MLLIB_ROOT=/home/xiaochang/Works/OAP-xwu99-als/oap-mllib
-# Set IP and Port for oneCCL KVS, you can select any one of the worker nodes and set CCL_KVS_IP_PORT to its IP and Port
-# IP can be got with `hostname -I`, if multiple IPs are returned, the first IP should be used. Port can be any available port.
-# For example, if one of the worker IP is 192.168.0.1 and an available port is 51234.
-# CCL_KVS_IP_PORT can be set in the format of 192.168.0.1_51234
-# Incorrectly setting this value will result in hanging when oneCCL initialize
-export CCL_KVS_IP_PORT=10.0.2.149_51234
-
-# Data file is from Spark Examples (data/mllib/sample_kmeans_data.txt), the data file should be copied to HDFS
+# Data file is converted from oneDAL examples ($DAALROOT/examples/daal/data/batch/implicit_als_csr.csv)
+# The data file should be copied to $HDFS_ROOT before running examples
DATA_FILE=data/onedal_als_csr_ratings.txt
-# == User to customize Spark executor cores and memory == #
-
-# User should check the requested resources are acturally allocated by cluster manager or Intel MLlib will behave incorrectly
-SPARK_MASTER=yarn
-SPARK_DRIVER_MEMORY=1G
-SPARK_NUM_EXECUTORS=2
-SPARK_EXECUTOR_CORES=1
-SPARK_EXECUTOR_MEMORY=1G
-
-SPARK_DEFAULT_PARALLELISM=$(expr $SPARK_NUM_EXECUTORS '*' $SPARK_EXECUTOR_CORES '*' 2)
-
-# ======================================================= #
-
-# Check env
-if [[ -z $SPARK_HOME ]]; then
- echo SPARK_HOME not defined!
- exit 1
-fi
-
-if [[ -z $HADOOP_HOME ]]; then
- echo HADOOP_HOME not defined!
- exit 1
-fi
-
-export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
-
-# Target jar built
-OAP_MLLIB_JAR_NAME=oap-mllib-0.9.0-with-spark-3.0.0.jar
-OAP_MLLIB_JAR=$OAP_MLLIB_ROOT/mllib-dal/target/$OAP_MLLIB_JAR_NAME
-
-# Use absolute path
-SPARK_DRIVER_CLASSPATH=$OAP_MLLIB_JAR
-# Use relative path
-SPARK_EXECUTOR_CLASSPATH=./$OAP_MLLIB_JAR_NAME
-
APP_PY=als-pyspark.py
-/usr/bin/time -p $SPARK_HOME/bin/spark-submit --master $SPARK_MASTER -v \
+time $SPARK_HOME/bin/spark-submit --master $SPARK_MASTER -v \
--num-executors $SPARK_NUM_EXECUTORS \
--driver-memory $SPARK_DRIVER_MEMORY \
--executor-cores $SPARK_EXECUTOR_CORES \
@@ -66,7 +18,6 @@ APP_PY=als-pyspark.py
--conf "spark.sql.shuffle.partitions=$SPARK_DEFAULT_PARALLELISM" \
--conf "spark.driver.extraClassPath=$SPARK_DRIVER_CLASSPATH" \
--conf "spark.executor.extraClassPath=$SPARK_EXECUTOR_CLASSPATH" \
- --conf "spark.executorEnv.CCL_KVS_IP_PORT=$CCL_KVS_IP_PORT" \
--conf "spark.shuffle.reduceLocality.enabled=false" \
--conf "spark.network.timeout=1200s" \
--conf "spark.task.maxFailures=1" \
diff --git a/examples/als-hibench/build.sh b/examples/als/build.sh
similarity index 52%
rename from examples/als-hibench/build.sh
rename to examples/als/build.sh
index 8cbc692be..3c01d1689 100755
--- a/examples/als-hibench/build.sh
+++ b/examples/als/build.sh
@@ -1,3 +1,4 @@
#!/usr/bin/env bash
-mvn clean package
\ No newline at end of file
+mvn clean package
+
diff --git a/examples/kmeans-hibench/pom.xml b/examples/als/pom.xml
similarity index 91%
rename from examples/kmeans-hibench/pom.xml
rename to examples/als/pom.xml
index 3f5b56e29..94a72189a 100644
--- a/examples/kmeans-hibench/pom.xml
+++ b/examples/als/pom.xml
@@ -4,14 +4,15 @@
com.intel.oap
oap-mllib-examples
- 1.1.0-with-spark-3.0.0
+ ${oap.version}-with-spark-${spark.version}
jar
- KMeansHiBenchExample
+ ALSExample
https://github.com/oap-project/oap-mllib.git
UTF-8
+ 1.1.0
2.12.10
2.12
3.0.0
@@ -31,12 +32,6 @@
3.7.0
-
- org.apache.mahout
- mahout-hdfs
- 14.1
-
-
org.apache.spark
spark-sql_2.12
diff --git a/examples/als/run.sh b/examples/als/run.sh
new file mode 100755
index 000000000..8a317dbc7
--- /dev/null
+++ b/examples/als/run.sh
@@ -0,0 +1,28 @@
+#!/usr/bin/env bash
+
+source ../../conf/env.sh
+
+# Data file is converted from oneDAL examples ($DAALROOT/examples/daal/data/batch/implicit_als_csr.csv)
+# The data file should be copied to $HDFS_ROOT before running examples
+DATA_FILE=data/onedal_als_csr_ratings.txt
+
+APP_JAR=target/oap-mllib-examples-$OAP_MLLIB_VERSION-with-spark-3.0.0.jar
+APP_CLASS=org.apache.spark.examples.ml.ALSExample
+
+time $SPARK_HOME/bin/spark-submit --master $SPARK_MASTER -v \
+ --num-executors $SPARK_NUM_EXECUTORS \
+ --driver-memory $SPARK_DRIVER_MEMORY \
+ --executor-cores $SPARK_EXECUTOR_CORES \
+ --executor-memory $SPARK_EXECUTOR_MEMORY \
+ --conf "spark.serializer=org.apache.spark.serializer.KryoSerializer" \
+ --conf "spark.default.parallelism=$SPARK_DEFAULT_PARALLELISM" \
+ --conf "spark.sql.shuffle.partitions=$SPARK_DEFAULT_PARALLELISM" \
+ --conf "spark.driver.extraClassPath=$SPARK_DRIVER_CLASSPATH" \
+ --conf "spark.executor.extraClassPath=$SPARK_EXECUTOR_CLASSPATH" \
+ --conf "spark.shuffle.reduceLocality.enabled=false" \
+ --conf "spark.network.timeout=1200s" \
+ --conf "spark.task.maxFailures=1" \
+ --jars $OAP_MLLIB_JAR \
+ --class $APP_CLASS \
+ $APP_JAR $DATA_FILE \
+ 2>&1 | tee ALS-$(date +%m%d_%H_%M_%S).log
diff --git a/examples/als/src/main/scala/org/apache/spark/examples/ml/ALSExample.scala b/examples/als/src/main/scala/org/apache/spark/examples/ml/ALSExample.scala
new file mode 100644
index 000000000..1071e906b
--- /dev/null
+++ b/examples/als/src/main/scala/org/apache/spark/examples/ml/ALSExample.scala
@@ -0,0 +1,91 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+// $example on$
+import org.apache.spark.ml.evaluation.RegressionEvaluator
+import org.apache.spark.ml.recommendation.ALS
+// $example off$
+import org.apache.spark.sql.SparkSession
+
+/**
+ * An example demonstrating ALS.
+ * Run with
+ * {{{
+ * bin/run-example ml.ALSExample
+ * }}}
+ */
+object ALSExample {
+
+ // $example on$
+ case class Rating(userId: Int, movieId: Int, rating: Float)
+ def parseRating(str: String): Rating = {
+ val fields = str.split("::")
+ assert(fields.size == 3)
+ Rating(fields(0).toInt, fields(1).toInt, fields(2).toFloat)
+ }
+ // $example off$
+
+ def main(args: Array[String]): Unit = {
+ val spark = SparkSession
+ .builder
+ .appName("ALSExample")
+ .getOrCreate()
+ import spark.implicits._
+
+ if (args.length != 1) {
+ println("Require data file path as input parameter")
+ sys.exit(1)
+ }
+
+ // $example on$
+ val ratings = spark.read.textFile(args(0))
+ .map(parseRating)
+ .toDF()
+ val Array(training, test) = ratings.randomSplit(Array(0.8, 0.2))
+
+ // Build the recommendation model using ALS on the training data
+ val als = new ALS()
+ .setImplicitPrefs(true)
+ .setRank(10)
+ .setMaxIter(5)
+ .setRegParam(0.01)
+ .setAlpha(40.0)
+ .setUserCol("userId")
+ .setItemCol("movieId")
+ .setRatingCol("rating")
+ val model = als.fit(training)
+
+ // Evaluate the model by computing the RMSE on the test data
+ // Note we set cold start strategy to 'drop' to ensure we don't get NaN evaluation metrics
+ model.setColdStartStrategy("drop")
+ val predictions = model.transform(test)
+
+ val evaluator = new RegressionEvaluator()
+ .setMetricName("rmse")
+ .setLabelCol("rating")
+ .setPredictionCol("prediction")
+ val rmse = evaluator.evaluate(predictions)
+ println(s"Root-mean-square error = $rmse")
+
+ spark.stop()
+ }
+}
+// scalastyle:on println
+
diff --git a/examples/build-all.sh b/examples/build-all.sh
new file mode 100755
index 000000000..62a95d255
--- /dev/null
+++ b/examples/build-all.sh
@@ -0,0 +1,8 @@
+#!/usr/bin/env bash
+
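+# Build each Scala example (kmeans, pca, als) with Maven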
+for dir in kmeans pca als
+do
+ cd $dir
+ ./build.sh
+ cd ..
+done
diff --git a/examples/data/onedal_als_csr_ratings.txt b/examples/data/onedal_als_csr_ratings.txt
new file mode 100644
index 000000000..e3f5d3fae
--- /dev/null
+++ b/examples/data/onedal_als_csr_ratings.txt
@@ -0,0 +1,167 @@
+0::17::0.938283
+1::1::0.124207
+1::8::0.411504
+1::13::0.746992
+1::16::0.169437
+2::6::0.42021
+2::12::0.322446
+2::17::0.561519
+3::2::0.760795
+4::6::0.252532
+4::8::0.155777
+5::1::0.593943
+5::2::0.5289
+5::9::0.827276
+5::16::0.291741
+6::1::0.642664
+6::9::0.843839
+6::18::0.801948
+7::17::0.162832
+8::15::0.764005
+8::19::0.545399
+9::4::0.871051
+9::8::0.2094
+9::9::0.900738
+9::17::0.998866
+9::19::0.154139
+10::0::0.59703
+10::6::0.727774
+10::15::0.877197
+11::3::0.598636
+11::5::0.999655
+11::7::0.23638
+11::14::0.463678
+11::17::0.802767
+11::18::0.828629
+12::3::0.449008
+12::4::0.108126
+12::5::0.17944
+12::11::0.14992
+12::15::0.645085
+12::17::0.356908
+13::7::0.54838
+13::13::0.719667
+14::1::0.144589
+14::2::0.956232
+14::4::0.410129
+14::5::0.237406
+14::9::0.701227
+15::5::0.598455
+15::13::0.534545
+15::18::0.85741
+16::0::0.08512
+16::1::0.306062
+17::2::0.87395
+17::6::0.680554
+17::15::0.383043
+17::20::0.16813
+18::2::0.641488
+18::4::0.542261
+18::10::0.69714
+18::11::0.776203
+18::17::0.498716
+18::18::0.788093
+18::20::0.52406
+19::4::0.10402
+19::7::0.276732
+20::1::0.666263
+20::6::0.280048
+21::5::0.898574
+21::6::0.892768
+21::9::0.061185
+21::18::0.691028
+22::7::0.813807
+22::16::0.293614
+23::10::0.217541
+23::14::0.98958
+23::15::0.20269
+24::7::0.67432
+24::8::0.520428
+24::10::0.138665
+24::13::0.364809
+24::14::0.970167
+24::19::0.68381
+25::0::0.166145
+25::1::0.194913
+25::2::0.265607
+25::18::0.740052
+25::19::0.209377
+26::2::0.122306
+26::8::0.742562
+26::11::0.405206
+26::16::0.442783
+27::12::0.010994
+27::16::0.632512
+27::17::0.421555
+28::1::0.854519
+28::3::0.843519
+28::7::0.388753
+28::12::0.020689
+28::13::0.071531
+28::14::0.537579
+28::16::0.079456
+29::17::0.548573
+30::1::0.959732
+30::3::0.913432
+30::4::0.88553
+31::0::0.653987
+31::13::0.736684
+31::20::0.629751
+32::2::0.420538
+32::6::0.110444
+32::12::0.55993
+32::13::0.730668
+32::17::0.588223
+32::18::0.188579
+33::8::0.717314
+33::9::0.249797
+33::10::0.404286
+33::18::0.83197
+34::4::0.364628
+34::6::0.023655
+34::10::0.94169
+35::3::0.015393
+35::11::0.356229
+35::18::0.328241
+36::0::0.03866
+36::1::0.21685
+36::5::0.725101
+36::8::0.191972
+36::9::0.658415
+36::12::0.592436
+37::2::0.812225
+37::4::0.411506
+37::6::0.613151
+37::9::0.345352
+37::10::0.89008
+37::12::0.139664
+37::17::0.7633
+37::20::0.488679
+38::0::0.594923
+38::1::0.441561
+38::13::0.467085
+39::0::0.949957
+39::7::0.360488
+39::12::0.354949
+39::15::0.976556
+39::17::0.024024
+40::1::0.121904
+40::4::0.871203
+40::8::0.102956
+41::9::0.593112
+42::3::0.542693
+42::4::0.340404
+42::6::0.997438
+42::7::0.335679
+42::10::0.657767
+42::11::0.382666
+42::14::0.621782
+42::17::0.150028
+43::16::0.318803
+43::17::0.83869
+44::10::0.460685
+45::0::0.926797
+45::4::0.257822
+45::8::0.714351
+45::17::0.333358
+45::18::0.134587
diff --git a/examples/pca-pyspark/data/pca_data.csv b/examples/data/pca_data.csv
similarity index 100%
rename from examples/pca-pyspark/data/pca_data.csv
rename to examples/data/pca_data.csv
diff --git a/examples/data/sample_kmeans_data.txt b/examples/data/sample_kmeans_data.txt
new file mode 100644
index 000000000..50013776b
--- /dev/null
+++ b/examples/data/sample_kmeans_data.txt
@@ -0,0 +1,6 @@
+0 1:0.0 2:0.0 3:0.0
+1 1:0.1 2:0.1 3:0.1
+2 1:0.2 2:0.2 3:0.2
+3 1:9.0 2:9.0 3:9.0
+4 1:9.1 2:9.1 3:9.1
+5 1:9.2 2:9.2 3:9.2
diff --git a/examples/kmeans-hibench/run-hibench-oap-mllib.sh b/examples/kmeans-hibench/run-hibench-oap-mllib.sh
deleted file mode 100755
index caa42584f..000000000
--- a/examples/kmeans-hibench/run-hibench-oap-mllib.sh
+++ /dev/null
@@ -1,86 +0,0 @@
-#!/usr/bin/env bash
-
-# == User to customize the following environments ======= #
-
-# Set user Spark and Hadoop home directory
-export SPARK_HOME=/path/to/your/spark/home
-export HADOOP_HOME=/path/to/your/hadoop/home
-# Set user HDFS Root
-export HDFS_ROOT=hdfs://your_hostname:8020
-# Set user Intel MLlib Root directory
-export OAP_MLLIB_ROOT=/path/to/your/OAP/oap-mllib
-# Set IP and Port for oneCCL KVS, you can select any one of the worker nodes and set CCL_KVS_IP_PORT to its IP and Port
-# IP can be got with `hostname -I`, if multiple IPs are returned, the first IP should be used. Port can be any available port.
-# For example, if one of the worker IP is 192.168.0.1 and an available port is 51234.
-# CCL_KVS_IP_PORT can be set in the format of 192.168.0.1_51234
-# Incorrectly setting this value will result in hanging when oneCCL initialize
-export CCL_KVS_IP_PORT=192.168.0.1_51234
-
-# == User to customize Spark executor cores and memory == #
-
-# User should check the requested resources are acturally allocated by cluster manager or Intel MLlib will behave incorrectly
-SPARK_MASTER=yarn
-SPARK_DRIVER_MEMORY=8G
-SPARK_NUM_EXECUTORS=6
-SPARK_EXECUTOR_CORES=15
-SPARK_EXECUTOR_MEMORY_OVERHEAD=25G
-SPARK_EXECUTOR_MEMORY=50G
-
-SPARK_DEFAULT_PARALLELISM=$(expr $SPARK_NUM_EXECUTORS '*' $SPARK_EXECUTOR_CORES '*' 2)
-
-# ======================================================= #
-
-# for log suffix
-SUFFIX=$( basename -s .sh "${BASH_SOURCE[0]}" )
-
-# Check envs
-if [[ -z $SPARK_HOME ]]; then
- echo SPARK_HOME not defined!
- exit 1
-fi
-
-if [[ -z $HADOOP_HOME ]]; then
- echo HADOOP_HOME not defined!
- exit 1
-fi
-
-export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
-
-# Target jar built
-OAP_MLLIB_JAR_NAME=oap-mllib-0.9.0-with-spark-3.0.0.jar
-OAP_MLLIB_JAR=$OAP_MLLIB_ROOT/mllib-dal/target/$OAP_MLLIB_JAR_NAME
-
-# Use absolute path
-SPARK_DRIVER_CLASSPATH=$OAP_MLLIB_JAR
-# Use relative path
-SPARK_EXECUTOR_CLASSPATH=./$OAP_MLLIB_JAR_NAME
-
-APP_JAR=target/oap-mllib-examples-0.9.0-with-spark-3.0.0.jar
-APP_CLASS=com.intel.hibench.sparkbench.ml.DenseKMeansDS
-
-K=200
-INIT_MODE=Random
-MAX_ITERATION=20
-INPUT_HDFS=$HDFS_ROOT/HiBench/Kmeans/Input/samples
-
-/usr/bin/time -p $SPARK_HOME/bin/spark-submit --master $SPARK_MASTER -v \
- --num-executors $SPARK_NUM_EXECUTORS \
- --driver-memory $SPARK_DRIVER_MEMORY \
- --executor-cores $SPARK_EXECUTOR_CORES \
- --executor-memory $SPARK_EXECUTOR_MEMORY \
- --conf "spark.serializer=org.apache.spark.serializer.KryoSerializer" \
- --conf "spark.default.parallelism=$SPARK_DEFAULT_PARALLELISM" \
- --conf "spark.sql.shuffle.partitions=$SPARK_DEFAULT_PARALLELISM" \
- --conf "spark.driver.extraClassPath=$SPARK_DRIVER_CLASSPATH" \
- --conf "spark.executor.extraClassPath=$SPARK_EXECUTOR_CLASSPATH" \
- --conf "spark.executorEnv.CCL_KVS_IP_PORT=$CCL_KVS_IP_PORT" \
- --conf "spark.shuffle.reduceLocality.enabled=false" \
- --conf "spark.executor.memoryOverhead=$SPARK_EXECUTOR_MEMORY_OVERHEAD" \
- --conf "spark.memory.fraction=0.8" \
- --conf "spark.network.timeout=1200s" \
- --conf "spark.task.maxFailures=1" \
- --jars $OAP_MLLIB_JAR \
- --class $APP_CLASS \
- $APP_JAR \
- -k $K --initMode $INIT_MODE --numIterations $MAX_ITERATION $INPUT_HDFS \
- 2>&1 | tee KMeansHiBench-$SUFFIX-$(date +%m%d_%H_%M_%S).log
diff --git a/examples/kmeans-hibench/run-hibench-vanilla.sh b/examples/kmeans-hibench/run-hibench-vanilla.sh
deleted file mode 100755
index 475c25aff..000000000
--- a/examples/kmeans-hibench/run-hibench-vanilla.sh
+++ /dev/null
@@ -1,58 +0,0 @@
-#!/usr/bin/env bash
-
-# == User to customize the following environments ======= #
-
-# Set user Spark and Hadoop home directory
-export SPARK_HOME=/path/to/your/spark/home
-export HADOOP_HOME=/path/to/your/hadoop/home
-# Set user HDFS Root
-export HDFS_ROOT=hdfs://your_hostname:8020
-
-# == User to customize Spark executor cores and memory == #
-
-SPARK_MASTER=yarn
-SPARK_DRIVER_MEMORY=8G
-SPARK_NUM_EXECUTORS=6
-SPARK_EXECUTOR_CORES=15
-SPARK_EXECUTOR_MEMORY=75G
-
-SPARK_DEFAULT_PARALLELISM=$(expr $SPARK_NUM_EXECUTORS '*' $SPARK_EXECUTOR_CORES '*' 2)
-
-# ======================================================= #
-
-# for log suffix
-SUFFIX=$( basename -s .sh "${BASH_SOURCE[0]}" )
-
-# Check envs
-if [[ -z $SPARK_HOME ]]; then
- echo SPARK_HOME not defined!
- exit 1
-fi
-
-if [[ -z $HADOOP_HOME ]]; then
- echo HADOOP_HOME not defined!
- exit 1
-fi
-
-export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
-
-APP_JAR=target/oap-mllib-examples-0.9.0-with-spark-3.0.0.jar
-APP_CLASS=com.intel.hibench.sparkbench.ml.DenseKMeansDS
-
-K=200
-INIT_MODE=Random
-MAX_ITERATION=20
-INPUT_HDFS=$HDFS_ROOT/HiBench/Kmeans/Input/samples
-
-/usr/bin/time -p $SPARK_HOME/bin/spark-submit --master $SPARK_MASTER -v \
- --num-executors $SPARK_NUM_EXECUTORS \
- --driver-memory $SPARK_DRIVER_MEMORY \
- --executor-cores $SPARK_EXECUTOR_CORES \
- --executor-memory $SPARK_EXECUTOR_MEMORY \
- --conf "spark.serializer=org.apache.spark.serializer.KryoSerializer" \
- --conf "spark.default.parallelism=$SPARK_DEFAULT_PARALLELISM" \
- --conf "spark.sql.shuffle.partitions=$SPARK_DEFAULT_PARALLELISM" \
- --class $APP_CLASS \
- $APP_JAR \
- -k $K --initMode $INIT_MODE --numIterations $MAX_ITERATION $INPUT_HDFS \
- 2>&1 | tee KMeansHiBench-$SUFFIX-$(date +%m%d_%H_%M_%S).log
diff --git a/examples/kmeans-hibench/src/main/scala/com/intel/hibench/sparkbench/ml/DenseKMeansDS.scala b/examples/kmeans-hibench/src/main/scala/com/intel/hibench/sparkbench/ml/DenseKMeansDS.scala
deleted file mode 100644
index 3a949bb1c..000000000
--- a/examples/kmeans-hibench/src/main/scala/com/intel/hibench/sparkbench/ml/DenseKMeansDS.scala
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.intel.hibench.sparkbench.ml
-
-import breeze.linalg.DenseVector
-import org.apache.hadoop.io.LongWritable
-import org.apache.mahout.math.VectorWritable
-import org.apache.spark.ml.clustering.KMeans
-import org.apache.spark.ml.evaluation.ClusteringEvaluator
-import org.apache.spark.ml.linalg.{Vector, Vectors}
-import org.apache.spark.{SparkConf, SparkContext}
-import org.apache.spark.sql._
-import scopt.OptionParser
-import org.apache.spark.sql.SparkSession
-
-object DenseKMeansDS {
-
- object InitializationMode extends Enumeration {
- type InitializationMode = Value
- val Random, Parallel = Value
- }
-
- import com.intel.hibench.sparkbench.ml.DenseKMeansDS.InitializationMode._
-
- case class Params(input: String = null,
- k: Int = -1,
- numIterations: Int = 10,
- initializationMode: InitializationMode = Random)
-
- def main(args: Array[String]) {
- val defaultParams = Params()
-
- val parser = new OptionParser[Params]("DenseKMeans") {
- head("DenseKMeans: an example k-means app for dense data.")
- opt[Int]('k', "k")
- .required()
- .text(s"number of clusters, required")
- .action((x, c) => c.copy(k = x))
- opt[Int]("numIterations")
- .text(s"number of iterations, default; ${defaultParams.numIterations}")
- .action((x, c) => c.copy(numIterations = x))
- opt[String]("initMode")
- .text(s"initialization mode (${InitializationMode.values.mkString(",")}), " +
- s"default: ${defaultParams.initializationMode}")
- .action((x, c) => c.copy(initializationMode = InitializationMode.withName(x)))
- arg[String]("")
- .text("input paths to examples")
- .required()
- .action((x, c) => c.copy(input = x))
- }
-
- parser.parse(args, defaultParams).map { params =>
- run(params)
- }.getOrElse {
- sys.exit(1)
- }
- }
-
- def run(params: Params) {
- val spark = SparkSession
- .builder
- .appName(s"DenseKMeansDS with $params")
- .getOrCreate()
- import spark.implicits._
-
- val sc = spark.sparkContext
-
- val data = sc.sequenceFile[LongWritable, VectorWritable](params.input)
-
- // Should use Tuple1 to warp around for calling toDF
- val dataset = data.map { case (k, v) =>
- var vector: Array[Double] = new Array[Double](v.get().size)
- for (i <- 0 until v.get().size) vector(i) = v.get().get(i)
- Tuple1(Vectors.dense(vector))
- }.toDF("features")
-
- val initMode = params.initializationMode match {
- case Random => "random"
- case Parallel => "k-means||"
- }
-
- val model = new KMeans()
- .setInitMode(initMode)
- .setK(params.k)
- .setMaxIter(params.numIterations)
- .setSeed(1L)
- .fit(dataset)
-
- spark.stop()
- }
-}
-
diff --git a/examples/kmeans-pyspark/run.sh b/examples/kmeans-pyspark/run.sh
index d029bf294..0fa2a7bcb 100755
--- a/examples/kmeans-pyspark/run.sh
+++ b/examples/kmeans-pyspark/run.sh
@@ -1,62 +1,14 @@
#!/usr/bin/env bash
-# == User to customize the following environments ======= #
+source ../../conf/env.sh
-# Set user Spark and Hadoop home directory
-export SPARK_HOME=/path/to/your/spark/home
-export HADOOP_HOME=/path/to/your/hadoop/home
-# Set user HDFS Root
-export HDFS_ROOT=hdfs://your_hostname:8020
-# Set user Intel MLlib Root directory
-export OAP_MLLIB_ROOT=/path/to/your/OAP/oap-mllib
-# Set IP and Port for oneCCL KVS, you can select any one of the worker nodes and set CCL_KVS_IP_PORT to its IP and Port
-# IP can be got with `hostname -I`, if multiple IPs are returned, the first IP should be used. Port can be any available port.
-# For example, if one of the worker IP is 192.168.0.1 and an available port is 51234.
-# CCL_KVS_IP_PORT can be set in the format of 192.168.0.1_51234
-# Incorrectly setting this value will result in hanging when oneCCL initialize
-export CCL_KVS_IP_PORT=192.168.0.1_51234
-
-# Data file is from Spark Examples (data/mllib/sample_kmeans_data.txt), the data file should be copied to HDFS
+# Data file is from Spark Examples (data/mllib/sample_kmeans_data.txt) and is placed in examples/data
+# The data file should be copied to $HDFS_ROOT before running examples
DATA_FILE=data/sample_kmeans_data.txt
-# == User to customize Spark executor cores and memory == #
-
-# User should check the requested resources are acturally allocated by cluster manager or Intel MLlib will behave incorrectly
-SPARK_MASTER=yarn
-SPARK_DRIVER_MEMORY=1G
-SPARK_NUM_EXECUTORS=2
-SPARK_EXECUTOR_CORES=1
-SPARK_EXECUTOR_MEMORY=1G
-
-SPARK_DEFAULT_PARALLELISM=$(expr $SPARK_NUM_EXECUTORS '*' $SPARK_EXECUTOR_CORES '*' 2)
-
-# ======================================================= #
-
-# Check env
-if [[ -z $SPARK_HOME ]]; then
- echo SPARK_HOME not defined!
- exit 1
-fi
-
-if [[ -z $HADOOP_HOME ]]; then
- echo HADOOP_HOME not defined!
- exit 1
-fi
-
-export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
-
-# Target jar built
-OAP_MLLIB_JAR_NAME=oap-mllib-0.9.0-with-spark-3.0.0.jar
-OAP_MLLIB_JAR=$OAP_MLLIB_ROOT/mllib-dal/target/$OAP_MLLIB_JAR_NAME
-
-# Use absolute path
-SPARK_DRIVER_CLASSPATH=$OAP_MLLIB_JAR
-# Use relative path
-SPARK_EXECUTOR_CLASSPATH=./$OAP_MLLIB_JAR_NAME
-
APP_PY=kmeans-pyspark.py
-/usr/bin/time -p $SPARK_HOME/bin/spark-submit --master $SPARK_MASTER -v \
+time $SPARK_HOME/bin/spark-submit --master $SPARK_MASTER -v \
--num-executors $SPARK_NUM_EXECUTORS \
--driver-memory $SPARK_DRIVER_MEMORY \
--executor-cores $SPARK_EXECUTOR_CORES \
@@ -66,7 +18,6 @@ APP_PY=kmeans-pyspark.py
--conf "spark.sql.shuffle.partitions=$SPARK_DEFAULT_PARALLELISM" \
--conf "spark.driver.extraClassPath=$SPARK_DRIVER_CLASSPATH" \
--conf "spark.executor.extraClassPath=$SPARK_EXECUTOR_CLASSPATH" \
- --conf "spark.executorEnv.CCL_KVS_IP_PORT=$CCL_KVS_IP_PORT" \
--conf "spark.shuffle.reduceLocality.enabled=false" \
--conf "spark.network.timeout=1200s" \
--conf "spark.task.maxFailures=1" \
diff --git a/examples/kmeans/pom.xml b/examples/kmeans/pom.xml
index 71c4ea6d7..476a98ce6 100644
--- a/examples/kmeans/pom.xml
+++ b/examples/kmeans/pom.xml
@@ -4,7 +4,7 @@
com.intel.oap
oap-mllib-examples
- 1.1.0-with-spark-3.0.0
+ ${oap.version}-with-spark-${spark.version}
jar
KMeansExample
@@ -12,6 +12,7 @@
UTF-8
+ 1.1.0
2.12.10
2.12
3.0.0
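The example POMs now build their version from properties rather than the hard-coded 1.1.0-with-spark-3.0.0 string. With oap.version=1.1.0 and spark.version=3.0.0 (the values added in this hunk), the artifact keeps the name the run scripts expect; a quick check after building (a sketch, assuming Maven's default finalName of artifactId-version):

    cd examples/kmeans
    mvn clean package
    ls target/oap-mllib-examples-1.1.0-with-spark-3.0.0.jar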
diff --git a/examples/kmeans/run.sh b/examples/kmeans/run.sh
index bb0f9ac78..00b782464 100755
--- a/examples/kmeans/run.sh
+++ b/examples/kmeans/run.sh
@@ -1,64 +1,15 @@
#!/usr/bin/env bash
-# == User to customize the following environments ======= #
+source ../../conf/env.sh
-# Set user Spark and Hadoop home directory
-export SPARK_HOME=/path/to/your/spark/home
-export HADOOP_HOME=/path/to/your/hadoop/home
-# Set user HDFS Root
-export HDFS_ROOT=hdfs://your_hostname:8020
-# Set user Intel MLlib Root directory
-export OAP_MLLIB_ROOT=/path/to/your/OAP/oap-mllib
-# Set IP and Port for oneCCL KVS, you can select any one of the worker nodes and set CCL_KVS_IP_PORT to its IP and Port
-# IP can be got with `hostname -I`, if multiple IPs are returned, the first IP should be used. Port can be any available port.
-# For example, if one of the worker IP is 192.168.0.1 and an available port is 51234.
-# CCL_KVS_IP_PORT can be set in the format of 192.168.0.1_51234
-# Incorrectly setting this value will result in hanging when oneCCL initialize
-export CCL_KVS_IP_PORT=192.168.0.1_51234
-
-# Data file is from Spark examples' data (data/mllib/sample_kmeans_data.txt)
-# This data file should be copied to HDFS hdfs://your_hostname:8020/user//data/sample_kmeans_data.txt
+# Data file is from Spark Examples (data/mllib/sample_kmeans_data.txt) and is placed in examples/data
+# The data file should be copied to $HDFS_ROOT before running examples
DATA_FILE=data/sample_kmeans_data.txt
-# == User to customize Spark executor cores and memory == #
-
-# User should check the requested resources are acturally allocated by cluster manager or Intel MLlib will behave incorrectly
-SPARK_MASTER=yarn
-SPARK_DRIVER_MEMORY=1G
-SPARK_NUM_EXECUTORS=2
-SPARK_EXECUTOR_CORES=1
-SPARK_EXECUTOR_MEMORY=1G
-
-SPARK_DEFAULT_PARALLELISM=$(expr $SPARK_NUM_EXECUTORS '*' $SPARK_EXECUTOR_CORES '*' 2)
-
-# ======================================================= #
-
-# Check envs
-if [[ -z $SPARK_HOME ]]; then
- echo SPARK_HOME not defined!
- exit 1
-fi
-
-if [[ -z $HADOOP_HOME ]]; then
- echo HADOOP_HOME not defined!
- exit 1
-fi
-
-export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
-
-# Target jar built
-OAP_MLLIB_JAR_NAME=oap-mllib-0.9.0-with-spark-3.0.0.jar
-OAP_MLLIB_JAR=$OAP_MLLIB_ROOT/mllib-dal/target/$OAP_MLLIB_JAR_NAME
-
-# Use absolute path
-SPARK_DRIVER_CLASSPATH=$OAP_MLLIB_JAR
-# Use relative path
-SPARK_EXECUTOR_CLASSPATH=./$OAP_MLLIB_JAR_NAME
-
-APP_JAR=target/oap-mllib-examples-0.9.0-with-spark-3.0.0.jar
+APP_JAR=target/oap-mllib-examples-$OAP_MLLIB_VERSION-with-spark-3.0.0.jar
APP_CLASS=org.apache.spark.examples.ml.KMeansExample
-/usr/bin/time -p $SPARK_HOME/bin/spark-submit --master $SPARK_MASTER -v \
+time $SPARK_HOME/bin/spark-submit --master $SPARK_MASTER -v \
--num-executors $SPARK_NUM_EXECUTORS \
--driver-memory $SPARK_DRIVER_MEMORY \
--executor-cores $SPARK_EXECUTOR_CORES \
@@ -68,7 +19,6 @@ APP_CLASS=org.apache.spark.examples.ml.KMeansExample
--conf "spark.sql.shuffle.partitions=$SPARK_DEFAULT_PARALLELISM" \
--conf "spark.driver.extraClassPath=$SPARK_DRIVER_CLASSPATH" \
--conf "spark.executor.extraClassPath=$SPARK_EXECUTOR_CLASSPATH" \
- --conf "spark.executorEnv.CCL_KVS_IP_PORT=$CCL_KVS_IP_PORT" \
--conf "spark.shuffle.reduceLocality.enabled=false" \
--conf "spark.network.timeout=1200s" \
--conf "spark.task.maxFailures=1" \
diff --git a/examples/pca-pyspark/run.sh b/examples/pca-pyspark/run.sh
index 20010ce6b..b3776e877 100755
--- a/examples/pca-pyspark/run.sh
+++ b/examples/pca-pyspark/run.sh
@@ -1,57 +1,15 @@
#!/usr/bin/env bash
-# == User to customize the following environments ======= #
+source ../../conf/env.sh
-# Set user Spark and Hadoop home directory
-export SPARK_HOME=/path/to/your/spark/home
-export HADOOP_HOME=/path/to/your/hadoop/home
-# Set user HDFS Root
-export HDFS_ROOT=hdfs://your_hostname:8020
-# Set user Intel MLlib Root directory
-export OAP_MLLIB_ROOT=/path/to/your/OAP/oap-mllib
-
-# CSV data is the same as in Spark example "ml/pca_example.py", the data file should be copied to HDFS
+# CSV data is the same as in Spark example "ml/pca_example.py"
+# The data file should be copied to $HDFS_ROOT before running examples
DATA_FILE=data/pca_data.csv
-# == User to customize Spark executor cores and memory == #
-
-# User should check the requested resources are acturally allocated by cluster manager or Intel MLlib will behave incorrectly
-SPARK_MASTER=yarn
-SPARK_DRIVER_MEMORY=1G
-SPARK_NUM_EXECUTORS=2
-SPARK_EXECUTOR_CORES=1
-SPARK_EXECUTOR_MEMORY=1G
-
-SPARK_DEFAULT_PARALLELISM=$(expr $SPARK_NUM_EXECUTORS '*' $SPARK_EXECUTOR_CORES '*' 2)
-
-# ======================================================= #
-
-# Check env
-if [[ -z $SPARK_HOME ]]; then
- echo SPARK_HOME not defined!
- exit 1
-fi
-
-if [[ -z $HADOOP_HOME ]]; then
- echo HADOOP_HOME not defined!
- exit 1
-fi
-
-export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
-
-# Target jar built
-OAP_MLLIB_JAR_NAME=oap-mllib-0.9.0-with-spark-3.0.0.jar
-OAP_MLLIB_JAR=$OAP_MLLIB_ROOT/mllib-dal/target/$OAP_MLLIB_JAR_NAME
-
-# Use absolute path
-SPARK_DRIVER_CLASSPATH=$OAP_MLLIB_JAR
-# Use relative path
-SPARK_EXECUTOR_CLASSPATH=./$OAP_MLLIB_JAR_NAME
-
APP_PY=pca-pyspark.py
K=3
-/usr/bin/time -p $SPARK_HOME/bin/spark-submit --master $SPARK_MASTER -v \
+time $SPARK_HOME/bin/spark-submit --master $SPARK_MASTER -v \
--num-executors $SPARK_NUM_EXECUTORS \
--driver-memory $SPARK_DRIVER_MEMORY \
--executor-cores $SPARK_EXECUTOR_CORES \
diff --git a/examples/kmeans-hibench/build.sh b/examples/pca/build.sh
similarity index 100%
rename from examples/kmeans-hibench/build.sh
rename to examples/pca/build.sh
diff --git a/examples/pca/pom.xml b/examples/pca/pom.xml
index 06cf2343c..75641da81 100644
--- a/examples/pca/pom.xml
+++ b/examples/pca/pom.xml
@@ -4,7 +4,7 @@
com.intel.oap
oap-mllib-examples
- 1.1.0-with-spark-3.0.0
+ ${oap.version}-with-spark-${spark.version}
jar
PCAExample
@@ -12,6 +12,7 @@
UTF-8
+ 1.1.0
2.12.10
2.12
3.0.0
diff --git a/examples/pca/run.sh b/examples/pca/run.sh
index da373645b..64d22c6fe 100755
--- a/examples/pca/run.sh
+++ b/examples/pca/run.sh
@@ -1,3 +1,24 @@
#!/usr/bin/env bash
-mvn clean package
+source ../../conf/env.sh
+
+APP_JAR=target/oap-mllib-examples-$OAP_MLLIB_VERSION-with-spark-3.0.0.jar
+APP_CLASS=org.apache.spark.examples.ml.PCAExample
+
+time $SPARK_HOME/bin/spark-submit --master $SPARK_MASTER -v \
+ --num-executors $SPARK_NUM_EXECUTORS \
+ --driver-memory $SPARK_DRIVER_MEMORY \
+ --executor-cores $SPARK_EXECUTOR_CORES \
+ --executor-memory $SPARK_EXECUTOR_MEMORY \
+ --conf "spark.serializer=org.apache.spark.serializer.KryoSerializer" \
+ --conf "spark.default.parallelism=$SPARK_DEFAULT_PARALLELISM" \
+ --conf "spark.sql.shuffle.partitions=$SPARK_DEFAULT_PARALLELISM" \
+ --conf "spark.driver.extraClassPath=$SPARK_DRIVER_CLASSPATH" \
+ --conf "spark.executor.extraClassPath=$SPARK_EXECUTOR_CLASSPATH" \
+ --conf "spark.shuffle.reduceLocality.enabled=false" \
+ --conf "spark.network.timeout=1200s" \
+ --conf "spark.task.maxFailures=1" \
+ --jars $OAP_MLLIB_JAR \
+ --class $APP_CLASS \
+ $APP_JAR $DATA_FILE $K \
+ 2>&1 | tee PCA-$(date +%m%d_%H_%M_%S).log
diff --git a/examples/run-all-pyspark.sh b/examples/run-all-pyspark.sh
new file mode 100755
index 000000000..06decd242
--- /dev/null
+++ b/examples/run-all-pyspark.sh
@@ -0,0 +1,8 @@
+#!/usr/bin/env bash
+
+for dir in kmeans-pyspark pca-pyspark als-pyspark
+do
+ cd $dir
+ ./run.sh
+ cd ..
+done
diff --git a/examples/run-all-scala.sh b/examples/run-all-scala.sh
new file mode 100755
index 000000000..65636c250
--- /dev/null
+++ b/examples/run-all-scala.sh
@@ -0,0 +1,8 @@
+#!/usr/bin/env bash
+
+for dir in kmeans pca als
+do
+ cd $dir
+ ./run.sh
+ cd ..
+done
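The two new driver scripts just loop over the per-example directories, so a full smoke test of both language bindings is a couple of commands (assuming env.sh is configured and the Scala examples are built); note the loops ignore exit codes, so a failing example does not stop the sweep:

    cd examples
    ./run-all-scala.sh      # kmeans, pca, als
    ./run-all-pyspark.sh    # kmeans-pyspark, pca-pyspark, als-pyspark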
diff --git a/mllib-dal/build.sh b/mllib-dal/build.sh
index da1d8df75..bd88c998c 100755
--- a/mllib-dal/build.sh
+++ b/mllib-dal/build.sh
@@ -2,7 +2,12 @@
# Check envs for building
if [[ -z $JAVA_HOME ]]; then
- echo $JAVA_HOME not defined!
+ echo JAVA_HOME not defined!
+ exit 1
+fi
+
+if [[ -z $(which mvn) ]]; then
+ echo Maven not found!
exit 1
fi
@@ -26,7 +31,8 @@ echo JAVA_HOME=$JAVA_HOME
echo DAALROOT=$DAALROOT
echo TBBROOT=$TBBROOT
echo CCL_ROOT=$CCL_ROOT
-echo GCC Version: $(gcc -dumpversion)
+echo Maven Version: $(mvn -v | head -n 1 | cut -f3 -d" ")
+echo Clang Version: $(clang -dumpversion)
echo =============================
mvn -DskipTests clean package
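build.sh now fails fast when JAVA_HOME or Maven is missing and reports Maven and Clang versions instead of GCC, matching the Makefile's switch to clang. A pre-flight check along the same lines (a sketch, not part of the repo):

    for v in JAVA_HOME DAALROOT TBBROOT CCL_ROOT; do
        [ -n "${!v}" ] || echo "$v is not set"
    done
    command -v mvn   >/dev/null || echo "Maven not found"
    command -v clang >/dev/null || echo "clang not found (the native build now uses clang/clang++)"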
diff --git a/mllib-dal/pom.xml b/mllib-dal/pom.xml
index 4e51f9157..979c53d06 100644
--- a/mllib-dal/pom.xml
+++ b/mllib-dal/pom.xml
@@ -17,6 +17,13 @@
2.12.10
2.12
3.0.0
+ 2021.2.0
+ libtbb.so.12.2
+ libtbbmalloc.so.2.2
+ libJavaAPI.so.1.1
+ libccl.so
+ libfabric.so.1
+ libmpi.so.12.0.0
@@ -55,11 +62,11 @@
- com.intel.daal
- daal
- 2021.1
+ com.intel.onedal
+ onedal
+ ${oneapi.version}
system
- ${env.ONEAPI_ROOT}/dal/latest/lib/onedal.jar
+ ${env.DAALROOT}/lib/onedal.jar
@@ -218,14 +225,11 @@
${env.CCL_ROOT}/lib
-
-
- libmpi.so.12.0.0
- libfabric.so.1
- libccl.so
-
+ ${ccl.lib}
+ ${ccl.mpi.lib}
+ ${ccl.fabric.lib}
-
+
${env.CCL_ROOT}/lib/prov
@@ -236,8 +240,8 @@
${env.TBBROOT}/lib/intel64/gcc4.8
- libtbb.so.12.1
- libtbbmalloc.so.2.1
+ ${tbb.lib}
+ ${tbb.malloc.lib}
@@ -263,23 +267,19 @@
+ rename to workaround. See https://github.com/oneapi-src/oneDAL/issues/1254 -->
- ${project.build.testOutputDirectory}/lib/libtbb.so.12.1
+ ${project.build.testOutputDirectory}/lib/${tbb.lib}
${project.build.testOutputDirectory}/lib/libtbb.so.2
- ${project.build.testOutputDirectory}/lib/libtbbmalloc.so.2.1
+ ${project.build.testOutputDirectory}/lib/${tbb.malloc.lib}
${project.build.testOutputDirectory}/lib/libtbbmalloc.so.2
- ${project.build.testOutputDirectory}/lib/libmpi.so.12.0.0
+ ${project.build.testOutputDirectory}/lib/${ccl.mpi.lib}
${project.build.testOutputDirectory}/lib/libmpi.so.12
-
-
-
-
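The native library file names bundled into the jar (libtbb.so.12.2, libtbbmalloc.so.2.2, libJavaAPI.so.1.1, libccl.so, libfabric.so.1, libmpi.so.12.0.0) are now pom properties tied to oneAPI 2021.2.0, so a future oneAPI upgrade should only require bumping those properties. A quick way to confirm the names match your install (directory layout inferred from the paths this pom references; libJavaAPI's location is not shown here):

    ls $CCL_ROOT/lib/libccl.so $CCL_ROOT/lib/libfabric.so.1 $CCL_ROOT/lib/libmpi.so.12.0.0
    ls $TBBROOT/lib/intel64/gcc4.8/libtbb.so.12.2 $TBBROOT/lib/intel64/gcc4.8/libtbbmalloc.so.2.2
    ls $DAALROOT/lib/onedal.jar   # systemPath of the onedal dependency above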
diff --git a/mllib-dal/src/assembly/assembly.xml b/mllib-dal/src/assembly/assembly.xml
index 498b90e02..541c4f2bd 100644
--- a/mllib-dal/src/assembly/assembly.xml
+++ b/mllib-dal/src/assembly/assembly.xml
@@ -41,28 +41,28 @@
-
+
lib
libtbb.so.2
-
+
lib
libtbbmalloc.so.2
-
+
lib
libJavaAPI.so
-
+
lib
-
+
lib
libmpi.so.12
@@ -75,4 +75,4 @@
lib
-
\ No newline at end of file
+
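assembly.xml keeps placing the native libraries under lib/ inside the packaged jar; after a build the bundled set can be verified with something like (jar name assumed from the versions used elsewhere in this PR):

    jar tf mllib-dal/target/oap-mllib-1.1.0-with-spark-3.0.0.jar | grep '^lib/'
    # expected entries include libtbb.so.2, libtbbmalloc.so.2, libJavaAPI.so and libmpi.so.12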
diff --git a/mllib-dal/src/main/native/ALSDALImpl.cpp b/mllib-dal/src/main/native/ALSDALImpl.cpp
index 53212dc1d..29162fddd 100644
--- a/mllib-dal/src/main/native/ALSDALImpl.cpp
+++ b/mllib-dal/src/main/native/ALSDALImpl.cpp
@@ -613,8 +613,8 @@ JNIEXPORT jlong JNICALL Java_org_apache_spark_ml_recommendation_ALSDALImpl_cDALI
std::cout << "\n=== Results for Rank " << rankId << "===\n" << std::endl;
// std::cout << "Partition ID: " << partitionId << std::endl;
- printNumericTable(pUser, "User Factors (first 10 rows):", 10);
- printNumericTable(pItem, "Item Factors (first 10 rows):", 10);
+ printNumericTable(pUser, "User Factors (first 10 rows x 20 columns):", 10, 20);
+ printNumericTable(pItem, "Item Factors (first 10 rows x 20 columns):", 10, 20);
std::cout << "User Offset: " << getOffsetFromOffsetTable(userOffset) << std::endl;
std::cout << "Item Offset: " << getOffsetFromOffsetTable(itemOffset) << std::endl;
std::cout << std::endl;
diff --git a/mllib-dal/src/main/native/Makefile b/mllib-dal/src/main/native/Makefile
index 23222e646..e3a7e2161 100644
--- a/mllib-dal/src/main/native/Makefile
+++ b/mllib-dal/src/main/native/Makefile
@@ -12,11 +12,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-CC := gcc
-CXX := g++
+CC := clang
+CXX := clang++
RM := rm -rf
-CFLAGS := -g -Wall -fPIC -std=c++11
+CFLAGS := -g -Wall -Wno-deprecated-declarations -fPIC -std=c++11
# The following paths setting works for self-built libs from source code
# https://github.com/oneapi-src/oneCCL. If oneCCL package in oneAPI Toolkit is used,
@@ -33,7 +33,7 @@ INCS := -I $(JAVA_HOME)/include \
LIBS := -L${CCL_ROOT}/lib -lccl \
-L$(DAALROOT)/lib/intel64 -l:libdaal_core.a -l:libdaal_thread.a \
- -L$(TBBROOT)/lib -ltbb -ltbbmalloc
+ -L$(TBBROOT)/lib/intel64/gcc4.8 -ltbb -ltbbmalloc
# TODO: Add signal chaining support, should fix linking, package so and loading
# -L$(JAVA_HOME)/jre/lib/amd64 -ljsig
diff --git a/mllib-dal/src/main/native/PCADALImpl.cpp b/mllib-dal/src/main/native/PCADALImpl.cpp
index 33e2bc95d..95172d05f 100644
--- a/mllib-dal/src/main/native/PCADALImpl.cpp
+++ b/mllib-dal/src/main/native/PCADALImpl.cpp
@@ -125,8 +125,8 @@ JNIEXPORT jlong JNICALL Java_org_apache_spark_ml_feature_PCADALImpl_cPCATrainDAL
std::cout << "PCA (native): master step took " << duration << " secs" << std::endl;
/* Print the results */
- printNumericTable(result->get(pca::eigenvalues), "First 10 Eigenvalues:", 10);
- printNumericTable(result->get(pca::eigenvectors), "First 10 Eigenvectors:", 10);
+ printNumericTable(result->get(pca::eigenvalues), "First 10 eigenvalues with first 20 dimensions:", 10, 20);
+ printNumericTable(result->get(pca::eigenvectors), "First 10 eigenvectors with first 20 dimensions:", 10, 20);
// Return all eigenvalues & eigenvectors
diff --git a/mllib-dal/src/main/scala/org/apache/spark/ml/util/Utils.scala b/mllib-dal/src/main/scala/org/apache/spark/ml/util/Utils.scala
index aa8eb8979..0dd43d24f 100644
--- a/mllib-dal/src/main/scala/org/apache/spark/ml/util/Utils.scala
+++ b/mllib-dal/src/main/scala/org/apache/spark/ml/util/Utils.scala
@@ -72,6 +72,12 @@ object Utils {
}
def checkExecutorAvailPort(data: RDD[_], localIP: String) : Int = {
+
+ if (localIP == "127.0.0.1" || localIP == "127.0.1.1") {
+ println(s"\nOneCCL: Error: loopback IP ${localIP} is not supported, please assign an IP address to your host.\n")
+ System.exit(-1)
+ }
+
val sc = data.sparkContext
val result = data.mapPartitions { p =>
LibLoader.loadLibraries()
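checkExecutorAvailPort now aborts early when the detected executor IP is a loopback address, since oneCCL cannot form a communicator over 127.0.0.1. Before submitting you can check what each worker will report, using the same command the removed script comments suggested:

    hostname -I | awk '{print $1}'   # first address; must not be 127.0.0.1 or 127.0.1.1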
diff --git a/mllib-dal/test-cluster.sh b/mllib-dal/test-cluster.sh
deleted file mode 100755
index 4f5a6132a..000000000
--- a/mllib-dal/test-cluster.sh
+++ /dev/null
@@ -1,5 +0,0 @@
-#!/usr/bin/env bash
-
-cd ../dev/test-cluster/workloads
-
-./run-kmeans-pyspark.sh
diff --git a/mllib-dal/test.sh b/mllib-dal/test.sh
index 0157c22a4..befd66495 100755
--- a/mllib-dal/test.sh
+++ b/mllib-dal/test.sh
@@ -2,7 +2,12 @@
# Check envs for building
if [[ -z $JAVA_HOME ]]; then
- echo $JAVA_HOME not defined!
+ echo JAVA_HOME not defined!
+ exit 1
+fi
+
+if [[ -z $(which mvn) ]]; then
+ echo Maven not found!
exit 1
fi
@@ -21,16 +26,17 @@ if [[ -z $CCL_ROOT ]]; then
exit 1
fi
-echo === Building Environments ===
+echo === Testing Environments ===
echo JAVA_HOME=$JAVA_HOME
echo DAALROOT=$DAALROOT
echo TBBROOT=$TBBROOT
echo CCL_ROOT=$CCL_ROOT
-echo GCC Version: $(gcc -dumpversion)
+echo Maven Version: $(mvn -v | head -n 1 | cut -f3 -d" ")
+echo Clang Version: $(clang -dumpversion)
echo =============================
# Enable signal chaining support for JNI
-export LD_PRELOAD=$JAVA_HOME/jre/lib/amd64/libjsig.so
+# export LD_PRELOAD=$JAVA_HOME/jre/lib/amd64/libjsig.so
# -Dtest=none to turn off the Java tests