[ML-282] Refactor CPU & GPU examples (#306)
* First move

* Move device discovery for Scala

* Delete old gpu discovery

* Add run-all-gpu

* Add clean up

* Add tmp utils file

* Add exe

* Rename run script

* Scala gpu done

* Scala cpu done

* For ci

* pyspark ci

* Rename scala

* Rename scala file in scripts

* Pyspark unit done

* Update pyspark utils

* Update ci

* Remove tmp utils

* Rename utils

* Change absolute path, rm als gpu.sh

* Scala absolute path

* Change sanity check

* Rename ci

* Split random_forest

* Fix name change in ci

* Fix path typo

* Fix typo
argentea authored Jun 27, 2023
1 parent f4e6b49 commit e19ab80
Showing 64 changed files with 212 additions and 90 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/ci-tests.yml
@@ -72,7 +72,7 @@ jobs:
${{ runner.os }}-
- name: Cluster Test
run: |
-${{github.workspace}}/dev/ci/ci-yarn-test.sh
+${{github.workspace}}/dev/ci/ci-yarn-test-cpu.sh
standalone-test:
name: Standalone CPU_GPU_PROFILE Test for Examples (CPU)
runs-on: ubuntu-20.04
@@ -95,4 +95,4 @@ jobs:
${{ runner.os }}-
- name: Cluster Test
run: |
-${{github.workspace}}/dev/ci/ci-standalone-test.sh
+${{github.workspace}}/dev/ci/ci-standalone-test-cpu.sh
5 changes: 2 additions & 3 deletions README.md
@@ -131,9 +131,8 @@ Edit related variables in "`Minimum Settings`" of `env.sh`
#### Run K-means

```bash
-$ cd examples/kmeans
-$ ./build.sh
-$ ./run.sh
+$ cd examples/python/kmeans-pyspark
+$ ./run-cpu.sh
```

### PySpark Support
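The README change reflects the refactor's new layout: examples now live under `examples/python/` and `examples/scala/`, each with per-device run scripts instead of a single `run.sh`. A partial sketch of the resulting tree, inferred from the renames in this commit (only a few examples shown):

```bash
# Layout after this commit (partial, inferred from the diff):
# examples/
#   python/kmeans-pyspark/   kmeans-pyspark.py  run-cpu.sh  run-gpu.sh
#   python/pca-pyspark/      pca-pyspark.py     run-cpu.sh  run-gpu.sh
#   scala/kmeans-scala/      build.sh           run-cpu.sh  run-gpu.sh
ls examples/python examples/scala   # quick way to verify after checkout
```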
File renamed without changes.
File renamed without changes.
@@ -33,5 +33,5 @@ echo "========================================="

# Build and run all examples
./build-all-scala.sh
-./run-all-scala.sh
-./run-all-pyspark.sh
+./run-all-scala-cpu.sh
+./run-all-pyspark-cpu.sh
4 changes: 2 additions & 2 deletions dev/ci/ci-yarn-test.sh → dev/ci/ci-yarn-test-cpu.sh
@@ -36,5 +36,5 @@ echo "========================================="

# Build and run all examples
./build-all-scala.sh
-./run-all-scala.sh
-./run-all-pyspark.sh
+./run-all-scala-cpu.sh
+./run-all-pyspark-cpu.sh
7 changes: 6 additions & 1 deletion examples/build-all-scala.sh
@@ -1,6 +1,9 @@
#!/usr/bin/env bash

-exampleDirs=(kmeans pca als naive-bayes linear-regression correlation summarizer)
+exampleDirs=(kmeans-scala pca-scala als-scala naive-bayes-scala \
+             linear-regression-scala correlation-scala summarizer-scala)

+cd scala

for dir in ${exampleDirs[*]}
do
@@ -13,3 +16,5 @@ do
./build.sh
cd ..
done

+cd ..
20 changes: 20 additions & 0 deletions examples/clean-all-scala-targets.sh
@@ -0,0 +1,20 @@
#!/usr/bin/env bash

exampleDirs=(kmeans-scala pca-scala als-scala naive-bayes-scala \
linear-regression-scala correlation-scala summarizer-scala)

cd scala

for dir in ${exampleDirs[*]}
do
cd $dir
echo
echo ==========================
echo Cleaning $dir ...
echo ==========================
echo
rm -rf ./target/
cd ..
done

cd ..
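Since the new clean-up script `cd`s into `scala/` relative to the working directory, it has to be invoked from `examples/`. A minimal usage sketch:

```bash
# Run from the repository's examples/ directory; the script iterates
# over each *-scala example and deletes its build output under target/.
cd examples
./clean-all-scala-targets.sh
```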
1 change: 0 additions & 1 deletion examples/kmeans/IntelGpuResourceFile.json

This file was deleted.

20 changes: 0 additions & 20 deletions examples/linear-regression/GetIntelGpuResources.sh

This file was deleted.

1 change: 0 additions & 1 deletion examples/linear-regression/IntelGpuResourceFile.json

This file was deleted.

20 changes: 0 additions & 20 deletions examples/pca/GetIntelGpuResources.sh

This file was deleted.

1 change: 0 additions & 1 deletion examples/pca/IntelGpuResourceFile.json

This file was deleted.

File renamed without changes.
@@ -1,6 +1,7 @@
#!/usr/bin/env bash

-source ../../conf/env.sh
+CONF_PATH=$PWD/../../../conf
+source $CONF_PATH/env.sh

# Data file is converted from oneDAL examples ($DAALROOT/examples/daal/data/batch/implicit_als_csr.csv)
# The data file should be copied to $HDFS_ROOT before running examples
File renamed without changes.
@@ -1,6 +1,7 @@
#!/usr/bin/env bash

-source ../../conf/env.sh
+CONF_PATH=$PWD/../../../conf
+source $CONF_PATH/env.sh

# Data file is from Spark Examples (data/mllib/sample_kmeans_data.txt) and put in examples/data
# The data file should be copied to $HDFS_ROOT before running examples
40 changes: 40 additions & 0 deletions examples/python/kmeans-pyspark/run-gpu.sh
@@ -0,0 +1,40 @@
#!/usr/bin/env bash

CONF_PATH=$PWD/../../../conf
source $CONF_PATH/env.sh

# Data file is from Spark Examples (data/mllib/sample_kmeans_data.txt) and put in examples/data
# The data file should be copied to $HDFS_ROOT before running examples
DATA_FILE=$HDFS_ROOT/data/sample_kmeans_data.txt

DEVICE=GPU
RESOURCE_FILE=$CONF_PATH/IntelGpuResourceFile.json
WORKER_GPU_AMOUNT=4
EXECUTOR_GPU_AMOUNT=1
TASK_GPU_AMOUNT=1
APP_PY=kmeans-pyspark.py


# Should run in standalone mode
time $SPARK_HOME/bin/spark-submit --master $SPARK_MASTER \
--num-executors $SPARK_NUM_EXECUTORS \
--executor-cores $SPARK_EXECUTOR_CORES \
--total-executor-cores $SPARK_TOTAL_CORES \
--driver-memory $SPARK_DRIVER_MEMORY \
--executor-memory $SPARK_EXECUTOR_MEMORY \
--conf "spark.serializer=org.apache.spark.serializer.KryoSerializer" \
--conf "spark.default.parallelism=$SPARK_DEFAULT_PARALLELISM" \
--conf "spark.sql.shuffle.partitions=$SPARK_DEFAULT_PARALLELISM" \
--conf "spark.driver.extraClassPath=$SPARK_DRIVER_CLASSPATH" \
--conf "spark.executor.extraClassPath=$SPARK_EXECUTOR_CLASSPATH" \
--conf "spark.oap.mllib.device=$DEVICE" \
--conf "spark.worker.resourcesFile=$RESOURCE_FILE" \
--conf "spark.worker.resource.gpu.amount=$WORKER_GPU_AMOUNT" \
--conf "spark.executor.resource.gpu.amount=$EXECUTOR_GPU_AMOUNT" \
--conf "spark.task.resource.gpu.amount=$TASK_GPU_AMOUNT" \
--conf "spark.shuffle.reduceLocality.enabled=false" \
--conf "spark.network.timeout=1200s" \
--conf "spark.task.maxFailures=1" \
--jars $OAP_MLLIB_JAR \
$APP_PY $DATA_FILE \
2>&1 | tee KMeans-$(date +%m%d_%H_%M_%S).log
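The GPU script points `spark.worker.resourcesFile` at `conf/IntelGpuResourceFile.json`, replacing the per-example copies deleted above. That file is not shown in this diff; as a sketch, Spark's standalone worker expects a JSON list of ResourceAllocation entries, so a file consistent with `WORKER_GPU_AMOUNT=4` would plausibly be written like this (the addresses are illustrative, and the committed file may differ):

```bash
# Hypothetical reconstruction of conf/IntelGpuResourceFile.json -- not
# part of this diff. Format follows Spark's standalone resourcesFile spec.
cat > conf/IntelGpuResourceFile.json <<'EOF'
[{"id": {"componentName": "spark.worker", "resourceName": "gpu"},
  "addresses": ["0", "1", "2", "3"]}]
EOF
```

With 4 GPUs per worker, `EXECUTOR_GPU_AMOUNT=1` and `TASK_GPU_AMOUNT=1` let a worker host up to four single-GPU executors, each running one GPU task at a time. The pca-pyspark and random-forest `run-gpu.sh` scripts below reuse the same file.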
File renamed without changes.
@@ -1,6 +1,7 @@
#!/usr/bin/env bash

-source ../../conf/env.sh
+CONF_PATH=$PWD/../../../conf
+source $CONF_PATH/env.sh

# CSV data is the same as in Spark example "ml/pca_example.py"
# The data file should be copied to $HDFS_ROOT before running examples
40 changes: 40 additions & 0 deletions examples/python/pca-pyspark/run-gpu.sh
@@ -0,0 +1,40 @@
#!/usr/bin/env bash

CONF_PATH=$PWD/../../../conf
source $CONF_PATH/env.sh

# CSV data is the same as in Spark example "ml/pca_example.py"
# The data file should be copied to $HDFS_ROOT before running examples
DATA_FILE=$HDFS_ROOT/data/pca_data.csv

DEVICE=GPU
RESOURCE_FILE=$CONF_PATH/IntelGpuResourceFile.json
WORKER_GPU_AMOUNT=4
EXECUTOR_GPU_AMOUNT=1
TASK_GPU_AMOUNT=1
APP_PY=pca-pyspark.py


# Should run in standalone mode
time $SPARK_HOME/bin/spark-submit --master $SPARK_MASTER \
--num-executors $SPARK_NUM_EXECUTORS \
--executor-cores $SPARK_EXECUTOR_CORES \
--total-executor-cores $SPARK_TOTAL_CORES \
--driver-memory $SPARK_DRIVER_MEMORY \
--executor-memory $SPARK_EXECUTOR_MEMORY \
--conf "spark.serializer=org.apache.spark.serializer.KryoSerializer" \
--conf "spark.default.parallelism=$SPARK_DEFAULT_PARALLELISM" \
--conf "spark.sql.shuffle.partitions=$SPARK_DEFAULT_PARALLELISM" \
--conf "spark.driver.extraClassPath=$SPARK_DRIVER_CLASSPATH" \
--conf "spark.executor.extraClassPath=$SPARK_EXECUTOR_CLASSPATH" \
--conf "spark.oap.mllib.device=$DEVICE" \
--conf "spark.worker.resourcesFile=$RESOURCE_FILE" \
--conf "spark.worker.resource.gpu.amount=$WORKER_GPU_AMOUNT" \
--conf "spark.executor.resource.gpu.amount=$EXECUTOR_GPU_AMOUNT" \
--conf "spark.task.resource.gpu.amount=$TASK_GPU_AMOUNT" \
--conf "spark.shuffle.reduceLocality.enabled=false" \
--conf "spark.network.timeout=1200s" \
--conf "spark.task.maxFailures=1" \
--jars $OAP_MLLIB_JAR \
$APP_PY $DATA_FILE \
2>&1 | tee PCA-$(date +%m%d_%H_%M_%S).log
@@ -1,19 +1,21 @@
#!/usr/bin/env bash

-source ../../conf/env.sh
+CONF_PATH=$PWD/../../../conf
+source $CONF_PATH/env.sh

# Data file is from Spark Examples (data/mllib/sample_libsvm_data.txt)
# The data file should be copied to $HDFS_ROOT before running examples
DATA_FILE=$HDFS_ROOT/data/sample_libsvm_data.txt

DEVICE=GPU
-RESOURCE_FILE=$PWD/IntelGpuResourceFile.json
+RESOURCE_FILE=$CONF_PATH/IntelGpuResourceFile.json
WORKER_GPU_AMOUNT=4
EXECUTOR_GPU_AMOUNT=1
TASK_GPU_AMOUNT=1
APP_PY=random_forest_classifier_example.py


# Should run in standalone mode
time $SPARK_HOME/bin/spark-submit --master $SPARK_MASTER \
--num-executors $SPARK_NUM_EXECUTORS \
--executor-cores $SPARK_EXECUTOR_CORES \
@@ -34,5 +36,5 @@ time $SPARK_HOME/bin/spark-submit --master $SPARK_MASTER \
--conf "spark.network.timeout=1200s" \
--conf "spark.task.maxFailures=1" \
--jars $OAP_MLLIB_JAR \
-$APP_PY DATA_FILE \
+$APP_PY $DATA_FILE \
2>&1 | tee random_forest_classifier-$(date +%m%d_%H_%M_%S).log
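The last hunk is a genuine bug fix rather than part of the rename: without the `$`, spark-submit received the literal word `DATA_FILE` as the application argument instead of the HDFS path. A minimal shell illustration (the path is illustrative):

```bash
# bash only substitutes variables referenced with '$':
DATA_FILE=hdfs://namenode/data/sample_libsvm_data.txt
echo app.py DATA_FILE    # prints: app.py DATA_FILE   <- the old bug
echo app.py $DATA_FILE   # prints: app.py hdfs://namenode/data/sample_libsvm_data.txt
```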
@@ -1,19 +1,21 @@
#!/usr/bin/env bash

-source ../../conf/env.sh
+CONF_PATH=$PWD/../../../conf
+source $CONF_PATH/env.sh

# Data file is from Spark Examples (data/mllib/sample_libsvm_data.txt)
# The data file should be copied to $HDFS_ROOT before running examples
DATA_FILE=$HDFS_ROOT/data/sample_libsvm_data.txt

DEVICE=GPU
-RESOURCE_FILE=$PWD/IntelGpuResourceFile.json
+RESOURCE_FILE=$CONF_PATH/IntelGpuResourceFile.json
WORKER_GPU_AMOUNT=4
EXECUTOR_GPU_AMOUNT=1
TASK_GPU_AMOUNT=1
APP_PY=random_forest_regressor_example.py


# Should run in standalone mode
time $SPARK_HOME/bin/spark-submit --master $SPARK_MASTER \
--num-executors $SPARK_NUM_EXECUTORS \
--executor-cores $SPARK_EXECUTOR_CORES \
@@ -34,5 +36,5 @@ time $SPARK_HOME/bin/spark-submit --master $SPARK_MASTER \
--conf "spark.network.timeout=1200s" \
--conf "spark.task.maxFailures=1" \
--jars $OAP_MLLIB_JAR \
-$APP_PY DATA_FILE \
+$APP_PY $DATA_FILE \
2>&1 | tee random_forest_regressor-$(date +%m%d_%H_%M_%S).log
1 change: 0 additions & 1 deletion examples/random-forest-pyspark/IntelGpuResourceFile.json

This file was deleted.

@@ -2,6 +2,8 @@

exampleDirs=(kmeans-pyspark pca-pyspark als-pyspark)

+cd python

for dir in ${exampleDirs[*]}
do
cd $dir
@@ -10,6 +12,8 @@ do
echo Running $dir ...
echo ==========================
echo
-./run.sh
+./run-cpu.sh
cd ..
done

+cd ..
20 changes: 20 additions & 0 deletions examples/run-all-pyspark-gpu.sh
@@ -0,0 +1,20 @@
#!/usr/bin/env bash

exampleDirs=(kmeans-pyspark pca-pyspark als-pyspark \
random-forest-regressor-pyspark random-forest-classifier-pyspark)

cd python

for dir in ${exampleDirs[*]}
do
cd $dir
echo
echo ==========================
echo Running $dir ...
echo ==========================
echo
./run-gpu.sh
cd ..
done

cd ..
20 changes: 20 additions & 0 deletions examples/run-all-scala-cpu.sh
@@ -0,0 +1,20 @@
#!/usr/bin/env bash

exampleDirs=(kmeans-scala pca-scala als-scala naive-bayes-scala \
linear-regression-scala correlation-scala summarizer-scala)

cd scala

for dir in ${exampleDirs[*]}
do
cd $dir
echo
echo ==========================
echo Running $dir ...
echo ==========================
echo
./run-cpu.sh
cd ..
done

cd ..
8 changes: 6 additions & 2 deletions examples/run-all-scala.sh → examples/run-all-scala-gpu.sh
@@ -1,6 +1,8 @@
#!/usr/bin/env bash

-exampleDirs=(kmeans pca als naive-bayes linear-regression correlation summarizer)
+exampleDirs=(kmeans-scala pca-scala linear-regression-scala correlation-scala summarizer-scala)

+cd scala

for dir in ${exampleDirs[*]}
do
@@ -10,6 +12,8 @@ do
echo Running $dir ...
echo ==========================
echo
-./run.sh
+./run-gpu.sh
cd ..
done

+cd ..
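Note that the GPU runner's list omits `als-scala` and `naive-bayes-scala`, which appear only in the CPU runner at this point. Mirroring what the CI scripts above do for the CPU profile, a plausible end-to-end GPU run, assuming a standalone cluster with GPUs and a configured `conf/env.sh`, would be:

```bash
cd examples
./build-all-scala.sh        # build every Scala example once
./run-all-scala-gpu.sh      # run the GPU-enabled Scala examples
./run-all-pyspark-gpu.sh    # run the GPU-enabled PySpark examples
```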
File renamed without changes.
File renamed without changes.
3 changes: 2 additions & 1 deletion examples/als/run.sh → examples/scala/als-scala/run-cpu.sh
@@ -1,6 +1,7 @@
#!/usr/bin/env bash

-source ../../conf/env.sh
+CONF_PATH=$PWD/../../../conf
+source $CONF_PATH/env.sh

# Data file is converted from oneDAL examples ($DAALROOT/examples/daal/data/batch/implicit_als_csr.csv)
# The data file should be copied to $HDFS_ROOT before running examples
File renamed without changes.
File renamed without changes.