diff --git a/.github/workflows/ci-tests.yml b/.github/workflows/ci-tests.yml index c3f18a2f5..8d8fe8c9d 100644 --- a/.github/workflows/ci-tests.yml +++ b/.github/workflows/ci-tests.yml @@ -72,7 +72,7 @@ jobs: ${{ runner.os }}- - name: Cluster Test run: | - ${{github.workspace}}/dev/ci/ci-yarn-test.sh + ${{github.workspace}}/dev/ci/ci-yarn-test-cpu.sh standalone-test: name: Standalone CPU_GPU_PROFILE Test for Examples (CPU) runs-on: ubuntu-20.04 @@ -95,4 +95,4 @@ jobs: ${{ runner.os }}- - name: Cluster Test run: | - ${{github.workspace}}/dev/ci/ci-standalone-test.sh + ${{github.workspace}}/dev/ci/ci-standalone-test-cpu.sh diff --git a/README.md b/README.md index 0bd6cf2f0..e151c9574 100644 --- a/README.md +++ b/README.md @@ -131,9 +131,8 @@ Edit related variables in "`Minimun Settings`" of `env.sh` #### Run K-means ```bash - $ cd examples/kmeans - $ ./build.sh - $ ./run.sh + $ cd examples/python/kmeans-pyspark + $ ./run-cpu.sh ``` ### PySpark Support diff --git a/examples/kmeans/GetIntelGpuResources.sh b/conf/GetIntelGpuResources.sh similarity index 100% rename from examples/kmeans/GetIntelGpuResources.sh rename to conf/GetIntelGpuResources.sh diff --git a/examples/correlation/IntelGpuResourceFile.json b/conf/IntelGpuResourceFile.json similarity index 100% rename from examples/correlation/IntelGpuResourceFile.json rename to conf/IntelGpuResourceFile.json diff --git a/dev/ci/ci-standalone-test.sh b/dev/ci/ci-standalone-test-cpu.sh similarity index 95% rename from dev/ci/ci-standalone-test.sh rename to dev/ci/ci-standalone-test-cpu.sh index a0a8ae2fe..c3322f9ec 100755 --- a/dev/ci/ci-standalone-test.sh +++ b/dev/ci/ci-standalone-test-cpu.sh @@ -33,5 +33,5 @@ echo "=========================================" # Build and run all examples ./build-all-scala.sh -./run-all-scala.sh -./run-all-pyspark.sh +./run-all-scala-cpu.sh +./run-all-pyspark-cpu.sh diff --git a/dev/ci/ci-yarn-test.sh b/dev/ci/ci-yarn-test-cpu.sh similarity index 95% rename from dev/ci/ci-yarn-test.sh 
rename to dev/ci/ci-yarn-test-cpu.sh index e9a93aee3..e586285d9 100755 --- a/dev/ci/ci-yarn-test.sh +++ b/dev/ci/ci-yarn-test-cpu.sh @@ -36,5 +36,5 @@ echo "=========================================" # Build and run all examples ./build-all-scala.sh -./run-all-scala.sh -./run-all-pyspark.sh +./run-all-scala-cpu.sh +./run-all-pyspark-cpu.sh diff --git a/examples/build-all-scala.sh b/examples/build-all-scala.sh index 8babb9d97..9c7662701 100755 --- a/examples/build-all-scala.sh +++ b/examples/build-all-scala.sh @@ -1,6 +1,9 @@ #!/usr/bin/env bash -exampleDirs=(kmeans pca als naive-bayes linear-regression correlation summarizer) +exampleDirs=(kmeans-scala pca-scala als-scala naive-bayes-scala \ + linear-regression-scala correlation-scala summarizer-scala) + +cd scala for dir in ${exampleDirs[*]} do @@ -13,3 +16,5 @@ do ./build.sh cd .. done + +cd .. diff --git a/examples/clean-all-scala-targets.sh b/examples/clean-all-scala-targets.sh new file mode 100755 index 000000000..c7eae002e --- /dev/null +++ b/examples/clean-all-scala-targets.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash + +exampleDirs=(kmeans-scala pca-scala als-scala naive-bayes-scala \ + linear-regression-scala correlation-scala summarizer-scala) + +cd scala || exit 1 + +for dir in ${exampleDirs[*]} +do + cd "$dir" || exit 1 + echo + echo ========================== + echo Cleaning $dir ... + echo ========================== + echo + rm -rf ./target/ + cd .. +done + +cd .. 
diff --git a/examples/kmeans/IntelGpuResourceFile.json b/examples/kmeans/IntelGpuResourceFile.json deleted file mode 100644 index 4b5c3cc98..000000000 --- a/examples/kmeans/IntelGpuResourceFile.json +++ /dev/null @@ -1 +0,0 @@ -[{"id":{"componentName": "spark.worker","resourceName":"gpu"},"addresses":["0","1","2","3"]}] diff --git a/examples/linear-regression/GetIntelGpuResources.sh b/examples/linear-regression/GetIntelGpuResources.sh deleted file mode 100755 index 212fd24f1..000000000 --- a/examples/linear-regression/GetIntelGpuResources.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/usr/bin/env bash - -# This script is a basic example script to get resource information about NVIDIA GPUs. -# It assumes the drivers are properly installed and the nvidia-smi command is available. -# It is not guaranteed to work on all setups so please test and customize as needed -# for your environment. It can be passed into SPARK via the config -# spark.{driver/executor}.resource.gpu.discoveryScript to allow the driver or executor to discover -# the GPUs it was allocated. It assumes you are running within an isolated container where the -# GPUs are allocated exclusively to that driver or executor. -# It outputs a JSON formatted string that is expected by the -# spark.{driver/executor}.resource.gpu.discoveryScript config. 
-# -# Example output: {"name": "gpu", "addresses":["0","1","2","3","4","5","6","7"]} - -#ADDRS=`nvidia-smi --query-gpu=index --format=csv,noheader | sed -e ':a' -e 'N' -e'$!ba' -e 's/\n/","/g'` -#echo {\"name\": \"gpu\", \"addresses\":[\"$ADDRS\"]} -#ADDRS="0","1","2","3","4","5","6","7" -#echo {\"name\": \"gpu\", \"addresses\":[\"$ADDRS\"]} - -echo {\"name\": \"gpu\", \"addresses\":[\"0\",\"1\",\"2\",\"3\"]} diff --git a/examples/linear-regression/IntelGpuResourceFile.json b/examples/linear-regression/IntelGpuResourceFile.json deleted file mode 100644 index 4b5c3cc98..000000000 --- a/examples/linear-regression/IntelGpuResourceFile.json +++ /dev/null @@ -1 +0,0 @@ -[{"id":{"componentName": "spark.worker","resourceName":"gpu"},"addresses":["0","1","2","3"]}] diff --git a/examples/pca/GetIntelGpuResources.sh b/examples/pca/GetIntelGpuResources.sh deleted file mode 100755 index 212fd24f1..000000000 --- a/examples/pca/GetIntelGpuResources.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/usr/bin/env bash - -# This script is a basic example script to get resource information about NVIDIA GPUs. -# It assumes the drivers are properly installed and the nvidia-smi command is available. -# It is not guaranteed to work on all setups so please test and customize as needed -# for your environment. It can be passed into SPARK via the config -# spark.{driver/executor}.resource.gpu.discoveryScript to allow the driver or executor to discover -# the GPUs it was allocated. It assumes you are running within an isolated container where the -# GPUs are allocated exclusively to that driver or executor. -# It outputs a JSON formatted string that is expected by the -# spark.{driver/executor}.resource.gpu.discoveryScript config. 
-# -# Example output: {"name": "gpu", "addresses":["0","1","2","3","4","5","6","7"]} - -#ADDRS=`nvidia-smi --query-gpu=index --format=csv,noheader | sed -e ':a' -e 'N' -e'$!ba' -e 's/\n/","/g'` -#echo {\"name\": \"gpu\", \"addresses\":[\"$ADDRS\"]} -#ADDRS="0","1","2","3","4","5","6","7" -#echo {\"name\": \"gpu\", \"addresses\":[\"$ADDRS\"]} - -echo {\"name\": \"gpu\", \"addresses\":[\"0\",\"1\",\"2\",\"3\"]} diff --git a/examples/pca/IntelGpuResourceFile.json b/examples/pca/IntelGpuResourceFile.json deleted file mode 100644 index 4b5c3cc98..000000000 --- a/examples/pca/IntelGpuResourceFile.json +++ /dev/null @@ -1 +0,0 @@ -[{"id":{"componentName": "spark.worker","resourceName":"gpu"},"addresses":["0","1","2","3"]}] diff --git a/examples/als-pyspark/als-pyspark.py b/examples/python/als-pyspark/als-pyspark.py similarity index 100% rename from examples/als-pyspark/als-pyspark.py rename to examples/python/als-pyspark/als-pyspark.py diff --git a/examples/als-pyspark/run.sh b/examples/python/als-pyspark/run-cpu.sh similarity index 95% rename from examples/als-pyspark/run.sh rename to examples/python/als-pyspark/run-cpu.sh index f06bf1bad..7edcc6f57 100755 --- a/examples/als-pyspark/run.sh +++ b/examples/python/als-pyspark/run-cpu.sh @@ -1,6 +1,7 @@ #!/usr/bin/env bash -source ../../conf/env.sh +CONF_PATH=$PWD/../../../conf +source $CONF_PATH/env.sh # Data file is converted from oneDAL examples ($DAALROOT/examples/daal/data/batch/implicit_als_csr.csv) # The data file should be copied to $HDFS_ROOT before running examples diff --git a/examples/kmeans-pyspark/kmeans-pyspark.py b/examples/python/kmeans-pyspark/kmeans-pyspark.py similarity index 100% rename from examples/kmeans-pyspark/kmeans-pyspark.py rename to examples/python/kmeans-pyspark/kmeans-pyspark.py diff --git a/examples/kmeans-pyspark/run.sh b/examples/python/kmeans-pyspark/run-cpu.sh similarity index 95% rename from examples/kmeans-pyspark/run.sh rename to examples/python/kmeans-pyspark/run-cpu.sh index 
f0184d9f5..b78f15cb6 100755 --- a/examples/kmeans-pyspark/run.sh +++ b/examples/python/kmeans-pyspark/run-cpu.sh @@ -1,6 +1,7 @@ #!/usr/bin/env bash -source ../../conf/env.sh +CONF_PATH=$PWD/../../../conf +source $CONF_PATH/env.sh # Data file is from Spark Examples (data/mllib/sample_kmeans_data.txt) and put in examples/data # The data file should be copied to $HDFS_ROOT before running examples diff --git a/examples/python/kmeans-pyspark/run-gpu.sh b/examples/python/kmeans-pyspark/run-gpu.sh new file mode 100755 index 000000000..81c29b280 --- /dev/null +++ b/examples/python/kmeans-pyspark/run-gpu.sh @@ -0,0 +1,40 @@ +#!/usr/bin/env bash + +CONF_PATH=$PWD/../../../conf +source $CONF_PATH/env.sh + +# Data file is from Spark Examples (data/mllib/sample_kmeans_data.txt) and put in examples/data +# The data file should be copied to $HDFS_ROOT before running examples +DATA_FILE=$HDFS_ROOT/data/sample_kmeans_data.txt + +DEVICE=GPU +RESOURCE_FILE=$CONF_PATH/IntelGpuResourceFile.json +WORKER_GPU_AMOUNT=4 +EXECUTOR_GPU_AMOUNT=1 +TASK_GPU_AMOUNT=1 +APP_PY=kmeans-pyspark.py + + +# Should run in standalone mode +time $SPARK_HOME/bin/spark-submit --master $SPARK_MASTER \ + --num-executors $SPARK_NUM_EXECUTORS \ + --executor-cores $SPARK_EXECUTOR_CORES \ + --total-executor-cores $SPARK_TOTAL_CORES \ + --driver-memory $SPARK_DRIVER_MEMORY \ + --executor-memory $SPARK_EXECUTOR_MEMORY \ + --conf "spark.serializer=org.apache.spark.serializer.KryoSerializer" \ + --conf "spark.default.parallelism=$SPARK_DEFAULT_PARALLELISM" \ + --conf "spark.sql.shuffle.partitions=$SPARK_DEFAULT_PARALLELISM" \ + --conf "spark.driver.extraClassPath=$SPARK_DRIVER_CLASSPATH" \ + --conf "spark.executor.extraClassPath=$SPARK_EXECUTOR_CLASSPATH" \ + --conf "spark.oap.mllib.device=$DEVICE" \ + --conf "spark.worker.resourcesFile=$RESOURCE_FILE" \ + --conf "spark.worker.resource.gpu.amount=$WORKER_GPU_AMOUNT" \ + --conf "spark.executor.resource.gpu.amount=$EXECUTOR_GPU_AMOUNT" \ + --conf 
"spark.task.resource.gpu.amount=$TASK_GPU_AMOUNT" \ + --conf "spark.shuffle.reduceLocality.enabled=false" \ + --conf "spark.network.timeout=1200s" \ + --conf "spark.task.maxFailures=1" \ + --jars $OAP_MLLIB_JAR \ + $APP_PY $DATA_FILE \ + 2>&1 | tee KMeans-$(date +%m%d_%H_%M_%S).log diff --git a/examples/pca-pyspark/pca-pyspark.py b/examples/python/pca-pyspark/pca-pyspark.py similarity index 100% rename from examples/pca-pyspark/pca-pyspark.py rename to examples/python/pca-pyspark/pca-pyspark.py diff --git a/examples/pca-pyspark/run.sh b/examples/python/pca-pyspark/run-cpu.sh similarity index 95% rename from examples/pca-pyspark/run.sh rename to examples/python/pca-pyspark/run-cpu.sh index 385ce5ca9..5f2bedee4 100755 --- a/examples/pca-pyspark/run.sh +++ b/examples/python/pca-pyspark/run-cpu.sh @@ -1,6 +1,7 @@ #!/usr/bin/env bash -source ../../conf/env.sh +CONF_PATH=$PWD/../../../conf +source $CONF_PATH/env.sh # CSV data is the same as in Spark example "ml/pca_example.py" # The data file should be copied to $HDFS_ROOT before running examples diff --git a/examples/python/pca-pyspark/run-gpu.sh b/examples/python/pca-pyspark/run-gpu.sh new file mode 100755 index 000000000..164d6fe11 --- /dev/null +++ b/examples/python/pca-pyspark/run-gpu.sh @@ -0,0 +1,40 @@ +#!/usr/bin/env bash + +CONF_PATH=$PWD/../../../conf +source $CONF_PATH/env.sh + +# CSV data is the same as in Spark example "ml/pca_example.py" +# The data file should be copied to $HDFS_ROOT before running examples +DATA_FILE=$HDFS_ROOT/data/pca_data.csv + +DEVICE=GPU +RESOURCE_FILE=$CONF_PATH/IntelGpuResourceFile.json +WORKER_GPU_AMOUNT=4 +EXECUTOR_GPU_AMOUNT=1 +TASK_GPU_AMOUNT=1 +APP_PY=pca-pyspark.py + + +# Should run in standalone mode +time $SPARK_HOME/bin/spark-submit --master $SPARK_MASTER \ + --num-executors $SPARK_NUM_EXECUTORS \ + --executor-cores $SPARK_EXECUTOR_CORES \ + --total-executor-cores $SPARK_TOTAL_CORES \ + --driver-memory $SPARK_DRIVER_MEMORY \ + --executor-memory $SPARK_EXECUTOR_MEMORY \ + 
--conf "spark.serializer=org.apache.spark.serializer.KryoSerializer" \ + --conf "spark.default.parallelism=$SPARK_DEFAULT_PARALLELISM" \ + --conf "spark.sql.shuffle.partitions=$SPARK_DEFAULT_PARALLELISM" \ + --conf "spark.driver.extraClassPath=$SPARK_DRIVER_CLASSPATH" \ + --conf "spark.executor.extraClassPath=$SPARK_EXECUTOR_CLASSPATH" \ + --conf "spark.oap.mllib.device=$DEVICE" \ + --conf "spark.worker.resourcesFile=$RESOURCE_FILE" \ + --conf "spark.worker.resource.gpu.amount=$WORKER_GPU_AMOUNT" \ + --conf "spark.executor.resource.gpu.amount=$EXECUTOR_GPU_AMOUNT" \ + --conf "spark.task.resource.gpu.amount=$TASK_GPU_AMOUNT" \ + --conf "spark.shuffle.reduceLocality.enabled=false" \ + --conf "spark.network.timeout=1200s" \ + --conf "spark.task.maxFailures=1" \ + --jars $OAP_MLLIB_JAR \ + $APP_PY $DATA_FILE \ + 2>&1 | tee PCA-$(date +%m%d_%H_%M_%S).log diff --git a/examples/random-forest-pyspark/random_forest_classifier_example.py b/examples/python/random-forest-classifier-pyspark/random_forest_classifier_example.py similarity index 100% rename from examples/random-forest-pyspark/random_forest_classifier_example.py rename to examples/python/random-forest-classifier-pyspark/random_forest_classifier_example.py diff --git a/examples/random-forest-pyspark/run-gpu-standalone.sh b/examples/python/random-forest-classifier-pyspark/run-gpu.sh similarity index 90% rename from examples/random-forest-pyspark/run-gpu-standalone.sh rename to examples/python/random-forest-classifier-pyspark/run-gpu.sh index 1b767c6d7..49bf8a7e3 100755 --- a/examples/random-forest-pyspark/run-gpu-standalone.sh +++ b/examples/python/random-forest-classifier-pyspark/run-gpu.sh @@ -1,19 +1,21 @@ #!/usr/bin/env bash -source ../../conf/env.sh +CONF_PATH=$PWD/../../../conf +source $CONF_PATH/env.sh # CSV data is the same as in Spark example "ml/pca_example.py" # The data file should be copied to $HDFS_ROOT before running examples DATA_FILE=$HDFS_ROOT/data/sample_libsvm_data.txt DEVICE=GPU 
-RESOURCE_FILE=$PWD/IntelGpuResourceFile.json +RESOURCE_FILE=$CONF_PATH/IntelGpuResourceFile.json WORKER_GPU_AMOUNT=4 EXECUTOR_GPU_AMOUNT=1 TASK_GPU_AMOUNT=1 APP_PY=random_forest_classifier_example.py +# Should run in standalone mode time $SPARK_HOME/bin/spark-submit --master $SPARK_MASTER \ --num-executors $SPARK_NUM_EXECUTORS \ --executor-cores $SPARK_EXECUTOR_CORES \ @@ -34,5 +36,5 @@ time $SPARK_HOME/bin/spark-submit --master $SPARK_MASTER \ --conf "spark.network.timeout=1200s" \ --conf "spark.task.maxFailures=1" \ --jars $OAP_MLLIB_JAR \ - $APP_PY DATA_FILE \ + $APP_PY $DATA_FILE \ 2>&1 | tee random_forest_classifier-$(date +%m%d_%H_%M_%S).log diff --git a/examples/random-forest-pyspark/random_forest_regressor_example.py b/examples/python/random-forest-regressor-pyspark/random_forest_regressor_example.py similarity index 100% rename from examples/random-forest-pyspark/random_forest_regressor_example.py rename to examples/python/random-forest-regressor-pyspark/random_forest_regressor_example.py diff --git a/examples/random-forest-pyspark/run-gpu-standalone-regressor.sh b/examples/python/random-forest-regressor-pyspark/run-gpu.sh similarity index 90% rename from examples/random-forest-pyspark/run-gpu-standalone-regressor.sh rename to examples/python/random-forest-regressor-pyspark/run-gpu.sh index 839882da8..753174ddb 100755 --- a/examples/random-forest-pyspark/run-gpu-standalone-regressor.sh +++ b/examples/python/random-forest-regressor-pyspark/run-gpu.sh @@ -1,19 +1,21 @@ #!/usr/bin/env bash -source ../../conf/env.sh +CONF_PATH=$PWD/../../../conf +source $CONF_PATH/env.sh # CSV data is the same as in Spark example "ml/pca_example.py" # The data file should be copied to $HDFS_ROOT before running examples DATA_FILE=$HDFS_ROOT/data/sample_libsvm_data.txt DEVICE=GPU -RESOURCE_FILE=$PWD/IntelGpuResourceFile.json +RESOURCE_FILE=$CONF_PATH/IntelGpuResourceFile.json WORKER_GPU_AMOUNT=4 EXECUTOR_GPU_AMOUNT=1 TASK_GPU_AMOUNT=1 APP_PY=random_forest_regressor_example.py 
+# Should run in standalone mode time $SPARK_HOME/bin/spark-submit --master $SPARK_MASTER \ --num-executors $SPARK_NUM_EXECUTORS \ --executor-cores $SPARK_EXECUTOR_CORES \ @@ -34,5 +36,5 @@ time $SPARK_HOME/bin/spark-submit --master $SPARK_MASTER \ --conf "spark.network.timeout=1200s" \ --conf "spark.task.maxFailures=1" \ --jars $OAP_MLLIB_JAR \ - $APP_PY DATA_FILE \ + $APP_PY $DATA_FILE \ 2>&1 | tee random_forest_regressor-$(date +%m%d_%H_%M_%S).log diff --git a/examples/random-forest-pyspark/IntelGpuResourceFile.json b/examples/random-forest-pyspark/IntelGpuResourceFile.json deleted file mode 100644 index 4b5c3cc98..000000000 --- a/examples/random-forest-pyspark/IntelGpuResourceFile.json +++ /dev/null @@ -1 +0,0 @@ -[{"id":{"componentName": "spark.worker","resourceName":"gpu"},"addresses":["0","1","2","3"]}] diff --git a/examples/run-all-pyspark.sh b/examples/run-all-pyspark-cpu.sh similarity index 87% rename from examples/run-all-pyspark.sh rename to examples/run-all-pyspark-cpu.sh index dd316c9dd..678c019c7 100755 --- a/examples/run-all-pyspark.sh +++ b/examples/run-all-pyspark-cpu.sh @@ -2,6 +2,8 @@ exampleDirs=(kmeans-pyspark pca-pyspark als-pyspark) +cd python + for dir in ${exampleDirs[*]} do cd $dir @@ -10,6 +12,8 @@ do echo Running $dir ... echo ========================== echo - ./run.sh + ./run-cpu.sh cd .. done + +cd .. diff --git a/examples/run-all-pyspark-gpu.sh b/examples/run-all-pyspark-gpu.sh new file mode 100755 index 000000000..b9d11b4f6 --- /dev/null +++ b/examples/run-all-pyspark-gpu.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash + +exampleDirs=(kmeans-pyspark pca-pyspark \ + random-forest-regressor-pyspark random-forest-classifier-pyspark) + +cd python + +for dir in ${exampleDirs[*]} +do + cd $dir + echo + echo ========================== + echo Running $dir ... + echo ========================== + echo + ./run-gpu.sh + cd .. +done + +cd .. 
diff --git a/examples/run-all-scala-cpu.sh b/examples/run-all-scala-cpu.sh new file mode 100755 index 000000000..953031d27 --- /dev/null +++ b/examples/run-all-scala-cpu.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash + +exampleDirs=(kmeans-scala pca-scala als-scala naive-bayes-scala \ + linear-regression-scala correlation-scala summarizer-scala) + +cd scala + +for dir in ${exampleDirs[*]} +do + cd $dir + echo + echo ========================== + echo Running $dir ... + echo ========================== + echo + ./run-cpu.sh + cd .. +done + +cd .. diff --git a/examples/run-all-scala.sh b/examples/run-all-scala-gpu.sh similarity index 58% rename from examples/run-all-scala.sh rename to examples/run-all-scala-gpu.sh index 04bab7f8a..004303d4d 100755 --- a/examples/run-all-scala.sh +++ b/examples/run-all-scala-gpu.sh @@ -1,6 +1,8 @@ #!/usr/bin/env bash -exampleDirs=(kmeans pca als naive-bayes linear-regression correlation summarizer) +exampleDirs=(kmeans-scala pca-scala linear-regression-scala correlation-scala summarizer-scala) + +cd scala for dir in ${exampleDirs[*]} do @@ -10,6 +12,8 @@ do echo Running $dir ... echo ========================== echo - ./run.sh + ./run-gpu.sh cd .. done + +cd .. 
diff --git a/examples/als/build.sh b/examples/scala/als-scala/build.sh similarity index 100% rename from examples/als/build.sh rename to examples/scala/als-scala/build.sh diff --git a/examples/als/pom.xml b/examples/scala/als-scala/pom.xml similarity index 100% rename from examples/als/pom.xml rename to examples/scala/als-scala/pom.xml diff --git a/examples/als/run.sh b/examples/scala/als-scala/run-cpu.sh similarity index 96% rename from examples/als/run.sh rename to examples/scala/als-scala/run-cpu.sh index 7ff779b82..3b2a905bf 100755 --- a/examples/als/run.sh +++ b/examples/scala/als-scala/run-cpu.sh @@ -1,6 +1,7 @@ #!/usr/bin/env bash -source ../../conf/env.sh +CONF_PATH=$PWD/../../../conf +source $CONF_PATH/env.sh # Data file is converted from oneDAL examples ($DAALROOT/examples/daal/data/batch/implicit_als_csr.csv) # The data file should be copied to $HDFS_ROOT before running examples diff --git a/examples/als/src/main/scala/org/apache/spark/examples/ml/ALSExample.scala b/examples/scala/als-scala/src/main/scala/org/apache/spark/examples/ml/ALSExample.scala similarity index 100% rename from examples/als/src/main/scala/org/apache/spark/examples/ml/ALSExample.scala rename to examples/scala/als-scala/src/main/scala/org/apache/spark/examples/ml/ALSExample.scala diff --git a/examples/correlation/build.sh b/examples/scala/correlation-scala/build.sh similarity index 100% rename from examples/correlation/build.sh rename to examples/scala/correlation-scala/build.sh diff --git a/examples/correlation/pom.xml b/examples/scala/correlation-scala/pom.xml similarity index 100% rename from examples/correlation/pom.xml rename to examples/scala/correlation-scala/pom.xml diff --git a/examples/correlation/run.sh b/examples/scala/correlation-scala/run-cpu.sh similarity index 90% rename from examples/correlation/run.sh rename to examples/scala/correlation-scala/run-cpu.sh index a937091ae..aa586244a 100755 --- a/examples/correlation/run.sh +++ 
b/examples/scala/correlation-scala/run-cpu.sh @@ -1,6 +1,7 @@ #!/usr/bin/env bash -source ../../conf/env.sh +CONF_PATH=$PWD/../../../conf +source $CONF_PATH/env.sh DEVICE=CPU APP_JAR=target/oap-mllib-examples-$OAP_MLLIB_VERSION.jar @@ -24,4 +25,4 @@ time $SPARK_HOME/bin/spark-submit --master $SPARK_MASTER \ --jars $OAP_MLLIB_JAR \ --class $APP_CLASS \ $APP_JAR $DATA_FILE \ - 2>&1 | tee Correlation-$(date +%m%d_%H_%M_%S).log \ No newline at end of file + 2>&1 | tee Correlation-$(date +%m%d_%H_%M_%S).log diff --git a/examples/correlation/run-gpu-standalone.sh b/examples/scala/correlation-scala/run-gpu.sh similarity index 94% rename from examples/correlation/run-gpu-standalone.sh rename to examples/scala/correlation-scala/run-gpu.sh index 881926fc3..27c422439 100755 --- a/examples/correlation/run-gpu-standalone.sh +++ b/examples/scala/correlation-scala/run-gpu.sh @@ -1,6 +1,7 @@ #!/usr/bin/env bash -source ../../conf/env.sh +CONF_PATH=$PWD/../../../conf +source $CONF_PATH/env.sh # Data file is from Spark Examples (data/mllib/sample_kmeans_data.txt) and put in examples/data # The data file should be copied to $HDFS_ROOT before running examples @@ -10,7 +11,7 @@ APP_JAR=target/oap-mllib-examples-$OAP_MLLIB_VERSION.jar APP_CLASS=org.apache.spark.examples.ml.CorrelationExample DEVICE=GPU -RESOURCE_FILE=$PWD/IntelGpuResourceFile.json +RESOURCE_FILE=$CONF_PATH/IntelGpuResourceFile.json WORKER_GPU_AMOUNT=4 EXECUTOR_GPU_AMOUNT=1 TASK_GPU_AMOUNT=1 diff --git a/examples/correlation/src/main/scala/org/apache/spark/examples/ml/CorrelationExample.scala b/examples/scala/correlation-scala/src/main/scala/org/apache/spark/examples/ml/CorrelationExample.scala similarity index 100% rename from examples/correlation/src/main/scala/org/apache/spark/examples/ml/CorrelationExample.scala rename to examples/scala/correlation-scala/src/main/scala/org/apache/spark/examples/ml/CorrelationExample.scala diff --git a/examples/kmeans/build.sh b/examples/scala/kmeans-scala/build.sh similarity index 
100% rename from examples/kmeans/build.sh rename to examples/scala/kmeans-scala/build.sh diff --git a/examples/kmeans/pom.xml b/examples/scala/kmeans-scala/pom.xml similarity index 100% rename from examples/kmeans/pom.xml rename to examples/scala/kmeans-scala/pom.xml diff --git a/examples/kmeans/run.sh b/examples/scala/kmeans-scala/run-cpu.sh similarity index 96% rename from examples/kmeans/run.sh rename to examples/scala/kmeans-scala/run-cpu.sh index c46ecace0..853bfac65 100755 --- a/examples/kmeans/run.sh +++ b/examples/scala/kmeans-scala/run-cpu.sh @@ -1,6 +1,7 @@ #!/usr/bin/env bash -source ../../conf/env.sh +CONF_PATH=$PWD/../../../conf +source $CONF_PATH/env.sh # Data file is from Spark Examples (data/mllib/sample_kmeans_data.txt) and put in examples/data # The data file should be copied to $HDFS_ROOT before running examples diff --git a/examples/kmeans/run-gpu-standalone.sh b/examples/scala/kmeans-scala/run-gpu.sh similarity index 94% rename from examples/kmeans/run-gpu-standalone.sh rename to examples/scala/kmeans-scala/run-gpu.sh index 28041a1d8..b6707414f 100755 --- a/examples/kmeans/run-gpu-standalone.sh +++ b/examples/scala/kmeans-scala/run-gpu.sh @@ -1,6 +1,7 @@ #!/usr/bin/env bash -source ../../conf/env.sh +CONF_PATH=$PWD/../../../conf +source $CONF_PATH/env.sh # Data file is from Spark Examples (data/mllib/sample_kmeans_data.txt) and put in examples/data # The data file should be copied to $HDFS_ROOT before running examples @@ -10,7 +11,7 @@ APP_JAR=target/oap-mllib-examples-$OAP_MLLIB_VERSION.jar APP_CLASS=org.apache.spark.examples.ml.KMeansExample DEVICE=GPU -RESOURCE_FILE=$PWD/IntelGpuResourceFile.json +RESOURCE_FILE=$CONF_PATH/IntelGpuResourceFile.json WORKER_GPU_AMOUNT=4 EXECUTOR_GPU_AMOUNT=1 TASK_GPU_AMOUNT=1 diff --git a/examples/kmeans/src/main/scala/org/apache/spark/examples/ml/KMeansExample.scala b/examples/scala/kmeans-scala/src/main/scala/org/apache/spark/examples/ml/KMeansExample.scala similarity index 100% rename from 
examples/kmeans/src/main/scala/org/apache/spark/examples/ml/KMeansExample.scala rename to examples/scala/kmeans-scala/src/main/scala/org/apache/spark/examples/ml/KMeansExample.scala diff --git a/examples/linear-regression/build.sh b/examples/scala/linear-regression-scala/build.sh similarity index 100% rename from examples/linear-regression/build.sh rename to examples/scala/linear-regression-scala/build.sh diff --git a/examples/linear-regression/pom.xml b/examples/scala/linear-regression-scala/pom.xml similarity index 100% rename from examples/linear-regression/pom.xml rename to examples/scala/linear-regression-scala/pom.xml diff --git a/examples/linear-regression/run.sh b/examples/scala/linear-regression-scala/run-cpu.sh similarity index 96% rename from examples/linear-regression/run.sh rename to examples/scala/linear-regression-scala/run-cpu.sh index a0bd82994..a03b2cb84 100755 --- a/examples/linear-regression/run.sh +++ b/examples/scala/linear-regression-scala/run-cpu.sh @@ -1,6 +1,7 @@ #!/usr/bin/env bash -source ../../conf/env.sh +CONF_PATH=$PWD/../../../conf +source $CONF_PATH/env.sh # Data file is from Spark Examples (data/mllib/sample_linear_regression_data.txt) and put in examples/data # The data file should be copied to $HDFS_ROOT before running examples diff --git a/examples/linear-regression/run-gpu-standalone.sh b/examples/scala/linear-regression-scala/run-gpu.sh similarity index 94% rename from examples/linear-regression/run-gpu-standalone.sh rename to examples/scala/linear-regression-scala/run-gpu.sh index 07331bf6d..bb8896c4a 100755 --- a/examples/linear-regression/run-gpu-standalone.sh +++ b/examples/scala/linear-regression-scala/run-gpu.sh @@ -1,6 +1,7 @@ #!/usr/bin/env bash -source ../../conf/env.sh +CONF_PATH=$PWD/../../../conf +source $CONF_PATH/env.sh # Data file is from Spark Examples (data/mllib/sample_linear_regression_data.txt) and put in examples/data # The data file should be copied to $HDFS_ROOT before running examples @@ -10,7 +11,7 @@ 
APP_JAR=target/oap-mllib-examples-$OAP_MLLIB_VERSION.jar APP_CLASS=org.apache.spark.examples.ml.LinearRegressionExample DEVICE=GPU -RESOURCE_FILE=$PWD/IntelGpuResourceFile.json +RESOURCE_FILE=$CONF_PATH/IntelGpuResourceFile.json WORKER_GPU_AMOUNT=4 EXECUTOR_GPU_AMOUNT=1 TASK_GPU_AMOUNT=1 @@ -39,4 +40,3 @@ time $SPARK_HOME/bin/spark-submit --master $SPARK_MASTER \ --class $APP_CLASS \ $APP_JAR $DATA_FILE \ 2>&1 | tee LinearRegression-$(date +%m%d_%H_%M_%S).log - diff --git a/examples/linear-regression/src/main/scala/org/apache/spark/examples/ml/LinearRegressionExample.scala b/examples/scala/linear-regression-scala/src/main/scala/org/apache/spark/examples/ml/LinearRegressionExample.scala similarity index 100% rename from examples/linear-regression/src/main/scala/org/apache/spark/examples/ml/LinearRegressionExample.scala rename to examples/scala/linear-regression-scala/src/main/scala/org/apache/spark/examples/ml/LinearRegressionExample.scala diff --git a/examples/naive-bayes/build.sh b/examples/scala/naive-bayes-scala/build.sh similarity index 100% rename from examples/naive-bayes/build.sh rename to examples/scala/naive-bayes-scala/build.sh diff --git a/examples/naive-bayes/pom.xml b/examples/scala/naive-bayes-scala/pom.xml similarity index 100% rename from examples/naive-bayes/pom.xml rename to examples/scala/naive-bayes-scala/pom.xml diff --git a/examples/naive-bayes/run.sh b/examples/scala/naive-bayes-scala/run-cpu.sh similarity index 96% rename from examples/naive-bayes/run.sh rename to examples/scala/naive-bayes-scala/run-cpu.sh index 052412476..1cb2f5e7a 100755 --- a/examples/naive-bayes/run.sh +++ b/examples/scala/naive-bayes-scala/run-cpu.sh @@ -1,6 +1,7 @@ #!/usr/bin/env bash -source ../../conf/env.sh +CONF_PATH=$PWD/../../../conf +source $CONF_PATH/env.sh # Data file is from Spark Examples (data/mllib/sample_libsvm_data.txt) and put in examples/data # The data file should be copied to $HDFS_ROOT before running examples diff --git 
a/examples/naive-bayes/src/main/scala/org/apache/spark/examples/ml/NaiveBayesExample.scala b/examples/scala/naive-bayes-scala/src/main/scala/org/apache/spark/examples/ml/NaiveBayesExample.scala similarity index 100% rename from examples/naive-bayes/src/main/scala/org/apache/spark/examples/ml/NaiveBayesExample.scala rename to examples/scala/naive-bayes-scala/src/main/scala/org/apache/spark/examples/ml/NaiveBayesExample.scala diff --git a/examples/pca/build.sh b/examples/scala/pca-scala/build.sh similarity index 100% rename from examples/pca/build.sh rename to examples/scala/pca-scala/build.sh diff --git a/examples/pca/pom.xml b/examples/scala/pca-scala/pom.xml similarity index 100% rename from examples/pca/pom.xml rename to examples/scala/pca-scala/pom.xml diff --git a/examples/pca/run.sh b/examples/scala/pca-scala/run-cpu.sh similarity index 95% rename from examples/pca/run.sh rename to examples/scala/pca-scala/run-cpu.sh index bbcc5e216..294058e99 100755 --- a/examples/pca/run.sh +++ b/examples/scala/pca-scala/run-cpu.sh @@ -1,6 +1,7 @@ #!/usr/bin/env bash -source ../../conf/env.sh +CONF_PATH=$PWD/../../../conf +source $CONF_PATH/env.sh APP_JAR=target/oap-mllib-examples-$OAP_MLLIB_VERSION.jar APP_CLASS=org.apache.spark.examples.ml.PCAExample diff --git a/examples/pca/run-gpu-standalone.sh b/examples/scala/pca-scala/run-gpu.sh similarity index 93% rename from examples/pca/run-gpu-standalone.sh rename to examples/scala/pca-scala/run-gpu.sh index ac107dfaa..e3c1ae4ed 100755 --- a/examples/pca/run-gpu-standalone.sh +++ b/examples/scala/pca-scala/run-gpu.sh @@ -1,12 +1,13 @@ #!/usr/bin/env bash -source ../../conf/env.sh +CONF_PATH=$PWD/../../../conf +source $CONF_PATH/env.sh APP_JAR=target/oap-mllib-examples-$OAP_MLLIB_VERSION.jar APP_CLASS=org.apache.spark.examples.ml.PCAExample DEVICE=GPU -RESOURCE_FILE=$PWD/IntelGpuResourceFile.json +RESOURCE_FILE=$CONF_PATH/IntelGpuResourceFile.json WORKER_GPU_AMOUNT=4 EXECUTOR_GPU_AMOUNT=1 TASK_GPU_AMOUNT=1 diff --git 
a/examples/pca/src/main/scala/org/apache/spark/examples/ml/PCAExample.scala b/examples/scala/pca-scala/src/main/scala/org/apache/spark/examples/ml/PCAExample.scala similarity index 100% rename from examples/pca/src/main/scala/org/apache/spark/examples/ml/PCAExample.scala rename to examples/scala/pca-scala/src/main/scala/org/apache/spark/examples/ml/PCAExample.scala diff --git a/examples/summarizer/build.sh b/examples/scala/summarizer-scala/build.sh similarity index 100% rename from examples/summarizer/build.sh rename to examples/scala/summarizer-scala/build.sh diff --git a/examples/summarizer/pom.xml b/examples/scala/summarizer-scala/pom.xml similarity index 100% rename from examples/summarizer/pom.xml rename to examples/scala/summarizer-scala/pom.xml diff --git a/examples/summarizer/run.sh b/examples/scala/summarizer-scala/run-cpu.sh similarity index 90% rename from examples/summarizer/run.sh rename to examples/scala/summarizer-scala/run-cpu.sh index 1532f15a4..9a1e2eb7b 100755 --- a/examples/summarizer/run.sh +++ b/examples/scala/summarizer-scala/run-cpu.sh @@ -1,6 +1,7 @@ #!/usr/bin/env bash -source ../../conf/env.sh +CONF_PATH=$PWD/../../../conf +source $CONF_PATH/env.sh DEVICE=CPU APP_JAR=target/oap-mllib-examples-$OAP_MLLIB_VERSION.jar @@ -24,4 +25,4 @@ time $SPARK_HOME/bin/spark-submit --master $SPARK_MASTER -v \ --jars $OAP_MLLIB_JAR \ --class $APP_CLASS \ $APP_JAR \ - 2>&1 | tee Summarizer-$(date +%m%d_%H_%M_%S).log \ No newline at end of file + 2>&1 | tee Summarizer-$(date +%m%d_%H_%M_%S).log diff --git a/examples/summarizer/run-gpu-standalone.sh b/examples/scala/summarizer-scala/run-gpu.sh similarity index 85% rename from examples/summarizer/run-gpu-standalone.sh rename to examples/scala/summarizer-scala/run-gpu.sh index 6952a02b3..5e795cea2 100755 --- a/examples/summarizer/run-gpu-standalone.sh +++ b/examples/scala/summarizer-scala/run-gpu.sh @@ -1,15 +1,13 @@ #!/usr/bin/env bash -source ../../conf/env.sh - -# Data file is from Spark Examples 
(data/mllib/sample_kmeans_data.txt) and put in examples/data -# The data file should be copied to $HDFS_ROOT before running examples +CONF_PATH=$PWD/../../../conf +source $CONF_PATH/env.sh APP_JAR=target/oap-mllib-examples-$OAP_MLLIB_VERSION.jar APP_CLASS=org.apache.spark.examples.ml.SummaryStatisticsExample DEVICE=GPU -RESOURCE_FILE=$PWD/IntelGpuResourceFile.json +RESOURCE_FILE=$CONF_PATH/IntelGpuResourceFile.json WORKER_GPU_AMOUNT=4 EXECUTOR_GPU_AMOUNT=1 TASK_GPU_AMOUNT=1 diff --git a/examples/summarizer/src/main/scala/org/apache/spark/examples/ml/SummaryStatisticsExample.scala b/examples/scala/summarizer-scala/src/main/scala/org/apache/spark/examples/ml/SummaryStatisticsExample.scala similarity index 100% rename from examples/summarizer/src/main/scala/org/apache/spark/examples/ml/SummaryStatisticsExample.scala rename to examples/scala/summarizer-scala/src/main/scala/org/apache/spark/examples/ml/SummaryStatisticsExample.scala diff --git a/examples/summarizer/IntelGpuResourceFile.json b/examples/summarizer/IntelGpuResourceFile.json deleted file mode 100644 index 4b5c3cc98..000000000 --- a/examples/summarizer/IntelGpuResourceFile.json +++ /dev/null @@ -1 +0,0 @@ -[{"id":{"componentName": "spark.worker","resourceName":"gpu"},"addresses":["0","1","2","3"]}]