diff --git a/examples/als-pyspark/run.sh b/examples/als-pyspark/run.sh
index b85ce26e9..f06bf1bad 100755
--- a/examples/als-pyspark/run.sh
+++ b/examples/als-pyspark/run.sh
@@ -6,6 +6,7 @@ source ../../conf/env.sh
 
 # The data file should be copied to $HDFS_ROOT before running examples
 DATA_FILE=$HDFS_ROOT/data/onedal_als_csr_ratings.txt
+DEVICE=CPU
 APP_PY=als-pyspark.py
 
 time $SPARK_HOME/bin/spark-submit --master $SPARK_MASTER \
@@ -17,6 +18,7 @@ time $SPARK_HOME/bin/spark-submit --master $SPARK_MASTER \
     --conf "spark.serializer=org.apache.spark.serializer.KryoSerializer" \
     --conf "spark.default.parallelism=$SPARK_DEFAULT_PARALLELISM" \
     --conf "spark.sql.shuffle.partitions=$SPARK_DEFAULT_PARALLELISM" \
+    --conf "spark.oap.mllib.device=$DEVICE" \
     --conf "spark.driver.extraClassPath=$SPARK_DRIVER_CLASSPATH" \
     --conf "spark.executor.extraClassPath=$SPARK_EXECUTOR_CLASSPATH" \
     --conf "spark.shuffle.reduceLocality.enabled=false" \
diff --git a/examples/als/run.sh b/examples/als/run.sh
index cbb3ce34f..7ff779b82 100755
--- a/examples/als/run.sh
+++ b/examples/als/run.sh
@@ -6,6 +6,7 @@ source ../../conf/env.sh
 
 # The data file should be copied to $HDFS_ROOT before running examples
 DATA_FILE=$HDFS_ROOT/data/onedal_als_csr_ratings.txt
+DEVICE=CPU
 APP_JAR=target/oap-mllib-examples-$OAP_MLLIB_VERSION.jar
 APP_CLASS=org.apache.spark.examples.ml.ALSExample
 
@@ -18,6 +19,7 @@ time $SPARK_HOME/bin/spark-submit --master $SPARK_MASTER \
     --conf "spark.serializer=org.apache.spark.serializer.KryoSerializer" \
     --conf "spark.default.parallelism=$SPARK_DEFAULT_PARALLELISM" \
     --conf "spark.sql.shuffle.partitions=$SPARK_DEFAULT_PARALLELISM" \
+    --conf "spark.oap.mllib.device=$DEVICE" \
     --conf "spark.driver.extraClassPath=$SPARK_DRIVER_CLASSPATH" \
     --conf "spark.executor.extraClassPath=$SPARK_EXECUTOR_CLASSPATH" \
     --conf "spark.shuffle.reduceLocality.enabled=false" \
diff --git a/examples/kmeans-pyspark/run.sh b/examples/kmeans-pyspark/run.sh
index 4899cef2a..f0184d9f5 100755
--- a/examples/kmeans-pyspark/run.sh
+++ b/examples/kmeans-pyspark/run.sh
@@ -6,6 +6,7 @@ source ../../conf/env.sh
 
 # The data file should be copied to $HDFS_ROOT before running examples
 DATA_FILE=$HDFS_ROOT/data/sample_kmeans_data.txt
+DEVICE=CPU
 APP_PY=kmeans-pyspark.py
 
 time $SPARK_HOME/bin/spark-submit --master $SPARK_MASTER \
@@ -17,6 +18,7 @@ time $SPARK_HOME/bin/spark-submit --master $SPARK_MASTER \
     --conf "spark.serializer=org.apache.spark.serializer.KryoSerializer" \
     --conf "spark.default.parallelism=$SPARK_DEFAULT_PARALLELISM" \
     --conf "spark.sql.shuffle.partitions=$SPARK_DEFAULT_PARALLELISM" \
+    --conf "spark.oap.mllib.device=$DEVICE" \
     --conf "spark.driver.extraClassPath=$SPARK_DRIVER_CLASSPATH" \
     --conf "spark.executor.extraClassPath=$SPARK_EXECUTOR_CLASSPATH" \
     --conf "spark.shuffle.reduceLocality.enabled=false" \
diff --git a/examples/linear-regression/run.sh b/examples/linear-regression/run.sh
index e25cb269d..a4bc9d484 100755
--- a/examples/linear-regression/run.sh
+++ b/examples/linear-regression/run.sh
@@ -6,6 +6,7 @@ source ../../conf/env.sh
 
 # The data file should be copied to $HDFS_ROOT before running examples
 DATA_FILE=$HDFS_ROOT/data/sample_linear_regression_data.txt
+DEVICE=CPU
 APP_JAR=target/oap-mllib-examples-$OAP_MLLIB_VERSION.jar
 APP_CLASS=org.apache.spark.examples.ml.LinearRegressionExample
 
@@ -18,6 +19,7 @@ time $SPARK_HOME/bin/spark-submit --master $SPARK_MASTER \
     --conf "spark.serializer=org.apache.spark.serializer.KryoSerializer" \
     --conf "spark.default.parallelism=$SPARK_DEFAULT_PARALLELISM" \
     --conf "spark.sql.shuffle.partitions=$SPARK_DEFAULT_PARALLELISM" \
+    --conf "spark.oap.mllib.device=$DEVICE" \
     --conf "spark.driver.extraClassPath=$SPARK_DRIVER_CLASSPATH" \
     --conf "spark.executor.extraClassPath=$SPARK_EXECUTOR_CLASSPATH" \
     --conf "spark.shuffle.reduceLocality.enabled=false" \
diff --git a/examples/naive-bayes/run.sh b/examples/naive-bayes/run.sh
index e3f747c42..052412476 100755
--- a/examples/naive-bayes/run.sh
+++ b/examples/naive-bayes/run.sh
@@ -6,6 +6,7 @@ source ../../conf/env.sh
 
 # The data file should be copied to $HDFS_ROOT before running examples
 DATA_FILE=$HDFS_ROOT/data/sample_libsvm_data.txt
+DEVICE=CPU
 APP_JAR=target/oap-mllib-examples-$OAP_MLLIB_VERSION.jar
 APP_CLASS=org.apache.spark.examples.ml.NaiveBayesExample
 
@@ -18,6 +19,7 @@ time $SPARK_HOME/bin/spark-submit --master $SPARK_MASTER \
     --conf "spark.serializer=org.apache.spark.serializer.KryoSerializer" \
     --conf "spark.default.parallelism=$SPARK_DEFAULT_PARALLELISM" \
     --conf "spark.sql.shuffle.partitions=$SPARK_DEFAULT_PARALLELISM" \
+    --conf "spark.oap.mllib.device=$DEVICE" \
     --conf "spark.driver.extraClassPath=$SPARK_DRIVER_CLASSPATH" \
     --conf "spark.executor.extraClassPath=$SPARK_EXECUTOR_CLASSPATH" \
     --conf "spark.shuffle.reduceLocality.enabled=false" \
diff --git a/examples/pca-pyspark/run.sh b/examples/pca-pyspark/run.sh
index dfdc343a1..385ce5ca9 100755
--- a/examples/pca-pyspark/run.sh
+++ b/examples/pca-pyspark/run.sh
@@ -6,6 +6,7 @@ source ../../conf/env.sh
 
 # The data file should be copied to $HDFS_ROOT before running examples
 DATA_FILE=$HDFS_ROOT/data/pca_data.csv
+DEVICE=CPU
 APP_PY=pca-pyspark.py
 K=3
 
@@ -18,6 +19,7 @@ time $SPARK_HOME/bin/spark-submit --master $SPARK_MASTER \
     --conf "spark.serializer=org.apache.spark.serializer.KryoSerializer" \
     --conf "spark.default.parallelism=$SPARK_DEFAULT_PARALLELISM" \
     --conf "spark.sql.shuffle.partitions=$SPARK_DEFAULT_PARALLELISM" \
+    --conf "spark.oap.mllib.device=$DEVICE" \
     --conf "spark.driver.extraClassPath=$SPARK_DRIVER_CLASSPATH" \
     --conf "spark.executor.extraClassPath=$SPARK_EXECUTOR_CLASSPATH" \
     --conf "spark.shuffle.reduceLocality.enabled=false" \