
Commit 6ad627b

update
Signed-off-by: minmingzhu <minming.zhu@intel.com>
minmingzhu committed Apr 23, 2023
1 parent 35d198c commit 6ad627b
Showing 4 changed files with 14 additions and 9 deletions.
mllib-dal/src/main/native/DecisionForestOneAPIImpl.cpp (1 change: 0 additions & 1 deletion)

@@ -131,7 +131,6 @@ struct collect_nodes {
         treesVector->push_back(convertsplitToLearningNode(info, classCount));

         std::cout << str << std::endl;
-        i++;
         return true;
     }
 };

@@ -30,6 +30,7 @@ import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.Dataset
 import org.apache.spark.ml.tree
 import org.apache.spark.mllib.tree.model.ImpurityStats
+import scala.collection.JavaConversions._

 import java.util
 import java.util.{ArrayList, Map}
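The new import pulls in Scala's implicit Java-to-Scala collection conversions, so Java lists and maps returned through the JNI layer can be used with ordinary Scala collection operations. A minimal sketch of the effect (the values below are illustrative, not from this patch):

    import scala.collection.JavaConversions._

    val javaList = new java.util.ArrayList[Double]()
    javaList.add(1.0)
    javaList.add(2.5)
    // Implicitly converted to a Scala Buffer, so Scala operations apply:
    val total = javaList.sum  // 3.5

Note that JavaConversions has been deprecated since Scala 2.12 in favor of explicit asScala/asJava conversions.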

@@ -164,15 +164,15 @@ class PCADALImpl(val k: Int,
     val numCols = table.getNumberOfColumns.toInt
     require(k <= numRows, "k should be less or equal to row number")

-    val arrayDouble = getDoubleBufferDataFromDAL(table, numRows, device)
+    val arrayDouble = getDoubleBufferDataFromDAL(table, numRows, numCols)

     // Column-major, transpose of top K rows of NumericTable
     new DenseMatrix(numCols, k, arrayDouble.slice(0, numCols * k), false)
   }

-  private[mllib] def getExplainedVarianceFromDAL(table_1xn: HomogenTable, k: Int,
-                                                 device: Common.ComputeDevice): DenseVector = {
-    val arrayDouble = getDoubleBufferDataFromDAL(table_1xn, 1, device)
+  private def getExplainedVarianceFromDAL(table_1xn: NumericTable, k: Int): DenseVector = {
+    val dataNumCols = table_1xn.getNumberOfColumns.toInt
+    val arrayDouble = getDoubleBufferDataFromDAL(table_1xn, 1, dataNumCols)
     val sum = arrayDouble.sum
     val topK = Arrays.copyOfRange(arrayDouble, 0, k)
     for (i <- 0 until k)
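For reference, getDoubleBufferDataFromDAL returns the table contents flattened row-major, while the DenseMatrix constructor reads the first numCols * k values column-major, so each of the top-k eigenvector rows becomes a principal-component column. A small worked sketch, assuming Spark's mllib DenseMatrix (toy numbers, not from the patch):

    import org.apache.spark.mllib.linalg.DenseMatrix

    // A 2 x 3 eigenvector table flattened row-major:
    // row 0 = (1, 2, 3), row 1 = (4, 5, 6); keep k = 2 components.
    val numCols = 3
    val k = 2
    val arrayDouble = Array(1.0, 2.0, 3.0, 4.0, 5.0, 6.0)
    // Read column-major into a 3 x 2 matrix: column 0 = (1, 2, 3),
    // column 1 = (4, 5, 6), i.e. each source row becomes a column.
    val pc = new DenseMatrix(numCols, k, arrayDouble.slice(0, numCols * k), false)

From the visible lines, getExplainedVarianceFromDAL then normalizes each of the top-k eigenvalues by the sum of all eigenvalues to yield explained-variance ratios.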

@@ -182,13 +182,15 @@ class PCADALImpl(val k: Int,

   // table.asInstanceOf[HomogenNumericTable].getDoubleArray() would error on GPU,
   // so use table.getBlockOfRows instead of it.
-  private[mllib] def getDoubleBufferDataFromDAL(table: HomogenTable,
+  private def getDoubleBufferDataFromDAL(table: NumericTable,
                                           numRows: Int,
-                                          device: Common.ComputeDevice): Array[Double] = {
+                                          numCols: Int): Array[Double] = {
     var dataDouble: DoubleBuffer = null

     // the returned DoubleBuffer is a ByteBuffer, so copy it into a double array
-    val accessor = new RowAccessor(table.getcObejct(), device)
-    val arrayDouble: Array[Double] = accessor.pullDouble(0, numRows)
+    dataDouble = table.getBlockOfRows(0, numRows, dataDouble)
+    val arrayDouble: Array[Double] = new Array[Double](numRows * numCols)
+    dataDouble.get(arrayDouble)

     arrayDouble
   }
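The replacement path goes through the stock DAAL Java NumericTable API: getBlockOfRows fills and returns a direct DoubleBuffer, which is then bulk-copied into a JVM array. A minimal sketch of the pattern, assuming a populated NumericTable named table with its dimensions in scope (illustrative, not part of the patch):

    import java.nio.DoubleBuffer

    var buf: DoubleBuffer = null
    // getBlockOfRows allocates/fills a DoubleBuffer with numRows rows of data
    buf = table.getBlockOfRows(0, numRows, buf)
    val out = new Array[Double](numRows * numCols)
    // bulk relative get copies numRows * numCols doubles into the array
    buf.get(out)

This sidesteps the device-specific RowAccessor path removed above and works for any NumericTable backend.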

@@ -24,6 +24,7 @@ import org.json4s.JsonDSL._
 import com.intel.oap.mllib.Utils
 import com.intel.oap.mllib.classification.{LearningNode => LearningNodeDAL, RandomForestClassifierDALImpl, RandomForestClassifierShim}
+import java.util.{Map => JavaMap}

 import scala.jdk.CollectionConverters._
 import org.apache.spark.annotation.Since
 import org.apache.spark.ml.classification.{BinaryRandomForestClassificationTrainingSummaryImpl, DecisionTreeClassificationModel, ProbabilisticClassifier, RandomForestClassificationModel, RandomForestClassificationTrainingSummaryImpl}

@@ -43,6 +44,8 @@ import org.apache.spark.sql.{DataFrame, Dataset}
 import org.apache.spark.sql.functions.{col, udf}
 import org.apache.spark.sql.types.StructType

+import scala.collection.JavaConversions.mapAsScalaMap
+
 // scalastyle:off line.size.limit
 /**
  * <a href="http://en.wikipedia.org/wiki/Random_forest">Random Forest</a> learning algorithm for
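The mapAsScalaMap import lets the shim consume a java.util.Map returned from the DAL implementation as an ordinary Scala map. A small sketch of the pattern (types and names are illustrative, not from this file):

    import java.util.{Map => JavaMap}
    import scala.collection.JavaConversions.mapAsScalaMap

    val treesMap: JavaMap[Integer, String] = new java.util.HashMap()
    treesMap.put(0, "root")
    // mapAsScalaMap wraps the Java map in a mutable Scala Map view
    mapAsScalaMap(treesMap).foreach { case (treeId, label) =>
      println(s"tree $treeId -> $label")
    }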