Skip to content

Commit

Permalink
Print time duration for each PCA step (#32)
Browse files Browse the repository at this point in the history
  • Loading branch information
xwu99 authored Mar 5, 2021
1 parent 3252ae9 commit e1c33d9
Showing 1 changed file with 17 additions and 4 deletions.
21 changes: 17 additions & 4 deletions mllib-dal/src/main/native/PCADALImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ JNIEXPORT jlong JNICALL Java_org_apache_spark_ml_feature_PCADALImpl_cPCATrainDAL
int nThreadsNew = services::Environment::getInstance()->getNumberOfThreads();
cout << "oneDAL (native): Number of threads used: " << nThreadsNew << endl;

auto t1 = std::chrono::high_resolution_clock::now();

pca::Distributed<step1Local, algorithmFPType, pca::svdDense> localAlgorithm;

/* Set the input data set to the algorithm */
Expand All @@ -49,6 +51,12 @@ JNIEXPORT jlong JNICALL Java_org_apache_spark_ml_feature_PCADALImpl_cPCATrainDAL
/* Compute PCA decomposition */
localAlgorithm.compute();

auto t2 = std::chrono::high_resolution_clock::now();
auto duration = std::chrono::duration_cast<std::chrono::seconds>( t2 - t1 ).count();
std::cout << "PCA (native): local step took " << duration << " secs" << std::endl;

t1 = std::chrono::high_resolution_clock::now();

/* Serialize partial results required by step 2 */
services::SharedPtr<byte> serializedData;
InputDataArchive dataArch;
Expand All @@ -60,26 +68,31 @@ JNIEXPORT jlong JNICALL Java_org_apache_spark_ml_feature_PCADALImpl_cPCATrainDAL
byte* nodeResults = new byte[perNodeArchLength];
dataArch.copyArchiveToArray(nodeResults, perNodeArchLength);

t2 = std::chrono::high_resolution_clock::now();

duration = std::chrono::duration_cast<std::chrono::seconds>( t2 - t1 ).count();
std::cout << "PCA (native): serializing partial results took " << duration << " secs" << std::endl;

vector<size_t> recv_counts(comm_size * perNodeArchLength);
for (int i = 0; i < comm_size; i++) recv_counts[i] = perNodeArchLength;

cout << "PCA (native): ccl_allgatherv receiving " << perNodeArchLength * nBlocks << " bytes" << endl;

auto t1 = std::chrono::high_resolution_clock::now();
t1 = std::chrono::high_resolution_clock::now();

/* Gather the partial results from all ranks for step 2 (allgatherv delivers them to every rank, not only the root) */
// MPI_Gather(nodeResults, perNodeArchLength, MPI_CHAR, serializedData.get(),
// perNodeArchLength, MPI_CHAR, ccl_root, MPI_COMM_WORLD);
ccl::allgatherv(nodeResults, perNodeArchLength, serializedData.get(), recv_counts,
ccl::datatype::uint8, comm).wait();

auto t2 = std::chrono::high_resolution_clock::now();
t2 = std::chrono::high_resolution_clock::now();

auto duration = std::chrono::duration_cast<std::chrono::seconds>( t2 - t1 ).count();
duration = std::chrono::duration_cast<std::chrono::seconds>( t2 - t1 ).count();
std::cout << "PCA (native): ccl_allgatherv took " << duration << " secs" << std::endl;

if (rankId == ccl_root) {
auto t1 = std::chrono::high_resolution_clock::now();
auto t1 = std::chrono::high_resolution_clock::now();

/* Create an algorithm for principal component analysis using the svdDense method
* on the master node */
Expand Down

0 comments on commit e1c33d9

Please sign in to comment.