[ML-290] Add logger to improve messages output for C++ only #358

Merged · 64 commits · Oct 9, 2023

Commits:
be55d63
update spark to 3.3.3
minmingzhu Sep 18, 2023
6d2b055
Merge branch 'oap-project:master' into master
minmingzhu Sep 18, 2023
833444e
Merge branch 'oap-project:master' into master
minmingzhu Sep 19, 2023
23f0491
Merge branch 'oap-project:master' into master
minmingzhu Sep 19, 2023
f42d3be
Merge branch 'oap-project:master' into master
minmingzhu Sep 21, 2023
db6e64c
Merge branch 'oap-project:master' into master
minmingzhu Sep 26, 2023
d3acd92
First commit
argentea May 26, 2023
c9698ec
Logger tmp
argentea Jun 5, 2023
9f7fb3f
ALS done
argentea Jun 24, 2023
c9df07e
Correlation tmp
argentea Jun 26, 2023
65e9f9f
Add table output
argentea Jul 3, 2023
7ccfe3e
Use logger function to print table
argentea Jul 4, 2023
74bddfa
Correlation done
argentea Jul 4, 2023
31093f5
Add println
argentea Jul 4, 2023
2215810
Fix typo
argentea Jul 4, 2023
4f01e29
HomogenTable done
argentea Jul 4, 2023
0bbb103
Summarizer done
argentea Jul 4, 2023
7044306
Kmeans done
argentea Jul 4, 2023
5ea58c7
KMeans done
argentea Jul 4, 2023
31bbe4e
error_handling done
argentea Jul 4, 2023
af67fba
DFR and NB
argentea Jul 4, 2023
8583805
GPU done
argentea Jul 5, 2023
d03aa16
LR done
argentea Jul 5, 2023
9c47f7e
PCA and DF classifier done
argentea Jul 5, 2023
39cc551
table
argentea Jul 5, 2023
6520b39
Format cpp
argentea Jul 5, 2023
dc7ed88
All done
argentea Jul 7, 2023
07e697f
Format
argentea Jul 7, 2023
051bcfb
Replace print()
argentea Jul 20, 2023
69afa72
Move print(table to service
argentea Jul 20, 2023
e930a4b
Format
argentea Jul 21, 2023
13ef325
Change printf
argentea Jul 21, 2023
6667a89
Remove spaces
argentea Aug 10, 2023
fc4b7ac
Make all error print in stderr
argentea Aug 10, 2023
0d529fe
Add switch
argentea Aug 10, 2023
f858bef
Code style
argentea Aug 10, 2023
e9aad35
Recover
argentea Aug 10, 2023
e724487
Add environment control
argentea Aug 10, 2023
6c9beac
Format
argentea Aug 10, 2023
abc5bee
Clean up
argentea Aug 10, 2023
8e1d2ae
Fix other printf, fix new line bug
argentea Aug 10, 2023
194f9c3
Format
argentea Aug 10, 2023
7c17237
Update run-gpu.sh
xwu99 Aug 11, 2023
9b918d4
Add level
argentea Aug 11, 2023
c5856ff
Format
argentea Aug 11, 2023
91378d4
Remove variable
argentea Aug 11, 2023
c5870dc
Fix typo
argentea Aug 16, 2023
9eb48d4
Add flush
argentea Aug 17, 2023
d6ec6ed
Fix device call host function
argentea Aug 23, 2023
ae18298
Format
argentea Aug 23, 2023
b914c6d
OneDAL rename
argentea Aug 30, 2023
4cd89c8
fix typo
argentea Aug 31, 2023
c4ff81b
Fix typo
argentea Aug 31, 2023
a06e116
update
minmingzhu Sep 26, 2023
45a979e
update
minmingzhu Sep 26, 2023
443b64c
Update ALSShuffle.cpp
xwu99 Sep 28, 2023
61153b9
Update ALSDALImpl.cpp
xwu99 Sep 28, 2023
a2e6baa
Update ALSDALImpl.cpp
xwu99 Sep 28, 2023
60fa92c
update
minmingzhu Sep 28, 2023
675fec8
Merge branch 'add_logger' of https://github.com/minmingzhu/oap-mllib …
minmingzhu Sep 28, 2023
309b4e6
update
minmingzhu Sep 28, 2023
a7109b9
update
minmingzhu Sep 28, 2023
ba2b141
update
minmingzhu Sep 28, 2023
904dffe
update
minmingzhu Sep 28, 2023
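
Before the per-file diffs, here is a minimal sketch of the logging interface these changes call into. This is a hypothetical reconstruction inferred from the call sites below and from the commit messages ("Add level", "Add environment control", "Make all error print in stderr", "Add flush"); the function names, enum values, and the OAP_MLLIB_LOG_LEVEL environment variable are assumptions, and the PR's actual Logger.h may differ.

// Hypothetical sketch of Logger.h (assumed names), inferred from usage
// such as logger::println(logger::INFO, "... %d ...", value).
#pragma once

#include <cstdarg>
#include <cstdio>
#include <cstdlib>

namespace logger {

enum Level { DEBUG = 0, INFO = 1, WARNING = 2, ERROR = 3 };

// Minimum level to emit, read once from the environment; the variable
// name OAP_MLLIB_LOG_LEVEL is an assumption ("Add environment control").
inline Level threshold() {
    static const Level lvl = [] {
        const char *env = std::getenv("OAP_MLLIB_LOG_LEVEL");
        return env ? static_cast<Level>(std::atoi(env)) : INFO;
    }();
    return lvl;
}

// printf-style logging: errors go to stderr, everything else to stdout
// ("Make all error print in stderr"), flushed per line ("Add flush").
inline void println(Level level, const char *fmt, ...) {
    if (level < threshold())
        return;
    std::FILE *out = (level >= ERROR) ? stderr : stdout;
    std::va_list args;
    va_start(args, fmt);
    std::vfprintf(out, fmt, args);
    va_end(args);
    std::fputc('\n', out);
    std::fflush(out);
}

} // namespace logger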
65 changes: 36 additions & 29 deletions mllib-dal/src/main/native/ALSDALImpl.cpp
@@ -24,6 +24,8 @@

#include "ALSShuffle.h"

#include "Logger.h"

using namespace std;
using namespace daal;
using namespace daal::algorithms;
@@ -210,7 +212,7 @@ void initializeStep2Local(

void initializeModel(size_t rankId, ccl::communicator &comm, size_t partitionId,
size_t nBlocks, size_t nUsers, size_t nFactors) {
std::cout << "ALS (native): initializeModel " << std::endl;
logger::println(logger::INFO, "ALS (native): initializeModel");

auto t1 = std::chrono::high_resolution_clock::now();

@@ -229,8 +231,8 @@ void initializeModel(size_t rankId, ccl::communicator &comm, size_t partitionId,
auto t2 = std::chrono::high_resolution_clock::now();
auto duration =
std::chrono::duration_cast<std::chrono::seconds>(t2 - t1).count();
std::cout << "ALS (native): initializeModel took " << duration << " secs"
<< std::endl;
logger::println(logger::INFO, "ALS (native): initializeModel took %d secs",
duration);
}

training::DistributedPartialResultStep1Ptr computeStep1Local(
@@ -312,7 +314,7 @@ computeStep4Local(const CSRNumericTablePtr &dataTable,

void trainModel(size_t rankId, ccl::communicator &comm, size_t partitionId,
size_t nBlocks, size_t nFactors, size_t maxIterations) {
std::cout << "ALS (native): trainModel" << std::endl;
logger::println(logger::INFO, "ALS (native): trainModel");

auto tStart = std::chrono::high_resolution_clock::now();

@@ -338,7 +340,7 @@ void trainModel(size_t rankId, ccl::communicator &comm, size_t partitionId,

serializeDAALObject(step1LocalResult.get(), nodeResults);

/* Gathering step1LocalResult on the master */
// Gathering step1LocalResult on the master
gather(rankId, comm, nBlocks, nodeResults, step1LocalResultsOnMaster);

if (rankId == ccl_root) {
@@ -381,7 +383,7 @@ void trainModel(size_t rankId, ccl::communicator &comm, size_t partitionId,

serializeDAALObject(step1LocalResult.get(), nodeResults);

/* Gathering step1LocalResult on the master */
// Gathering step1LocalResult on the master
gather(rankId, comm, nBlocks, nodeResults, step1LocalResultsOnMaster);

if (rankId == ccl_root) {
@@ -409,7 +411,7 @@ void trainModel(size_t rankId, ccl::communicator &comm, size_t partitionId,
step3LocalResult = computeStep3Local(
userOffset, usersPartialResultLocal, userStep3LocalInput, nFactors);

/* MPI_Alltoallv to populate step4LocalInput */
// all2all to populate step4LocalInput
for (size_t i = 0; i < nBlocks; i++) {
serializeDAALObject((*step3LocalResult)[i].get(), nodeCPs[i]);
}
@@ -421,15 +423,15 @@
auto t2 = std::chrono::high_resolution_clock::now();
auto duration =
std::chrono::duration_cast<std::chrono::seconds>(t2 - t1).count();
std::cout << "ALS (native): iteration " << iteration << " took "
<< duration << " secs" << std::endl;
logger::println(logger::INFO, "ALS (native): iteration %d took %f secs",
iteration, duration);
}

auto tEnd = std::chrono::high_resolution_clock::now();
auto durationTotal =
std::chrono::duration_cast<std::chrono::seconds>(tEnd - tStart).count();
std::cout << "ALS (native): trainModel took " << durationTotal << " secs"
<< std::endl;
logger::println(logger::INFO, "ALS (native): trainModel took %d secs",
durationTotal);
}

static size_t getOffsetFromOffsetTable(NumericTablePtr offsetTable) {
Expand All @@ -446,8 +448,7 @@ JNIEXPORT jobject JNICALL
Java_com_intel_oap_mllib_recommendation_ALSDALImpl_cShuffleData(
JNIEnv *env, jobject obj, jobject dataBuffer, jint nTotalKeys, jint nBlocks,
jobject infoObj) {
// cout << "cShuffleData: rank " << rankId << endl;
cout << "RATING_SIZE: " << RATING_SIZE << endl;
logger::println(logger::INFO, "RATING_SIZE: %d", RATING_SIZE);

ccl::communicator &comm = getComm();

Expand Down Expand Up @@ -491,19 +492,23 @@ Java_com_intel_oap_mllib_recommendation_ALSDALImpl_cDALImplictALS(

dataTable = *((CSRNumericTablePtr *)numTableAddr);

cout << "ALS (native): Input info: " << endl;
cout << "- NumberOfRows: " << dataTable->getNumberOfRows() << endl;
cout << "- NumberOfColumns: " << dataTable->getNumberOfColumns() << endl;
cout << "- NumberOfRatings: " << dataTable->getDataSize() << endl;
cout << "- fullNUsers: " << nUsers << endl;
cout << "- nFactors: " << nFactors << endl;

// Set number of threads for oneDAL to use for each rank
logger::println(logger::INFO, "ALS (native): Input info:");
logger::println(logger::INFO, "- NumberOfRows: %d",
dataTable->getNumberOfRows());
logger::println(logger::INFO, "- NumberOfColumns: %d",
dataTable->getNumberOfColumns());
logger::println(logger::INFO, "- NumberOfRatings: %d",
dataTable->getDataSize());
logger::println(logger::INFO, "- fullNUsers: %d", nUsers);
logger::println(logger::INFO, "- nFactors: %d", nFactors);

// Set number of threads for OneDAL to use for each rank
services::Environment::getInstance()->setNumberOfThreads(executor_cores);
int nThreadsNew =
services::Environment::getInstance()->getNumberOfThreads();
cout << "oneDAL (native): Number of CPU threads used: " << nThreadsNew
<< endl;
logger::println(logger::INFO,
"OneDAL (native): Number of CPU threads used: %d",
nThreadsNew);

int nBlocks = executor_num;
initializeModel(rankId, comm, partitionId, nBlocks, nUsers, nFactors);
@@ -514,16 +519,18 @@
auto pItem = itemsPartialResultLocal->get(training::outputOfStep4ForStep1)
->getFactors();

std::cout << "\n=== Results for Rank " << rankId << "===\n" << std::endl;
logger::println(logger::INFO, "");
logger::println(logger::INFO, "=== Results for Rank %d ===", rankId);
logger::println(logger::INFO, "");
printNumericTable(pUser, "User Factors (first 10 rows x 20 columns):", 10,
20);
printNumericTable(pItem, "Item Factors (first 10 rows x 20 columns):", 10,
20);
std::cout << "User Offset: " << getOffsetFromOffsetTable(userOffset)
<< std::endl;
std::cout << "Item Offset: " << getOffsetFromOffsetTable(itemOffset)
<< std::endl;
std::cout << std::endl;
logger::println(logger::INFO, "User Offset: %d",
getOffsetFromOffsetTable(userOffset));
logger::println(logger::INFO, "Item Offset: %d",
getOffsetFromOffsetTable(itemOffset));
logger::println(logger::INFO, "");

// Get the class of the input object
jclass clazz = env->GetObjectClass(resultObj);
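
A note on the printf-style calls in this file: std::chrono::duration_cast<std::chrono::seconds>(t2 - t1).count() returns a signed 64-bit integer, so a plain %d only works where int happens to be wide enough. A small standalone illustration of the strictly portable form (not code from this PR):

#include <chrono>
#include <cinttypes>
#include <cstdint>
#include <cstdio>

int main() {
    auto t1 = std::chrono::high_resolution_clock::now();
    auto t2 = std::chrono::high_resolution_clock::now();
    // count() returns std::chrono::seconds::rep, a 64-bit signed type,
    // so cast explicitly and use the matching PRId64 specifier.
    auto secs =
        std::chrono::duration_cast<std::chrono::seconds>(t2 - t1).count();
    std::printf("took %" PRId64 " secs\n", static_cast<std::int64_t>(secs));
    return 0;
}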
19 changes: 5 additions & 14 deletions mllib-dal/src/main/native/ALSShuffle.cpp
@@ -22,6 +22,8 @@

#include "ALSShuffle.h"

#include "Logger.h"

using namespace std;

std::vector<Rating> recvData;
@@ -72,11 +74,9 @@ Rating *shuffle_all2all(ccl::communicator &comm,
// Calculate send buffer size
for (size_t i = 0; i < nBlocks; i++) {
perNodeSendLens[i] = partitions[i].size() * RATING_SIZE;
// cout << "rank " << rankId << " Send partition " << i << " size " <<
// perNodeSendLens[i] << endl;
sendBufSize += perNodeSendLens[i];
}
cout << "sendData size " << sendBufSize << endl;
logger::println(logger::INFO, "sendData size %d", sendBufSize);
sendData.resize(sendBufSize);

// Fill in send buffer
@@ -94,8 +94,6 @@

// Calculate recv buffer size
for (size_t i = 0; i < nBlocks; i++) {
// cout << "rank " << rankId << " Recv partition " << i << " size " <<
// perNodeRecvLens[i] << endl;
recvBufSize += perNodeRecvLens[i];
}

@@ -109,18 +107,11 @@

sort(recvData.begin(), recvData.end(), compareRatingByUser);

// for (auto r : recvData) {
// cout << r.user << " " << r.item << " " << r.rating << endl;
// }

newRatingsNum = recvData.size();
// RatingPartition::iterator iter = std::unique(recvData.begin(),
// recvData.end(), compareRatingUserEquality); newCsrRowNum =
// std::distance(recvData.begin(), iter);
newCsrRowNum = distinct_count(recvData);

cout << "newRatingsNum: " << newRatingsNum
<< " newCsrRowNum: " << newCsrRowNum << endl;
logger::println(logger::INFO, "newRatingsNum: %d, newCsrRowNum: %d",
newRatingsNum, newCsrRowNum);

return recvData.data();
}
1 change: 0 additions & 1 deletion mllib-dal/src/main/native/Common.hpp
@@ -22,5 +22,4 @@

#include "GPU.h"
#include "Communicator.hpp"
#include "OutputHelpers.hpp"
#include "oneapi/dal/table/homogen.hpp"
55 changes: 33 additions & 22 deletions mllib-dal/src/main/native/CorrelationImpl.cpp
@@ -25,6 +25,8 @@
#include "com_intel_oap_mllib_stat_CorrelationDALImpl.h"
#include "service.h"

#include "Logger.h"

using namespace std;
#ifdef CPU_GPU_PROFILE
namespace covariance_gpu = oneapi::dal::covariance;
@@ -53,8 +55,9 @@ static void doCorrelationDaalCompute(JNIEnv *env, jobject obj, size_t rankId,
auto t2 = std::chrono::high_resolution_clock::now();
auto duration =
std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1).count();
std::cout << "Correleation (native): local step took " << duration / 1000
<< " secs" << std::endl;
logger::println(logger::INFO,
"Correleation (native): local step took %d secs",
duration / 1000);

t1 = std::chrono::high_resolution_clock::now();

@@ -80,8 +83,9 @@

duration =
std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1).count();
std::cout << "Correleation (native): ccl_allgatherv took "
<< duration / 1000 << " secs" << std::endl;
logger::println(logger::INFO,
"Correleation (native): ccl_allgatherv took %d secs",
duration / 1000);
if (isRoot) {
auto t1 = std::chrono::high_resolution_clock::now();
/* Create an algorithm to compute covariance on the master node */
@@ -119,8 +123,9 @@ static void doCorrelationDaalCompute(JNIEnv *env, jobject obj, size_t rankId,
auto duration =
std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1)
.count();
std::cout << "Correlation (native): master step took "
<< duration / 1000 << " secs" << std::endl;
logger::println(logger::INFO,
"Correleation (native): master step took %d secs",
duration / 1000);

/* Print the results */
printNumericTable(result->get(covariance_cpu::correlation),
@@ -147,7 +152,7 @@ static void doCorrelationOneAPICompute(
JNIEnv *env, jlong pNumTabData,
preview::spmd::communicator<preview::spmd::device_memory_access::usm> comm,
jobject resultObj) {
std::cout << "oneDAL (native): GPU compute start" << std::endl;
logger::println(logger::INFO, "oneDAL (native): GPU compute start");
const bool isRoot = (comm.get_rank() == ccl_root);
homogen_table htable =
*reinterpret_cast<const homogen_table *>(pNumTabData);
@@ -159,15 +164,18 @@
auto t1 = std::chrono::high_resolution_clock::now();
const auto result_train = preview::compute(comm, cor_desc, htable);
if (isRoot) {
std::cout << "Mean:\n" << result_train.get_means() << std::endl;
std::cout << "Correlation:\n"
<< result_train.get_cor_matrix() << std::endl;
logger::println(logger::INFO, "Mean:");
printHomegenTable(result_train.get_means());
Collaborator comment on this line: printHomegenTable should be moved to the logger; needs a follow-up in another PR.
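
A hedged sketch of what that follow-up could look like: wrapping table printing in the logger so it honors the same level threshold. logger::printTable is an assumed name, and the sketch assumes a stream operator for oneDAL tables is in scope (such as the one OutputHelpers.hpp used to provide):

#include <sstream>

#include "Logger.h"
#include "oneapi/dal/table/homogen.hpp"

namespace logger {
// Hypothetical follow-up: format the table to a string, then emit it
// through println so level filtering and flushing apply uniformly.
inline void printTable(Level level, const oneapi::dal::table &table) {
    std::ostringstream oss;
    oss << table; // assumes an operator<< for oneDAL tables is in scope
    println(level, "%s", oss.str().c_str());
}
} // namespace logger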

logger::println(logger::INFO, "Correlation:");
printHomegenTable(result_train.get_cor_matrix());
auto t2 = std::chrono::high_resolution_clock::now();
auto duration =
std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1)
.count();
std::cout << "Correlation batch(native): computing step took "
<< duration / 1000 << " secs." << std::endl;
logger::println(
logger::INFO,
"Correlation batch(native): computing step took %d secs.",
duration / 1000);
// Return all covariance & mean
jclass clazz = env->GetObjectClass(resultObj);

@@ -190,9 +198,10 @@ Java_com_intel_oap_mllib_stat_CorrelationDALImpl_cCorrelationTrainDAL(
JNIEnv *env, jobject obj, jlong pNumTabData, jint executorNum,
jint executorCores, jint computeDeviceOrdinal, jintArray gpuIdxArray,
jobject resultObj) {
std::cout << "oneDAL (native): use DPC++ kernels "
<< "; device " << ComputeDeviceString[computeDeviceOrdinal]
<< std::endl;
logger::println(logger::INFO,
"oneDAL (native): use DPC++ kernels; device %s",
ComputeDeviceString[computeDeviceOrdinal].c_str());

ccl::communicator &cclComm = getComm();
int rankId = cclComm.rank();
ComputeDevice device = getComputeDeviceByOrdinal(computeDeviceOrdinal);
@@ -205,18 +214,20 @@

int nThreadsNew =
services::Environment::getInstance()->getNumberOfThreads();
std::cout << "oneDAL (native): Number of CPU threads used"
<< nThreadsNew << std::endl;
logger::println(logger::INFO,
"oneDAL (native): Number of CPU threads used %d",
nThreadsNew);
doCorrelationDaalCompute(env, obj, rankId, cclComm, pData, executorNum,
resultObj);
break;
}
#ifdef CPU_GPU_PROFILE
case ComputeDevice::gpu: {
int nGpu = env->GetArrayLength(gpuIdxArray);
std::cout << "oneDAL (native): use GPU kernels with " << nGpu
<< " GPU(s)"
<< " rankid " << rankId << std::endl;
logger::println(
logger::INFO,
"oneDAL (native): use GPU kernels with %d GPU(s) rankid %d", nGpu,
rankId);

jint *gpuIndices = env->GetIntArrayElements(gpuIdxArray, 0);

@@ -235,8 +246,8 @@ Java_com_intel_oap_mllib_stat_CorrelationDALImpl_cCorrelationTrainDAL(
}
#endif
default: {
std::cout << "no supported device!" << std::endl;
exit(-1);
deviceError("Correlation",
ComputeDeviceString[computeDeviceOrdinal].c_str());
}
}
return 0;