Skip to content

Commit

Permalink
Merge branch 'oap-project:master' into master
Browse files (browse the repository at this point in the history)
  • Loading branch information
minmingzhu authored Jul 22, 2024
2 parents 3bee6bb + d15025c commit 5d08ac0
Show file tree
Hide file tree
Showing 29 changed files with 254 additions and 150 deletions.
17 changes: 10 additions & 7 deletions mllib-dal/src/main/native/CorrelationImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -149,13 +149,15 @@ static void doCorrelationDaalCompute(JNIEnv *env, jobject obj, size_t rankId,

#ifdef CPU_GPU_PROFILE
static void doCorrelationOneAPICompute(
JNIEnv *env, jlong pNumTabData,
JNIEnv *env, jlong pNumTabData, jlong numRows, jlong numCols,
preview::spmd::communicator<preview::spmd::device_memory_access::usm> comm,
jobject resultObj) {
logger::println(logger::INFO, "oneDAL (native): GPU compute start");
const bool isRoot = (comm.get_rank() == ccl_root);
homogen_table htable =
*reinterpret_cast<const homogen_table *>(pNumTabData);
homogen_table htable = *reinterpret_cast<homogen_table *>(
createHomogenTableWithArrayPtr(pNumTabData, numRows, numCols,
comm.get_queue())
.get());

const auto cor_desc =
covariance_gpu::descriptor<GpuAlgorithmFPType>{}.set_result_options(
Expand Down Expand Up @@ -195,9 +197,9 @@ static void doCorrelationOneAPICompute(

JNIEXPORT jlong JNICALL
Java_com_intel_oap_mllib_stat_CorrelationDALImpl_cCorrelationTrainDAL(
JNIEnv *env, jobject obj, jlong pNumTabData, jint executorNum,
jint executorCores, jint computeDeviceOrdinal, jintArray gpuIdxArray,
jobject resultObj) {
JNIEnv *env, jobject obj, jlong pNumTabData, jlong numRows, jlong numCols,
jint executorNum, jint executorCores, jint computeDeviceOrdinal,
jintArray gpuIdxArray, jobject resultObj) {
logger::println(logger::INFO,
"oneDAL (native): use DPC++ kernels; device %s",
ComputeDeviceString[computeDeviceOrdinal].c_str());
Expand Down Expand Up @@ -240,7 +242,8 @@ Java_com_intel_oap_mllib_stat_CorrelationDALImpl_cCorrelationTrainDAL(
auto comm =
preview::spmd::make_communicator<preview::spmd::backend::ccl>(
queue, size, rankId, kvs);
doCorrelationOneAPICompute(env, pNumTabData, comm, resultObj);
doCorrelationOneAPICompute(env, pNumTabData, numRows, numCols, comm,
resultObj);
env->ReleaseIntArrayElements(gpuIdxArray, gpuIndices, 0);
break;
}
Expand Down
38 changes: 20 additions & 18 deletions mllib-dal/src/main/native/DecisionForestClassifierImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,8 @@ jobject collect_model(JNIEnv *env, const df::model<Task> &m,
}

static jobject doRFClassifierOneAPICompute(
JNIEnv *env, jlong pNumTabFeature, jlong pNumTabLabel, jint executorNum,
JNIEnv *env, jlong pNumTabFeature, jlong featureRows, jlong featureCols,
jlong pNumTabLabel, jlong labelCols, jint executorNum,
jint computeDeviceOrdinal, jint classCount, jint treeCount,
jint numFeaturesPerNode, jint minObservationsLeafNode,
jint minObservationsSplitNode, jdouble minWeightFractionLeafNode,
Expand All @@ -218,15 +219,14 @@ static jobject doRFClassifierOneAPICompute(
jobject resultObj) {
logger::println(logger::INFO, "oneDAL (native): GPU compute start");
const bool isRoot = (comm.get_rank() == ccl_root);
homogen_table hFeaturetable =
*reinterpret_cast<const homogen_table *>(pNumTabFeature);
homogen_table hLabeltable =
*reinterpret_cast<const homogen_table *>(pNumTabLabel);
logger::println(logger::INFO,
"doRFClassifierOneAPICompute get_column_count = %d",
hFeaturetable.get_column_count());
logger::println(logger::INFO, "doRFClassifierOneAPICompute classCount = %d",
classCount);
homogen_table hFeaturetable = *reinterpret_cast<homogen_table *>(
createHomogenTableWithArrayPtr(pNumTabFeature, featureRows, featureCols,
comm.get_queue())
.get());
homogen_table hLabeltable = *reinterpret_cast<homogen_table *>(
createHomogenTableWithArrayPtr(pNumTabLabel, featureRows, labelCols,
comm.get_queue())
.get());

const auto df_desc =
df::descriptor<GpuAlgorithmFPType, df::method::hist,
Expand Down Expand Up @@ -300,9 +300,10 @@ static jobject doRFClassifierOneAPICompute(
*/
JNIEXPORT jobject JNICALL
Java_com_intel_oap_mllib_classification_RandomForestClassifierDALImpl_cRFClassifierTrainDAL(
JNIEnv *env, jobject obj, jlong pNumTabFeature, jlong pNumTabLabel,
jint executorNum, jint computeDeviceOrdinal, jint classCount,
jint treeCount, jint numFeaturesPerNode, jint minObservationsLeafNode,
JNIEnv *env, jobject obj, jlong pNumTabFeature, jlong featureRows,
jlong featureCols, jlong pNumTabLabel, jlong labelCols, jint executorNum,
jint computeDeviceOrdinal, jint classCount, jint treeCount,
jint numFeaturesPerNode, jint minObservationsLeafNode,
jint minObservationsSplitNode, jdouble minWeightFractionLeafNode,
jdouble minImpurityDecreaseSplitNode, jint maxTreeDepth, jlong seed,
jint maxBins, jboolean bootstrap, jintArray gpuIdxArray,
Expand Down Expand Up @@ -333,11 +334,12 @@ Java_com_intel_oap_mllib_classification_RandomForestClassifierDALImpl_cRFClassif
preview::spmd::make_communicator<preview::spmd::backend::ccl>(
queue, size, rankId, kvs);
jobject hashmapObj = doRFClassifierOneAPICompute(
env, pNumTabFeature, pNumTabLabel, executorNum,
computeDeviceOrdinal, classCount, treeCount, numFeaturesPerNode,
minObservationsLeafNode, minObservationsSplitNode,
minWeightFractionLeafNode, minImpurityDecreaseSplitNode,
maxTreeDepth, seed, maxBins, bootstrap, comm, resultObj);
env, pNumTabFeature, featureRows, featureCols, pNumTabLabel,
labelCols, executorNum, computeDeviceOrdinal, classCount, treeCount,
numFeaturesPerNode, minObservationsLeafNode,
minObservationsSplitNode, minWeightFractionLeafNode,
minImpurityDecreaseSplitNode, maxTreeDepth, seed, maxBins,
bootstrap, comm, resultObj);
return hashmapObj;
}
default: {
Expand Down
36 changes: 19 additions & 17 deletions mllib-dal/src/main/native/DecisionForestRegressorImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -207,21 +207,23 @@ jobject collect_model(JNIEnv *env, const df::model<Task> &m,
}

static jobject doRFRegressorOneAPICompute(
JNIEnv *env, jlong pNumTabFeature, jlong pNumTabLabel, jint executorNum,
JNIEnv *env, jlong pNumTabFeature, jlong featureRows, jlong featureCols,
jlong pNumTabLabel, jlong labelCols, jint executorNum,
jint computeDeviceOrdinal, jint treeCount, jint numFeaturesPerNode,
jint minObservationsLeafNode, jint maxTreeDepth, jlong seed, jint maxbins,
jboolean bootstrap,
preview::spmd::communicator<preview::spmd::device_memory_access::usm> comm,
jobject resultObj) {
logger::println(logger::INFO, "OneDAL (native): GPU compute start");
const bool isRoot = (comm.get_rank() == ccl_root);
homogen_table hFeaturetable =
*reinterpret_cast<const homogen_table *>(pNumTabFeature);
homogen_table hLabeltable =
*reinterpret_cast<const homogen_table *>(pNumTabLabel);
logger::println(logger::INFO,
"doRFRegressorOneAPICompute get_column_count = %d",
hFeaturetable.get_column_count());
homogen_table hFeaturetable = *reinterpret_cast<homogen_table *>(
createHomogenTableWithArrayPtr(pNumTabFeature, featureRows, featureCols,
comm.get_queue())
.get());
homogen_table hLabeltable = *reinterpret_cast<homogen_table *>(
createHomogenTableWithArrayPtr(pNumTabLabel, featureRows, labelCols,
comm.get_queue())
.get());
const auto df_desc =
df::descriptor<GpuAlgorithmFPType, df::method::hist,
df::task::regression>{}
Expand Down Expand Up @@ -290,11 +292,11 @@ static jobject doRFRegressorOneAPICompute(

JNIEXPORT jobject JNICALL
Java_com_intel_oap_mllib_regression_RandomForestRegressorDALImpl_cRFRegressorTrainDAL(
JNIEnv *env, jobject obj, jlong pNumTabFeature, jlong pNumTabLabel,
jint executorNum, jint computeDeviceOrdinal, jint treeCount,
jint numFeaturesPerNode, jint minObservationsLeafNode, jint maxTreeDepth,
jlong seed, jint maxbins, jboolean bootstrap, jintArray gpuIdxArray,
jobject resultObj) {
JNIEnv *env, jobject obj, jlong pNumTabFeature, jlong featureRows,
jlong featureCols, jlong pNumTabLabel, jlong labelCols, jint executorNum,
jint computeDeviceOrdinal, jint treeCount, jint numFeaturesPerNode,
jint minObservationsLeafNode, jint maxTreeDepth, jlong seed, jint maxbins,
jboolean bootstrap, jintArray gpuIdxArray, jobject resultObj) {
logger::println(logger::INFO,
"OneDAL (native): use DPC++ kernels; device %s",
ComputeDeviceString[computeDeviceOrdinal].c_str());
Expand Down Expand Up @@ -322,10 +324,10 @@ Java_com_intel_oap_mllib_regression_RandomForestRegressorDALImpl_cRFRegressorTra
preview::spmd::make_communicator<preview::spmd::backend::ccl>(
queue, size, rankId, kvs);
jobject hashmapObj = doRFRegressorOneAPICompute(
env, pNumTabFeature, pNumTabLabel, executorNum,
computeDeviceOrdinal, treeCount, numFeaturesPerNode,
minObservationsLeafNode, maxTreeDepth, seed, maxbins, bootstrap,
comm, resultObj);
env, pNumTabFeature, featureRows, featureCols, pNumTabLabel,
labelCols, executorNum, computeDeviceOrdinal, treeCount,
numFeaturesPerNode, minObservationsLeafNode, maxTreeDepth, seed,
maxbins, bootstrap, comm, resultObj);
return hashmapObj;
}
default: {
Expand Down
24 changes: 13 additions & 11 deletions mllib-dal/src/main/native/KMeansImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -243,14 +243,16 @@ static jlong doKMeansDaalCompute(JNIEnv *env, jobject obj, size_t rankId,

#ifdef CPU_GPU_PROFILE
static jlong doKMeansOneAPICompute(
JNIEnv *env, jlong pNumTabData, jlong pNumTabCenters, jint clusterNum,
jdouble tolerance, jint iterationNum,
JNIEnv *env, jlong pNumTabData, jlong numRows, jlong numCols,
jlong pNumTabCenters, jint clusterNum, jdouble tolerance, jint iterationNum,
preview::spmd::communicator<preview::spmd::device_memory_access::usm> comm,
jobject resultObj) {
logger::println(logger::INFO, "OneDAL (native): GPU compute start");
const bool isRoot = (comm.get_rank() == ccl_root);
homogen_table htable =
*reinterpret_cast<const homogen_table *>(pNumTabData);
homogen_table htable = *reinterpret_cast<homogen_table *>(
createHomogenTableWithArrayPtr(pNumTabData, numRows, numCols,
comm.get_queue())
.get());
homogen_table centroids =
*reinterpret_cast<const homogen_table *>(pNumTabCenters);
const auto kmeans_desc = kmeans_gpu::descriptor<GpuAlgorithmFPType>()
Expand Down Expand Up @@ -303,10 +305,10 @@ static jlong doKMeansOneAPICompute(
*/
JNIEXPORT jlong JNICALL
Java_com_intel_oap_mllib_clustering_KMeansDALImpl_cKMeansOneapiComputeWithInitCenters(
JNIEnv *env, jobject obj, jlong pNumTabData, jlong pNumTabCenters,
jint clusterNum, jdouble tolerance, jint iterationNum, jint executorNum,
jint executorCores, jint computeDeviceOrdinal, jintArray gpuIdxArray,
jobject resultObj) {
JNIEnv *env, jobject obj, jlong pNumTabData, jlong numRows, jlong numCols,
jlong pNumTabCenters, jint clusterNum, jdouble tolerance, jint iterationNum,
jint executorNum, jint executorCores, jint computeDeviceOrdinal,
jintArray gpuIdxArray, jobject resultObj) {
logger::println(logger::INFO,
"OneDAL (native): use DPC++ kernels; device %s",
ComputeDeviceString[computeDeviceOrdinal].c_str());
Expand Down Expand Up @@ -352,9 +354,9 @@ Java_com_intel_oap_mllib_clustering_KMeansDALImpl_cKMeansOneapiComputeWithInitCe
auto comm =
preview::spmd::make_communicator<preview::spmd::backend::ccl>(
queue, size, rankId, kvs);
ret =
doKMeansOneAPICompute(env, pNumTabData, pNumTabCenters, clusterNum,
tolerance, iterationNum, comm, resultObj);
ret = doKMeansOneAPICompute(env, pNumTabData, numRows, numCols,
pNumTabCenters, clusterNum, tolerance,
iterationNum, comm, resultObj);

env->ReleaseIntArrayElements(gpuIdxArray, gpuIndices, 0);
break;
Expand Down
31 changes: 18 additions & 13 deletions mllib-dal/src/main/native/LinearRegressionImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -216,9 +216,10 @@ ridge_regression_compute(size_t rankId, ccl::communicator &comm,
#ifdef CPU_GPU_PROFILE
static jlong doLROneAPICompute(JNIEnv *env, size_t rankId,
ccl::communicator &cclComm, sycl::queue &queue,
jlong pData, jlong pLabel,
jboolean jfitIntercept, jint executorNum,
jobject resultObj) {
jlong pNumTabFeature, jlong featureRows,
jlong featureCols, jlong pNumTabLabel,
jlong labelCols, jboolean jfitIntercept,
jint executorNum, jobject resultObj) {
logger::println(logger::INFO,
"oneDAL (native): GPU compute start , rankid %d", rankId);
const bool isRoot = (rankId == ccl_root);
Expand All @@ -228,9 +229,14 @@ static jlong doLROneAPICompute(JNIEnv *env, size_t rankId,
ccl::shared_ptr_class<ccl::kvs> &kvs = getKvs();
auto comm = preview::spmd::make_communicator<preview::spmd::backend::ccl>(
queue, size, rankId, kvs);

homogen_table xtrain = *reinterpret_cast<const homogen_table *>(pData);
homogen_table ytrain = *reinterpret_cast<const homogen_table *>(pLabel);
homogen_table xtrain = *reinterpret_cast<homogen_table *>(
createHomogenTableWithArrayPtr(pNumTabFeature, featureRows, featureCols,
comm.get_queue())
.get());
homogen_table ytrain = *reinterpret_cast<homogen_table *>(
createHomogenTableWithArrayPtr(pNumTabLabel, featureRows, labelCols,
comm.get_queue())
.get());

linear_regression_gpu::train_input local_input{xtrain, ytrain};
const auto linear_regression_desc =
Expand All @@ -256,7 +262,8 @@ static jlong doLROneAPICompute(JNIEnv *env, size_t rankId,
*/
JNIEXPORT jlong JNICALL
Java_com_intel_oap_mllib_regression_LinearRegressionDALImpl_cLinearRegressionTrainDAL(
JNIEnv *env, jobject obj, jlong data, jlong label, jboolean fitIntercept,
JNIEnv *env, jobject obj, jlong feature, jlong featureRows,
jlong featureCols, jlong label, jlong labelCols, jboolean fitIntercept,
jdouble regParam, jdouble elasticNetParam, jint executorNum,
jint executorCores, jint computeDeviceOrdinal, jintArray gpuIdxArray,
jobject resultObj) {
Expand Down Expand Up @@ -288,16 +295,14 @@ Java_com_intel_oap_mllib_regression_LinearRegressionDALImpl_cLinearRegressionTra
auto queue =
getAssignedGPU(device, cclComm, size, rankId, gpuIndices, nGpu);

jlong pDatagpu = (jlong)data;
jlong pLabelgpu = (jlong)label;
resultptr =
doLROneAPICompute(env, rankId, cclComm, queue, pDatagpu, pLabelgpu,
fitIntercept, executorNum, resultObj);
resultptr = doLROneAPICompute(
env, rankId, cclComm, queue, feature, featureRows, featureCols,
label, labelCols, fitIntercept, executorNum, resultObj);
env->ReleaseIntArrayElements(gpuIdxArray, gpuIndices, 0);
#endif
} else {
NumericTablePtr pLabel = *((NumericTablePtr *)label);
NumericTablePtr pData = *((NumericTablePtr *)data);
NumericTablePtr pData = *((NumericTablePtr *)feature);

// Set number of threads for oneDAL to use for each rank
services::Environment::getInstance()->setNumberOfThreads(executorCores);
Expand Down
16 changes: 9 additions & 7 deletions mllib-dal/src/main/native/PCAImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -182,13 +182,15 @@ static void doPCADAALCompute(JNIEnv *env, jobject obj, size_t rankId,

#ifdef CPU_GPU_PROFILE
static void doPCAOneAPICompute(
JNIEnv *env, jlong pNumTabData,
JNIEnv *env, jlong pNumTabData, jlong numRows, jlong numCols,
preview::spmd::communicator<preview::spmd::device_memory_access::usm> comm,
jobject resultObj) {
logger::println(logger::INFO, "oneDAL (native): GPU compute start");
const bool isRoot = (comm.get_rank() == ccl_root);
homogen_table htable =
*reinterpret_cast<const homogen_table *>(pNumTabData);
homogen_table htable = *reinterpret_cast<homogen_table *>(
createHomogenTableWithArrayPtr(pNumTabData, numRows, numCols,
comm.get_queue())
.get());

const auto cov_desc =
covariance_gpu::descriptor<GpuAlgorithmFPType>{}.set_result_options(
Expand Down Expand Up @@ -248,9 +250,9 @@ static void doPCAOneAPICompute(

JNIEXPORT jlong JNICALL
Java_com_intel_oap_mllib_feature_PCADALImpl_cPCATrainDAL(
JNIEnv *env, jobject obj, jlong pNumTabData, jint executorNum,
jint executorCores, jint computeDeviceOrdinal, jintArray gpuIdxArray,
jobject resultObj) {
JNIEnv *env, jobject obj, jlong pNumTabData, jlong numRows, jlong numCols,
jint executorNum, jint executorCores, jint computeDeviceOrdinal,
jintArray gpuIdxArray, jobject resultObj) {
logger::println(logger::INFO,
"oneDAL (native): use DPC++ kernels; device %s",
ComputeDeviceString[computeDeviceOrdinal].c_str());
Expand Down Expand Up @@ -293,7 +295,7 @@ Java_com_intel_oap_mllib_feature_PCADALImpl_cPCATrainDAL(
auto comm =
preview::spmd::make_communicator<preview::spmd::backend::ccl>(
queue, size, rankId, kvs);
doPCAOneAPICompute(env, pNumTabData, comm, resultObj);
doPCAOneAPICompute(env, pNumTabData, numRows, numCols, comm, resultObj);
env->ReleaseIntArrayElements(gpuIdxArray, gpuIndices, 0);
break;
}
Expand Down
18 changes: 11 additions & 7 deletions mllib-dal/src/main/native/SummarizerImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -202,13 +202,16 @@ static void doSummarizerDAALCompute(JNIEnv *env, jobject obj, size_t rankId,

#ifdef CPU_GPU_PROFILE
static void doSummarizerOneAPICompute(
JNIEnv *env, jlong pNumTabData,
JNIEnv *env, jlong pNumTabData, jlong numRows, jlong numCols,
preview::spmd::communicator<preview::spmd::device_memory_access::usm> comm,
jobject resultObj) {
logger::println(logger::INFO, "oneDAL (native): GPU compute start");
const bool isRoot = (comm.get_rank() == ccl_root);
homogen_table htable =
*reinterpret_cast<const homogen_table *>(pNumTabData);
homogen_table htable = *reinterpret_cast<homogen_table *>(
createHomogenTableWithArrayPtr(pNumTabData, numRows, numCols,
comm.get_queue())
.get());

const auto bs_desc = basic_statistics::descriptor<GpuAlgorithmFPType>{};
auto t1 = std::chrono::high_resolution_clock::now();
const auto result_train = preview::compute(comm, bs_desc, htable);
Expand Down Expand Up @@ -265,9 +268,9 @@ static void doSummarizerOneAPICompute(

JNIEXPORT jlong JNICALL
Java_com_intel_oap_mllib_stat_SummarizerDALImpl_cSummarizerTrainDAL(
JNIEnv *env, jobject obj, jlong pNumTabData, jint executorNum,
jint executorCores, jint computeDeviceOrdinal, jintArray gpuIdxArray,
jobject resultObj) {
JNIEnv *env, jobject obj, jlong pNumTabData, jlong numRows, jlong numCols,
jint executorNum, jint executorCores, jint computeDeviceOrdinal,
jintArray gpuIdxArray, jobject resultObj) {
logger::println(logger::INFO,
"oneDAL (native): use DPC++ kernels; device %s",
ComputeDeviceString[computeDeviceOrdinal].c_str());
Expand Down Expand Up @@ -310,7 +313,8 @@ Java_com_intel_oap_mllib_stat_SummarizerDALImpl_cSummarizerTrainDAL(
auto comm =
preview::spmd::make_communicator<preview::spmd::backend::ccl>(
queue, size, rankId, kvs);
doSummarizerOneAPICompute(env, pNumTabData, comm, resultObj);
doSummarizerOneAPICompute(env, pNumTabData, numRows, numCols, comm,
resultObj);
env->ReleaseIntArrayElements(gpuIdxArray, gpuIndices, 0);
break;
}
Expand Down
Loading

0 comments on commit 5d08ac0

Please sign in to comment.