[REVIEW] KL Divergence metric implementation #674

Merged · 15 commits · Jun 30, 2019
1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -2,6 +2,7 @@

## New Features

- PR #674: KL Divergence metric ml-prim
- PR #652: Adjusted Rand Index metric ml-prim
- PR #679: Class label manipulation ml-prim
- PR #636: Rand Index metric ml-prim
13 changes: 13 additions & 0 deletions cpp/src/metrics/metrics.cu
@@ -19,6 +19,7 @@
#include "metrics.hpp"

#include "metrics/adjustedRandIndex.h"
#include "metrics/klDivergence.h"
#include "metrics/randIndex.h"
#include "score/scores.h"

@@ -49,5 +50,17 @@ double adjustedRandIndex(const cumlHandle &handle, const int *y,
handle.getDeviceAllocator(), handle.getStream());
}

double klDivergence(const cumlHandle &handle, const double *y,
const double *y_hat, int n) {
return MLCommon::Metrics::klDivergence(
y, y_hat, n, handle.getDeviceAllocator(), handle.getStream());
}

float klDivergence(const cumlHandle &handle, const float *y, const float *y_hat,
int n) {
return MLCommon::Metrics::klDivergence(
y, y_hat, n, handle.getDeviceAllocator(), handle.getStream());
}

} // namespace Metrics
} // namespace ML
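
For context, a minimal calling sketch of the new C++ API (editorial illustration, not part of the diff; the include path and the helper name compute_kld are assumptions):

#include "metrics/metrics.hpp"  // assumed include path for the declarations above

// Hypothetical helper: computes KL(P || Q) for two device arrays of n
// probabilities using the double-precision overload added in this PR.
double compute_kld(const ML::cumlHandle &handle, const double *d_p,
                   const double *d_q, int n) {
  return ML::Metrics::klDivergence(handle, d_p, d_q, n);
}
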
136 changes: 84 additions & 52 deletions cpp/src/metrics/metrics.hpp
@@ -23,72 +23,104 @@ namespace ML {
namespace Metrics {

/**
 * Calculates the "Coefficient of Determination" (R-Squared) score
 * normalizing the sum of squared errors by the total sum of squares
 * with single precision.
 *
 * This score indicates the proportionate amount of variation in an
 * expected response variable that is explained by the independent variables
 * in a linear regression model. The larger the R-squared value, the
 * more variability is explained by the linear regression model.
 *
 * @param handle: cumlHandle
 * @param y: Array of ground-truth response variables
 * @param y_hat: Array of predicted response variables
 * @param n: Number of elements in y and y_hat
 * @return: The R-squared value.
 */
float r2_score_py(const cumlHandle &handle, float *y, float *y_hat, int n);

/**
 * Calculates the "Coefficient of Determination" (R-Squared) score
 * normalizing the sum of squared errors by the total sum of squares
 * with double precision.
 *
 * This score indicates the proportionate amount of variation in an
 * expected response variable that is explained by the independent variables
 * in a linear regression model. The larger the R-squared value, the
 * more variability is explained by the linear regression model.
 *
 * @param handle: cumlHandle
 * @param y: Array of ground-truth response variables
 * @param y_hat: Array of predicted response variables
 * @param n: Number of elements in y and y_hat
 * @return: The R-squared value.
 */
double r2_score_py(const cumlHandle &handle, double *y, double *y_hat, int n);
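
For reference, a sketch of the standard definition behind both r2_score_py overloads above, where $\bar{y}$ denotes the mean of the ground-truth values:

$$
R^2 = 1 - \frac{\sum_{i=1}^{n} (y_i - \hat{y}_i)^2}{\sum_{i=1}^{n} (y_i - \bar{y})^2}
$$
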

/**
 * Calculates the "rand index"
 *
 * This metric is a measure of similarity between two data clusterings.
 *
 * @param handle: cumlHandle
 * @param y: Array of response variables of the first clustering classifications
 * @param y_hat: Array of response variables of the second clustering classifications
 * @param n: Number of elements in y and y_hat
 * @return: The rand index value
 */

double randIndex(const cumlHandle &handle, double *y, double *y_hat, int n);
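
As a sketch of the standard definition: with $a$ the number of element pairs placed in the same cluster by both clusterings and $b$ the number of pairs placed in different clusters by both, the Rand index is

$$
RI = \frac{a + b}{\binom{n}{2}}
$$
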

/**
 * Calculates the "adjusted rand index"
 *
 * This metric is the corrected-for-chance version of the rand index.
 *
 * @param handle: cumlHandle
 * @param y: Array of response variables of the first clustering classifications
 * @param y_hat: Array of response variables of the second clustering classifications
 * @param n: Number of elements in y and y_hat
 * @param lower_class_range: the lowest value in the range of classes
 * @param upper_class_range: the highest value in the range of classes
 * @return: The adjusted rand index value
 */
double adjustedRandIndex(const cumlHandle &handle, const int *y,
const int *y_hat, const int n,
const int lower_class_range,
const int upper_class_range);
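
In sketch form, the adjusted Rand index rescales the Rand index by its expected value under random labelings:

$$
ARI = \frac{RI - \mathbb{E}[RI]}{\max(RI) - \mathbb{E}[RI]}
$$
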

/**
 * Calculates the "Kullback-Leibler Divergence"
 *
 * The KL divergence tells us how well the probability distribution Q
 * approximates the probability distribution P.
 * It is also often used as a 'distance metric' between two probability
 * distributions (note that it is not symmetric).
 *
 * @param handle: cumlHandle
 * @param y: Array of probabilities corresponding to distribution P
 * @param y_hat: Array of probabilities corresponding to distribution Q
 * @param n: Number of elements in y and y_hat
 * @return: The KL Divergence value
 */
double klDivergence(const cumlHandle &handle, const double *y,
const double *y_hat, int n);
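
For reference, a sketch of the quantity these overloads compute (discrete KL divergence; the convention $0 \cdot \log 0 := 0$ matches the mapping functor in klDivergence.h):

$$
D_{\mathrm{KL}}(P \,\|\, Q) = \sum_{i=1}^{n} p_i \log\frac{p_i}{q_i},
\qquad \text{with } 0 \cdot \log 0 := 0
$$
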

/**
 * Calculates the "Kullback-Leibler Divergence"
 *
 * The KL divergence tells us how well the probability distribution Q
 * approximates the probability distribution P.
 * It is also often used as a 'distance metric' between two probability
 * distributions (note that it is not symmetric).
 *
 * @param handle: cumlHandle
 * @param y: Array of probabilities corresponding to distribution P
 * @param y_hat: Array of probabilities corresponding to distribution Q
 * @param n: Number of elements in y and y_hat
 * @return: The KL Divergence value
 */
float klDivergence(const cumlHandle &handle, const float *y, const float *y_hat,
int n);

} // namespace Metrics
} // namespace ML
82 changes: 82 additions & 0 deletions cpp/src_prims/metrics/klDivergence.h
@@ -0,0 +1,82 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* @file klDivergence.h
* @brief The KL divergence tells us how well the probability distribution Q AKA candidatePDF
* approximates the probability distribution P AKA modelPDF.
*/

#include <math.h>
#include "common/cuml_allocator.hpp"
#include "common/device_buffer.hpp"
#include "cuda_utils.h"
#include "linalg/map_then_reduce.h"

namespace MLCommon {

/**
 * @brief the KL Divergence mapping function
 *
 * @tparam Type: Data type of the input
 * @param modelPDF: the model probability density function value of type Type
 * @param candidatePDF: the candidate probability density function value of type Type
 */
template <typename Type>
struct KLDOp {
  HDI Type operator()(Type modelPDF, Type candidatePDF) {
    // by convention, terms with zero model probability contribute nothing
    // (0 * log(0) is taken to be 0)
    if (modelPDF == 0.0)
      return 0;
    else
      return modelPDF * (log(modelPDF) - log(candidatePDF));
  }
};

namespace Metrics {

/**
 * @brief Function to calculate KL Divergence
 * <a href="https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence">more info on KL Divergence</a>
 *
 * @tparam DataT: Data type of the input array
 * @param modelPDF: the model array of probability density function values of type DataT
 * @param candidatePDF: the candidate array of probability density function values of type DataT
 * @param size: the number of data points
 * @param allocator: object that takes care of temporary device memory allocation of type std::shared_ptr<MLCommon::deviceAllocator>
 * @param stream: the cudaStream object
 * @return: the KL Divergence value
 */
template <typename DataT>
DataT klDivergence(const DataT* modelPDF, const DataT* candidatePDF, int size,
std::shared_ptr<MLCommon::deviceAllocator> allocator,
cudaStream_t stream) {
MLCommon::device_buffer<DataT> d_KLDVal(allocator, stream, 1);
CUDA_CHECK(cudaMemsetAsync(d_KLDVal.data(), 0, sizeof(DataT), stream));

MLCommon::LinAlg::mapThenSumReduce<DataT, KLDOp<DataT>, 256, const DataT*>(
d_KLDVal.data(), (size_t)size, KLDOp<DataT>(), stream, modelPDF,
candidatePDF);

DataT h_KLDVal;

MLCommon::updateHost(&h_KLDVal, d_KLDVal.data(), 1, stream);

CUDA_CHECK(cudaStreamSynchronize(stream));

return h_KLDVal;
}

}; //end namespace Metrics
}; //end namespace MLCommon
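
A minimal host-side sketch of calling the primitive directly (editorial illustration; d_p and d_q are assumed to be device pointers to n probabilities each, and the allocator/stream setup mirrors the test added below):

// Sketch only: direct use of the MLCommon prim with an explicit allocator and stream.
std::shared_ptr<MLCommon::deviceAllocator> allocator(
  new MLCommon::defaultDeviceAllocator());
cudaStream_t stream;
CUDA_CHECK(cudaStreamCreate(&stream));
double kld = MLCommon::Metrics::klDivergence(d_p, d_q, n, allocator, stream);
CUDA_CHECK(cudaStreamDestroy(stream));
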
1 change: 1 addition & 0 deletions cpp/test/CMakeLists.txt
@@ -151,6 +151,7 @@ if(BUILD_PRIMS_TESTS)
prims/grid_sync.cu
prims/hinge.cu
prims/host_buffer.cu
prims/klDivergence.cu
prims/knn.cu
prims/kselection.cu
prims/label.cu
114 changes: 114 additions & 0 deletions cpp/test/prims/klDivergence.cu
@@ -0,0 +1,114 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <gtest/gtest.h>
#include <algorithm>
#include <iostream>
#include <random>
#include "common/cuml_allocator.hpp"
#include "metrics/klDivergence.h"
#include "test_utils.h"

namespace MLCommon {
namespace Metrics {

//parameter structure definition
struct klDivergenceParam {
int nElements;
double tolerance;
};

//test fixture class
template <typename DataT>
class klDivergenceTest : public ::testing::TestWithParam<klDivergenceParam> {
protected:
//test setup (runs before each test)
void SetUp() override {
//getting the parameters
params = ::testing::TestWithParam<klDivergenceParam>::GetParam();

nElements = params.nElements;

//generating random value test input
std::vector<DataT> h_modelPDF(nElements, 0);
std::vector<DataT> h_candidatePDF(nElements, 0);
std::random_device rd;
std::default_random_engine dre(rd());
std::uniform_real_distribution<DataT> realGenerator(0.0, 1.0);

std::generate(h_modelPDF.begin(), h_modelPDF.end(),
[&]() { return realGenerator(dre); });
std::generate(h_candidatePDF.begin(), h_candidatePDF.end(),
[&]() { return realGenerator(dre); });

//allocating and initializing memory to the GPU
CUDA_CHECK(cudaStreamCreate(&stream));
MLCommon::allocate(d_modelPDF, nElements, true);
MLCommon::allocate(d_candidatePDF, nElements, true);

MLCommon::updateDevice(d_modelPDF, &h_modelPDF[0], (int)nElements, stream);
MLCommon::updateDevice(d_candidatePDF, &h_candidatePDF[0], (int)nElements,
stream);
std::shared_ptr<MLCommon::deviceAllocator> allocator(
new defaultDeviceAllocator);

//generating the golden output
for (int i = 0; i < nElements; ++i) {
if (h_modelPDF[i] == 0.0)
truthklDivergence += 0;

else
truthklDivergence +=
h_modelPDF[i] * log(h_modelPDF[i] / h_candidatePDF[i]);
}

//calling the klDivergence CUDA implementation
computedklDivergence = MLCommon::Metrics::klDivergence(
d_modelPDF, d_candidatePDF, nElements, allocator, stream);
}

//test teardown (runs after each test)
void TearDown() override {
CUDA_CHECK(cudaFree(d_modelPDF));
CUDA_CHECK(cudaFree(d_candidatePDF));
CUDA_CHECK(cudaStreamDestroy(stream));
}

//declaring the data values
klDivergenceParam params;
DataT* d_modelPDF = nullptr;
DataT* d_candidatePDF = nullptr;
int nElements = 0;
DataT truthklDivergence = 0;
DataT computedklDivergence = 0;
cudaStream_t stream;
};

//setting test parameter values
const std::vector<klDivergenceParam> inputs = {
  {500, 0.000001}, {200, 0.001}, {5000, 0.000001}, {500000, 0.000001}};

//writing the test suite
typedef klDivergenceTest<double> klDivergenceTestClass;
TEST_P(klDivergenceTestClass, Result) {
ASSERT_NEAR(computedklDivergence, truthklDivergence, params.tolerance);
}
INSTANTIATE_TEST_CASE_P(klDivergence, klDivergenceTestClass,
::testing::ValuesIn(inputs));

} //end namespace Metrics
} //end namespace MLCommon