You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
Right now, there's ZERO guide on how to use the C/C++ API — not a single runnable example can be found online.
There's 1 code example (shown below), but it's totally outdated and cannot compile.
Motivation
It's useful because some people might need to integrate LightGBM directly into their C++ code.
The example code should show how to LoadData, set GBM config, train, predict an input, and free memory. It can either use the C++ code directly, or use the exposed C API shown here: https://lightgbm.readthedocs.io/en/latest/C-API.html
Description
Below is an example program that is outdated and can't compile. I'd like to fix that.
References
#include <LightGBM/config.h>
#include <LightGBM/dataset_loader.h>
#include <LightGBM/boosting.h>
#include <LightGBM/objective_function.h>
#include <LightGBM/metric.h>
#include <LightGBM/utils/common.h>
#include <algorithm>
#include <iostream>
#include <iterator>
#include <memory>
#include <random>
#include <vector>
int main()
{
/* create example dataset */
std::random_device rd;
std::mt19937 gen(rd());
// one random generator for every class
std::vector<std::normal_distribution<>> dists = {
std::normal_distribution<>(0, 1),
std::normal_distribution<>(10, 1)};
/* create raw data */
const int numSamples = 5000;
const int numFeats = 2;
const int numClasses = static_cast<int>(dists.size());
std::cout << "Num classes: " << numClasses << std::endl;
// labels
std::vector<float> labels(numSamples);
for (int i=0; i < numSamples; i++)
labels[i] = i % numClasses;
std::vector< std::vector<double> > features(numSamples);
for (int i=0; i < numSamples; i++)
{
features[i].resize(numFeats);
for (int j=0; j < numFeats; j++)
{
const auto lbl = static_cast<int>(labels[i]);
features[i][j] = dists[lbl](gen);
}
}
// prepare sample data
std::vector< std::vector<double> > sampleData(numFeats);
for (int i=0; i < numSamples; i++)
{
for (int j=0; j < numFeats; j++)
sampleData[j].push_back(features[i][j]);
}
/** Load dataset **/
LightGBM::Config config;
config.num_class = numClasses;
config.max_bin = 255;
config.verbosity = 10;
std::unique_ptr<LightGBM::Dataset> dset;
LightGBM::DatasetLoader loader(config, nullptr, numClasses, nullptr);
dset.reset( loader.ConstructFromSampleData(sampleData, numSamples, numSamples) );
for (int i = 0; i < numSamples; ++i)
{
const int thread_id = 0;
dset->PushOneRow(thread_id, i, features[i]);
}
dset->FinishLoad();
// check bins
for(int j=0; j < numFeats; j++)
{
const auto nbins = dset->FeatureAt(j)->bin_mapper()->num_bin();
std::cout << "Feat " << numFeats << std::endl;
std::cout << " " << dset->FeatureAt(j)->bin_mapper()->BinToValue(0) << " ";
std::cout << " " << dset->FeatureAt(j)->bin_mapper()->BinToValue(nbins-2) << " ";
std::cout << std::endl;
}
if (!dset->SetFloatField("label", labels.data(), numSamples)) {
std::cout << "Error setting label" << std::endl;
return -1;
}
/** Prepare boosting **/
LightGBM::BoostingConfig boostConfig;
boostConfig.num_iterations = 100;
boostConfig.bagging_freq = 1;
boostConfig.bagging_fraction = 0.5;
boostConfig.num_class = numClasses;
// tree params
boostConfig.tree_config.min_data_in_leaf = 10;
boostConfig.tree_config.num_leaves = 16;
//boostConfig.tree_config.min_sum_hessian_in_leaf = 0;
LightGBM::ObjectiveConfig objConfig;
objConfig.num_class = numClasses;
// objConfig.label_gain.clear();
// objConfig.label_gain.resize(numClasses, 1.0);
auto *objFunc = LightGBM::ObjectiveFunction::CreateObjectiveFunction("multiclass", objConfig);
objFunc->Init(dset->metadata(), dset->num_data());
LightGBM::MetricConfig metricConfig;
metricConfig.num_class = numClasses;
std::vector< std::unique_ptr<LightGBM::Metric> > trainMetrics;
auto metric = std::unique_ptr<LightGBM::Metric>(
LightGBM::Metric::CreateMetric("multi_logloss", metricConfig));
metric->Init(dset->metadata(), dset->num_data());
trainMetrics.push_back(std::move(metric));
auto *booster = LightGBM::Boosting::CreateBoosting(LightGBM::BoostingType::kGBDT, nullptr);
booster->Init(&boostConfig, nullptr, objFunc,
LightGBM::Common::ConstPtrInVectorWrapper<LightGBM::Metric>(trainMetrics));
booster->ResetTrainingData(&boostConfig, dset.get(), objFunc,
LightGBM::Common::ConstPtrInVectorWrapper<LightGBM::Metric>(trainMetrics));
// booster->AddValidDataset(dset.get(), LightGBM::Common::ConstPtrInVectorWrapper<LightGBM::Metric>(trainMetrics));
for (int i=0; i < boostConfig.num_iterations; i++)
{
std::cout << "Iteration " << (i+1) << std::endl;
auto scores = booster->GetEvalAt(0);
for(auto &v: scores)
std::cout << "Score: " << v << std::endl;
if (booster->TrainOneIter(nullptr, nullptr, false))
{
std::cout << "Breaking.." << std::endl;
break;
}
}
booster->SetNumIterationForPred(0); // predict with all trees
/** Predict training data **/
std::vector<int> predictedClass(numSamples);
for (int i=0; i < numSamples; i++)
{
auto predVec = booster->PredictRaw(features[i].data());
const auto predMax = std::max_element(predVec.begin(), predVec.end());
predictedClass[i] = std::distance(predVec.begin(), predMax);
}
// compute error
double err = 0;
for (int i=0; i < numSamples; i++)
{
if (predictedClass[i] != labels[i])
{
err++;
}
}
err /= labels.size();
std::cout << "Training error: " << err << std::endl;
return 0;
}
The text was updated successfully, but these errors were encountered:
I would like to second this request for C++ example code. I'm currently writing a Go wrapper for lightgbm, which I hope to open source. I have several things working (loading an existing model from file, making predictions for a file or a single row, writing a model to file), but I cannot figure out how to progress beyond one iteration and export all the trees. I tried looking through the CLI code, but couldn't figure out what was going on.
Summary
Right now, there's ZERO guide on how to use the C/C++ API — not a single runnable example can be found online.
There's 1 code example (shown below), but it's totally outdated and cannot compile.
Motivation
It's useful because some people might need to integrate LightGBM directly into their C++ code.
The example code should show how to LoadData, set GBM config, train, predict an input, and free memory. It can either use the C++ code directly, or use the exposed C API shown here: https://lightgbm.readthedocs.io/en/latest/C-API.html
Description
Below is an example program that is outdated and can't compile. I'd like to fix that.
References
The text was updated successfully, but these errors were encountered: