Skip to content

Commit

Permalink
feat: quantiles and average binning for RF histograms / maxBins=0 to …
Browse files Browse the repository at this point in the history
…disable binning
  • Loading branch information
ahuber21 committed May 15, 2023
1 parent f1129b6 commit 377250c
Show file tree
Hide file tree
Showing 8 changed files with 222 additions and 85 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,17 @@ enum splitterMode
{
best, /* Calculates best split from aggregate best feature splits for every node. */
random /* Calculates best split from aggregate random feature splits for every node. */
}

/**
 * <a name="DAAL-ENUM-ALGORITHMS__DECISION_FOREST__TRAINING__BINNINGSTRATEGY"></a>
 * \brief Available strategies to compute data bins in 'hist' method
 */
enum BinningStrategy
{
    quantiles, /*!< Frequency quantiles -> same number of data points per bin */
    averages   /*!< Same feature value range per bin */
};

/**
Expand Down Expand Up @@ -145,6 +156,9 @@ class DAAL_EXPORT Parameter
size_t minBinSize; /*!< Used with 'hist' split finding method only.
Minimal number of observations in a bin. Default is 5 */
splitterMode splitter; /*!< Sets node splitting method. Default is best */
BinningStrategy binningStrategy; /*!< Used with 'hist' split finding method only.
Selects the strategy to group data points into bins.
Allowed values are 'quantiles' (default), 'averages' */
};
/* [Parameter source code] */
} // namespace interface2
Expand Down
18 changes: 14 additions & 4 deletions cpp/daal/src/algorithms/dtrees/dtrees_feature_type_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#ifndef __DTREES_FEATURE_TYPE_HELPER_H__
#define __DTREES_FEATURE_TYPE_HELPER_H__

#include "include/algorithms/decision_forest/decision_forest_training_parameter.h"
#include "src/externals/service_memory.h"
#include "src/data_management/service_numeric_table.h"

Expand Down Expand Up @@ -69,12 +70,20 @@ class FeatureTypes
int _lastUnordered = -1;
};

using daal::algorithms::decision_forest::training::BinningStrategy;

/**
 * Settings that control how feature values are grouped into bins for indexed data.
 */
struct BinParams
{
    /**
     * \param _maxBins          Maximum number of bins for indexed data
     * \param _minBinSize       Minimum number of data points per bin
     * \param _binningStrategy  Strategy used to create the bins; quantiles when omitted
     */
    BinParams(size_t _maxBins, size_t _minBinSize, BinningStrategy _binningStrategy = BinningStrategy::quantiles)
        // Initializers are listed in member declaration order, which is the order they actually run in.
        : binningStrategy(_binningStrategy), maxBins(_maxBins), minBinSize(_minBinSize)
    {}

    /* Member-wise copy of all three settings */
    BinParams(const BinParams & o) = default;

    /* Strategy to create bins for feature values. Default: quantiles */
    BinningStrategy binningStrategy = BinningStrategy::quantiles;
    /* Maximum number of bins for indexed data. Default: 256 */
    size_t maxBins = 256;
    /* Minimum bin width (number of data points per bin). Default: 5 */
    size_t minBinSize = 5;
};

Expand Down Expand Up @@ -103,6 +112,7 @@ class IndexedFeatures
IndexedFeatures() : _data(nullptr), _entries(nullptr), _sizeOfIndex(sizeof(IndexType)), _nCols(0), _nRows(0), _capacity(0), _maxNumIndices(0) {}
~IndexedFeatures();

//initialize the feature indices, i.e. bins
template <typename algorithmFPType, CpuType cpu>
services::Status init(const NumericTable & nt, const FeatureTypes * featureTypes = nullptr, const BinParams * pBimPrm = nullptr);

Expand Down
Loading

0 comments on commit 377250c

Please sign in to comment.