Skip to content

Commit

Permalink
feat: make useConstFeatures configurable via params
Browse files Browse the repository at this point in the history
  • Loading branch information
ahuber21 committed Mar 6, 2023
1 parent 33bcec7 commit 8839ba4
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 15 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@ class DAAL_EXPORT Parameter
Default is 256. Increasing the number results in higher computation costs */
size_t minBinSize; /*!< Used with 'hist' split finding method only.
Minimal number of observations in a bin. Default is 5 */
bool useConstFeatures; /*!< Use or ignore constant-valued features when splitting nodes. Default is false */
};
/* [Parameter source code] */
} // namespace interface2
Expand Down
16 changes: 12 additions & 4 deletions cpp/daal/src/algorithms/dtrees/dtrees_train_data_helper.i
Original file line number Diff line number Diff line change
Expand Up @@ -377,20 +377,28 @@ public:
DAAL_ASSERT(iDst == getNumOOBIndices());
}

/**
 * Helper to investigate whether a feature takes more than one distinct indexed value
 * among a selected set of observations
 * \param[in] iFeature  Index of the feature whose indexed values are compared
 * \param[in] aIdx      Array of n positions into _aResponse selecting the observations to compare
 * \param[in] n         Number of entries in aIdx
 * \return true if at least two of the selected observations have different indexed
 *         feature values, false otherwise
 */
bool hasDiffFeatureValues(IndexType iFeature, const int * aIdx, size_t n) const
{
    // nothing to compare; also prevents reading aIdx[0] from an empty selection
    if (n == 0) return false;
    if (this->indexedFeatures().numIndices(iFeature) == 1) return false; //single value only
    const IndexedFeatures::IndexType * indexedFeature = this->indexedFeatures().data(iFeature);
    const auto aResponse                              = this->_aResponse.get();
    // indexed value of the first selected observation, used as the reference
    const IndexedFeatures::IndexType idx0 = indexedFeature[aResponse[aIdx[0]].idx];
    for (size_t i = 1; i < n; ++i)
    {
        const Response & r                   = aResponse[aIdx[i]];
        const IndexedFeatures::IndexType idx = indexedFeature[r.idx];
        // if the indices are different, we found different feature
        // values and return true
        if (idx != idx0) return true;
    }
    // all feature indices pointed to idx0 and are thus the same
    return false;
}

protected:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -518,8 +518,7 @@ protected:
_minSamplesSplit(2),
_minWeightLeaf(0.),
_minImpurityDecrease(-daal::services::internal::EpsilonVal<algorithmFPType>::get() * x->getNumberOfRows()),
_maxLeafNodes(0),
_useConstFeatures(false)
_maxLeafNodes(0)
{
if (_impurityThreshold < _accuracy) _impurityThreshold = _accuracy;

Expand Down Expand Up @@ -616,8 +615,7 @@ protected:
void chooseFeatures()
{
const size_t n = nFeatures();
const size_t nGen = _nFeaturesPerNode;
// const size_t nGen = (!_par.memorySavingMode && !_maxLeafNodes && !_useConstFeatures) ? n : _nFeaturesPerNode;
const size_t nGen = (!_par.memorySavingMode && !_maxLeafNodes && !_par.useConstFeatures) ? n : _nFeaturesPerNode;
*_numElems += n;
RNGs<IndexType, cpu> rng;
rng.uniformWithoutReplacement(nGen, _aFeatureIdx.get(), _aFeatureIdx.get() + nGen, _engineImpl->getState(), 0, n);
Expand Down Expand Up @@ -653,8 +651,7 @@ protected:
const Parameter & _par;
const size_t _nSamples;
const size_t _nFeaturesPerNode;
const size_t _nFeatureBufs; //number of buffers to get feature values (to process features independently in parallel)
const bool _useConstFeatures; //including constant features in number of features per node
const size_t _nFeatureBufs; //number of buffers to get feature values (to process features independently in parallel)
mutable size_t _nConstFeature;

const BinIndexType * _binIndex;
Expand Down Expand Up @@ -684,7 +681,7 @@ services::Status TrainBatchTaskBase<algorithmFPType, BinIndexType, DataHelper, c
_aFeatureBuf.reset(_nFeatureBufs);
_aFeatureIndexBuf.reset(_nFeatureBufs);

if (false && !_par.memorySavingMode && !_maxLeafNodes && !_useConstFeatures)
if (!_par.memorySavingMode && !_maxLeafNodes && !_par.useConstFeatures)
{
_aFeatureIdx.reset(maxFeatures * 2); // maxFeatures elements are used by algorithm, others are used internally by generator
_aConstFeatureIdx.reset(maxFeatures * 2); // first maxFeatures elements are used for saving indices of constant features,
Expand Down Expand Up @@ -811,7 +808,7 @@ typename DataHelper::NodeType::Base * TrainBatchTaskBase<algorithmFPType, BinInd
typename DataHelper::NodeType::Base * left =
buildDepthFirst(s, iStart, split.nLeft, level + 1, split.left, bUnorderedFeaturesUsed, nClasses, split.leftWeights);
_helper.convertLeftImpToRight(n, curImpurity, split);
if (!_par.memorySavingMode && !_useConstFeatures)
if (!_par.memorySavingMode && !_par.useConstFeatures)
{
for (size_t i = _nConstFeature; i > 0; --i)
{
Expand Down Expand Up @@ -1100,7 +1097,7 @@ bool TrainBatchTaskBase<algorithmFPType, BinIndexType, DataHelper, cpu>::findBes
const auto iFeature = _aFeatureIdx[i];
const bool bUseIndexedFeatures = (!_par.memorySavingMode) && (fact > qMax * float(_helper.indexedFeatures().numIndices(iFeature)));

if (false && !_maxLeafNodes && !_useConstFeatures && !_par.memorySavingMode)
if (!_maxLeafNodes && !_par.useConstFeatures && !_par.memorySavingMode)
{
if (_aConstFeatureIdx[maxFeatures + iFeature] > 0) continue; //selected feature is known constant feature
if (!_helper.hasDiffFeatureValues(iFeature, aIdx, n))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,8 @@ Parameter::Parameter()
minWeightFractionInLeafNode(0.),
minImpurityDecreaseInSplitNode(0.),
maxLeafNodes(0),
minBinSize(5),
maxBins(256)
maxBins(256),
minBinSize(5)
{}
} // namespace interface2
Status checkImpl(const decision_forest::training::interface2::Parameter & prm)
Expand Down

0 comments on commit 8839ba4

Please sign in to comment.