12 #ifndef MLPACK_METHODS_DECISION_STUMP_DECISION_STUMP_HPP 13 #define MLPACK_METHODS_DECISION_STUMP_DECISION_STUMP_HPP 18 namespace decision_stump {
33 template<
typename MatType = arma::mat>
47 const arma::Row<size_t>& labels,
64 const arma::Row<size_t>& labels,
65 const arma::rowvec& weights);
84 void Train(
const MatType& data,
85 const arma::Row<size_t>& labels,
// Train the decision stump on the given data, taking a weight vector in
// addition to the labels.
//
// data       - training data (MatType; the visible template default is
//              arma::mat).
// labels     - class labels for the training data.
// weights    - weights for the training data; presumably one entry per
//              label -- confirm against the implementation.
// classes    - the number of classes.
// bucketSize - the minimum number of points in a bucket.
//
// Declaration only; NOTE(review): the body is presumably provided by
// decision_stump_impl.hpp, which this header includes -- confirm.
100 void Train(
const MatType& data,
101 const arma::Row<size_t>& labels,
102 const arma::rowvec& weights,
103 const size_t classes,
104 const size_t bucketSize);
// Classification function.
//
// test            - data to classify (read-only).
// predictedLabels - taken by non-const reference; presumably overwritten with
//                   the predicted label for each test point -- confirm in the
//                   implementation.
114 void Classify(
const MatType& test, arma::Row<size_t>& predictedLabels);
// Serialize the decision stump using the given archive.
//
// ar - archive object of the templated Archive type.
//
// The second parameter is unnamed, so it cannot be referenced by the
// definition. NOTE(review): presumably the class version number required by
// the serialization framework's signature -- confirm.
132 template<
typename Archive>
133 void Serialize(Archive& ar,
const unsigned int );
156 template<
bool UseWeights,
typename VecType>
158 const arma::Row<size_t>& labels,
159 const arma::rowvec& weightD);
168 template<
typename VecType>
170 const arma::Row<size_t>& labels);
184 template<
typename VecType>
192 template<
typename VecType>
204 template<
bool UseWeights,
typename VecType,
typename WeightVecType>
206 const WeightVecType& weights);
// Train overload selected at compile time by the UseWeights flag.
//
// data    - training data.
// labels  - class labels for the training data.
// weights - weights for the training data.
//
// NOTE(review): presumably `weights` is only consulted when UseWeights is true
// and this is the shared worker behind the other Train overloads -- confirm in
// the implementation before relying on either assumption.
217 template<
bool UseWeights>
218 void Train(
const MatType& data,
219 const arma::Row<size_t>& labels,
220 const arma::rowvec& weights);
226 #include "decision_stump_impl.hpp" void MergeRanges()
After the "split" matrix has been set up, merge ranges with identical class labels.
void Classify(const MatType &test, arma::Row< size_t > &predictedLabels)
Classification function.
size_t splitDimension
Stores the value of the dimension on which to split.
Linear algebra utility functions, generally performed on matrices or vectors.
arma::Col< size_t > & BinLabels()
Modify the labels for each split bin (be careful!).
size_t SplitDimension() const
Access the splitting dimension.
The core includes that mlpack expects; standard C++ includes and Armadillo.
int IsDistinct(const VecType &featureRow)
Returns 1 if the values of featureRow are not all the same.
This class implements a decision stump.
double SetupSplitDimension(const VecType &dimension, const arma::Row< size_t > &labels, const arma::rowvec &weightD)
Sets up the given dimension as if the stump were splitting on it, and finds the entropy that results from splitting on that dimension.
DecisionStump()
Create a decision stump without training.
const arma::vec & Split() const
Access the splitting values.
void Train(const MatType &data, const arma::Row< size_t > &labels, const size_t classes, const size_t bucketSize)
Train the decision stump on the given data.
arma::Col< size_t > binLabels
Stores the labels for each splitting bin.
void Serialize(Archive &ar, const unsigned int)
Serialize the decision stump.
size_t bucketSize
The minimum number of points in a bucket.
double CalculateEntropy(const VecType &labels, const WeightVecType &weights)
Calculate the entropy of the given dimension.
arma::vec & Split()
Modify the splitting values (be careful!).
void TrainOnDim(const VecType &dimension, const arma::Row< size_t > &labels)
After having decided the dimension on which to split, train on that dimension.
size_t & SplitDimension()
Modify the splitting dimension (be careful!).
arma::vec split
Stores the splitting values after training.
const arma::Col< size_t > BinLabels() const
Access the labels for each split bin.
size_t classes
The number of classes (we must store this for boosting).
double CountMostFreq(const VecType &subCols)
Count the most frequently occurring element in subCols.