13 #ifndef MLPACK_METHODS_HOEFFDING_TREES_HOEFFDING_TREE_HPP 14 #define MLPACK_METHODS_HOEFFDING_TREES_HOEFFDING_TREE_HPP 55 template<
typename FitnessFunction = GiniImpurity,
56 template<
typename>
class NumericSplitType =
58 template<
typename>
class CategoricalSplitType =
59 HoeffdingCategoricalSplit
91 template<
typename MatType>
94 const arma::Row<size_t>& labels,
96 const bool batchTraining =
true,
101 const CategoricalSplitType<FitnessFunction>& categoricalSplitIn
102 = CategoricalSplitType<FitnessFunction>(0, 0),
103 const NumericSplitType<FitnessFunction>& numericSplitIn =
104 NumericSplitType<FitnessFunction>(0));
126 const size_t numClasses,
131 const CategoricalSplitType<FitnessFunction>& categoricalSplitIn
132 = CategoricalSplitType<FitnessFunction>(0, 0),
133 const NumericSplitType<FitnessFunction>& numericSplitIn =
134 NumericSplitType<FitnessFunction>(0),
135 std::unordered_map<
size_t, std::pair<size_t, size_t>>*
159 template<
typename MatType>
160 void Train(
const MatType& data,
161 const arma::Row<size_t>& labels,
162 const bool batchTraining =
true);
170 template<
typename VecType>
171 void Train(
const VecType& point,
const size_t label);
228 template<
typename VecType>
238 template<
typename VecType>
239 size_t Classify(
const VecType& point)
const;
252 template<
typename VecType>
253 void Classify(
const VecType& point,
size_t& prediction,
double& probability)
263 template<
typename MatType>
264 void Classify(
const MatType& data, arma::Row<size_t>& predictions)
const;
277 template<
typename MatType>
279 arma::Row<size_t>& predictions,
280 arma::rowvec& probabilities)
const;
288 template<
typename Archive>
289 void Serialize(Archive& ar,
const unsigned int );
341 #include "hoeffding_tree_impl.hpp"
NumericSplitType< FitnessFunction > NumericSplit
Allow access to the numeric split type.
Auxiliary information for a dataset, including mappings to/from strings and the datatype of each dime...
size_t Classify(const VecType &point) const
Classify the given point, using this node and the entire (sub)tree beneath it.
HoeffdingTree(const MatType &data, const data::DatasetInfo &datasetInfo, const arma::Row< size_t > &labels, const size_t numClasses, const bool batchTraining=true, const double successProbability=0.95, const size_t maxSamples=0, const size_t checkInterval=100, const size_t minSamples=100, const CategoricalSplitType< FitnessFunction > &categoricalSplitIn=CategoricalSplitType< FitnessFunction >(0, 0), const NumericSplitType< FitnessFunction > &numericSplitIn=NumericSplitType< FitnessFunction >(0))
Construct the Hoeffding tree with the given parameters and given training data.
bool ownsInfo
Whether or not we own the dataset information.
The HoeffdingTree object represents all of the necessary information for a Hoeffding-bound-based deci...
Linear algebra utility functions, generally performed on matrices or vectors.
size_t checkInterval
The number of samples that should be seen before checking for a split.
~HoeffdingTree()
Clean up memory.
size_t CalculateDirection(const VecType &point) const
Given a point and that this node is not a leaf, calculate the index of the child node this point woul...
The core includes that mlpack expects; standard C++ includes and Armadillo.
double & MajorityProbability()
Modify the probability of the majority class.
size_t minSamples
The minimum number of samples for splitting.
size_t numSamples
The number of samples seen so far by this node.
HoeffdingNumericSplit< FitnessFunction, double > HoeffdingDoubleNumericSplit
Convenience typedef.
size_t SplitDimension() const
Get the splitting dimension (size_t(-1) if no split).
double SuccessProbability() const
Get the confidence required for a split.
double MajorityProbability() const
Get the probability of the majority class (based on training samples).
bool ownsMappings
Indicates whether or not we own the mappings.
CategoricalSplitType< FitnessFunction >::SplitInfo categoricalSplit
If the split is categorical, this holds the splitting information.
std::vector< NumericSplitType< FitnessFunction > > numericSplits
Information for splitting of numeric features (used before split).
void Train(const MatType &data, const arma::Row< size_t > &labels, const bool batchTraining=true)
Train on a set of points, either in streaming mode or in batch mode, with the given labels...
void Serialize(Archive &ar, const unsigned int)
Serialize the split.
size_t MinSamples() const
Get the minimum number of samples for a split.
size_t MaxSamples() const
Get the maximum number of samples before a split is forced.
NumericSplitType< FitnessFunction >::SplitInfo numericSplit
If the split is numeric, this holds the splitting information.
const HoeffdingTree & Child(const size_t i) const
Get a child.
size_t maxSamples
The maximum number of samples we can see before splitting.
size_t CheckInterval() const
Get the number of samples before a split check is performed.
size_t MajorityClass() const
Get the majority class.
size_t majorityClass
The majority class of this node.
std::unordered_map< size_t, std::pair< size_t, size_t > > * dimensionMappings
This structure is owned by this node only if it is the root of the tree.
double successProbability
The required probability of success for a split to be performed.
double majorityProbability
The empirical probability of a point this node saw having the majority class.
size_t numClasses
The number of classes this node is trained on.
std::vector< CategoricalSplitType< FitnessFunction > > categoricalSplits
Information for splitting of categorical features (used before split).
size_t SplitCheck()
Check if a split would satisfy the conditions of the Hoeffding bound with the node's specified succes...
std::vector< HoeffdingTree * > children
If the split has occurred, these are the children.
CategoricalSplitType< FitnessFunction > CategoricalSplit
Allow access to the categorical split type.
size_t NumChildren() const
Get the number of children.
size_t & MajorityClass()
Modify the majority class.
size_t splitDimension
The dimension that this node has split on.
const data::DatasetInfo * datasetInfo
The dataset information.
HoeffdingTree & Child(const size_t i)
Modify a child.
void CreateChildren()
Given that this node should split, create the children.