mlpack  master
binary_numeric_split.hpp
Go to the documentation of this file.
1 
13 #ifndef MLPACK_METHODS_HOEFFDING_SPLIT_BINARY_NUMERIC_SPLIT_HPP
14 #define MLPACK_METHODS_HOEFFDING_SPLIT_BINARY_NUMERIC_SPLIT_HPP
15 
17 
18 namespace mlpack {
19 namespace tree {
20 
45 template<typename FitnessFunction,
46  typename ObservationType = double>
47 class BinaryNumericSplit
48 {
49  public:
51  typedef BinaryNumericSplitInfo<ObservationType> SplitInfo;
52 
58  BinaryNumericSplit(const size_t numClasses);
59 
66  BinaryNumericSplit(const size_t numClasses, const BinaryNumericSplit& other);
67 
74  void Train(ObservationType value, const size_t label);
75 
89  void EvaluateFitnessFunction(double& bestFitness,
90  double& secondBestFitness);
91 
92  // Return the number of children if this node were to split on this feature.
93  size_t NumChildren() const { return 2; }
94 
102  void Split(arma::Col<size_t>& childMajorities, SplitInfo& splitInfo);
103 
105  size_t MajorityClass() const;
107  double MajorityProbability() const;
108 
110  template<typename Archive>
111  void Serialize(Archive& ar, const unsigned int /* version */);
112 
113  private:
115  std::multimap<ObservationType, size_t> sortedElements;
117  arma::Col<size_t> classCounts;
118 
120  ObservationType bestSplit;
123  bool isAccurate;
124 };
125 
126 // Convenience typedef.
127 template<typename FitnessFunction>
128 using BinaryDoubleNumericSplit = BinaryNumericSplit<FitnessFunction, double>;
129 
130 } // namespace tree
131 } // namespace mlpack
132 
133 // Include implementation.
134 #include "binary_numeric_split_impl.hpp"
135 
136 #endif
BinaryNumericSplit(const size_t numClasses)
Create the BinaryNumericSplit object with the given number of classes.
Linear algebra utility functions, generally performed on matrices or vectors.
Definition: binarize.hpp:18
The BinaryNumericSplit class implements the numeric feature splitting strategy devised by Gama...
arma::Col< size_t > classCounts
The classes we have seen so far (for majority calculations).
ObservationType bestSplit
A cached best split point.
std::multimap< ObservationType, size_t > sortedElements
The elements seen so far, in sorted order.
BinaryNumericSplitInfo< ObservationType > SplitInfo
The splitting information required by the BinaryNumericSplit.
bool isAccurate
If true, the cached best split point is accurate (that is, we have not seen any more samples since we...
void Train(ObservationType value, const size_t label)
Train on the given value with the given label.
size_t MajorityClass() const
The majority class of the points seen so far.
void EvaluateFitnessFunction(double &bestFitness, double &secondBestFitness)
Given the points seen so far, evaluate the fitness function, returning the best possible gain of a bi...
void Split(arma::Col< size_t > &childMajorities, SplitInfo &splitInfo)
Given that a split should happen, return the majority classes of the (two) children and an initialize...
void Serialize(Archive &ar, const unsigned int)
Serialize the object.
double MajorityProbability() const
The probability of the majority class given the points seen so far.