mlpack  master
gini_gain.hpp
Go to the documentation of this file.
1 
13 #ifndef MLPACK_METHODS_DECISION_TREE_GINI_GAIN_HPP
14 #define MLPACK_METHODS_DECISION_TREE_GINI_GAIN_HPP
15 
16 #include <mlpack/core.hpp>
17 
18 namespace mlpack {
19 namespace tree {
20 
27 class GiniGain
28 {
29  public:
37  template<typename RowType>
38  static double Evaluate(const RowType& labels,
39  const size_t numClasses)
40  {
41  // Corner case: if there are no elements, the impurity is zero.
42  if (labels.n_elem == 0)
43  return 0.0;
44 
45  arma::Col<size_t> counts(numClasses);
46  counts.zeros();
47  for (size_t i = 0; i < labels.n_elem; ++i)
48  counts[labels[i]]++;
49 
50  // Calculate the Gini impurity of the un-split node.
51  double impurity = 0.0;
52  for (size_t i = 0; i < numClasses; ++i)
53  {
54  const double f = ((double) counts[i] / (double) labels.n_elem);
55  impurity += f * (1.0 - f);
56  }
57 
58  return -impurity;
59  }
60 
68  static double Range(const size_t numClasses)
69  {
70  // The best possible case is that only one class exists, which gives a Gini
71  // impurity of 0. The worst possible case is that the classes are evenly
72  // distributed, which gives n * (1/n * (1 - 1/n)) = 1 - 1/n.
73  return 1.0 - (1.0 / double(numClasses));
74  }
75 };
76 
77 } // namespace tree
78 } // namespace mlpack
79 
80 #endif
static double Range(const size_t numClasses)
Return the range of the Gini impurity for the given number of classes.
Definition: gini_gain.hpp:68
Linear algebra utility functions, generally performed on matrices or vectors.
Definition: binarize.hpp:18
Include all of the base components required to write MLPACK methods, and the main MLPACK Doxygen documentation.
The Gini gain, a measure of set purity usable as a fitness function (FitnessFunction) for decision trees.
Definition: gini_gain.hpp:27
static double Evaluate(const RowType &labels, const size_t numClasses)
Evaluate the Gini impurity on the given set of labels.
Definition: gini_gain.hpp:38