mlpack  master
hoeffding_tree_model.hpp
Go to the documentation of this file.
1 
7 #ifndef MLPACK_METHODS_HOEFFDING_TREE_HOEFFDING_TREE_MODEL_HPP
8 #define MLPACK_METHODS_HOEFFDING_TREE_HOEFFDING_TREE_MODEL_HPP
9 
10 #include "hoeffding_tree.hpp"
11 #include "binary_numeric_split.hpp"
12 #include "information_gain.hpp"
13 
14 namespace mlpack {
15 namespace tree {
16 
23 {
24  public:
//! This enumerates the four types of trees we can hold.
//! NOTE(review): the enumerators themselves are not visible in this listing;
//! Serialize() compares `type` against GINI_HOEFFDING, GINI_BINARY,
//! INFO_HOEFFDING and INFO_BINARY.
26  enum TreeType
27  {
32  };
33 
38  typedef HoeffdingTree<GiniImpurity, BinaryDoubleNumericSplit,
44  typedef HoeffdingTree<InformationGain, BinaryDoubleNumericSplit,
46 
55 
62 
69 
76 
83 
88 
/**
 * Train the model on the given dataset with the given labels.
 *
 * NOTE(review): the meaning of the tuning parameters (successProbability,
 * maxSamples, checkInterval, minSamples, bins, observationsBeforeBinning) is
 * not visible in this header; presumably they are forwarded to the underlying
 * HoeffdingTree and its numeric split -- confirm against the implementation.
 *
 * @param dataset Dataset to train on.
 * @param datasetInfo Auxiliary information for the dataset (data::DatasetInfo).
 * @param labels Labels for the dataset.
 * @param numClasses Number of classes (presumably in `labels` -- confirm).
 * @param batchTraining Presumably selects batch rather than streaming
 *     training -- confirm.
 */
109  void BuildModel(const arma::mat& dataset,
110  const data::DatasetInfo& datasetInfo,
111  const arma::Row<size_t>& labels,
112  const size_t numClasses,
113  const bool batchTraining,
114  const double successProbability,
115  const size_t maxSamples,
116  const size_t checkInterval,
117  const size_t minSamples,
118  const size_t bins,
119  const size_t observationsBeforeBinning);
120 
/**
 * Train in streaming mode on the given dataset.
 *
 * @param dataset Dataset to train on.
 * @param labels Labels for the dataset.
 * @param batchTraining NOTE(review): presumably switches to batch training
 *     when true -- confirm against the implementation.
 */
129  void Train(const arma::mat& dataset,
130  const arma::Row<size_t>& labels,
131  const bool batchTraining);
132 
/**
 * Using the model, classify the given test points.
 *
 * @param dataset Test points to classify.
 * @param predictions Output: filled with the predictions for the test points.
 */
140  void Classify(const arma::mat& dataset,
141  arma::Row<size_t>& predictions) const;
142 
/**
 * Using the model, classify the given test points; this overload has an
 * additional `probabilities` output.
 *
 * @param dataset Test points to classify.
 * @param predictions Output: filled with the predictions for the test points.
 * @param probabilities Output: presumably the probability associated with
 *     each prediction -- confirm against the implementation.
 */
151  void Classify(const arma::mat& dataset,
152  arma::Row<size_t>& predictions,
153  arma::rowvec& probabilities) const;
154 
//! Get the number of nodes in the tree.
158  size_t NumNodes() const;
159 
/**
 * Serialize the model.
 *
 * The tree type is archived first; after that only the single tree matching
 * `type` is archived, under a name equal to its member name.  When loading,
 * all four tree pointers are deleted and reset to NULL before a placeholder
 * tree of the loaded type is allocated and then filled from the archive.  If
 * `type` matches none of the four known values, no tree is archived.
 *
 * NOTE(review): when saving, the pointer for the active type is dereferenced
 * without a null check in this function -- confirm a tree has always been
 * built before a model is saved.
 *
 * @tparam Archive Archive type; Archive::is_loading::value distinguishes
 *     loading from saving.
 * @param ar Archive to serialize to or from.
 * (The second, unnamed parameter is the version and is unused here.)
 */
163  template<typename Archive>
164  void Serialize(Archive& ar, const unsigned int /* version */)
165  {
166  ar & data::CreateNVP(type, "type");
167 
168  // When loading, free any trees currently held so they are not leaked.
169  if (Archive::is_loading::value)
170  {
171  delete giniHoeffdingTree;
172  delete giniBinaryTree;
173  delete infoHoeffdingTree;
174  delete infoBinaryTree;
175 
176  giniHoeffdingTree = NULL;
177  giniBinaryTree = NULL;
178  infoHoeffdingTree = NULL;
179  infoBinaryTree = NULL;
180  }
181 
182  // Default-constructed dataset info, used only to construct placeholder trees.
183  data::DatasetInfo info;
184  if (type == GINI_HOEFFDING)
185  {
186  // When loading, allocate a placeholder tree for the archive to load into.
187  if (Archive::is_loading::value)
188  giniHoeffdingTree = new GiniHoeffdingTreeType(info, 1, 1);
189  ar & data::CreateNVP(*giniHoeffdingTree, "giniHoeffdingTree");
190  }
191  else if (type == GINI_BINARY)
192  {
193  // When loading, allocate a placeholder tree for the archive to load into.
194  if (Archive::is_loading::value)
195  giniBinaryTree = new GiniBinaryTreeType(info, 1, 1);
196  ar & data::CreateNVP(*giniBinaryTree, "giniBinaryTree");
197  }
198  else if (type == INFO_HOEFFDING)
199  {
200  // When loading, allocate a placeholder tree for the archive to load into.
201  if (Archive::is_loading::value)
202  infoHoeffdingTree = new InfoHoeffdingTreeType(info, 1, 1);
203  ar & data::CreateNVP(*infoHoeffdingTree, "infoHoeffdingTree");
204  }
205  else if (type == INFO_BINARY)
206  {
207  // When loading, allocate a placeholder tree for the archive to load into.
208  if (Archive::is_loading::value)
209  infoBinaryTree = new InfoBinaryTreeType(info, 1, 1);
210  ar & data::CreateNVP(*infoBinaryTree, "infoBinaryTree");
211  }
212  }
213 
214  private:
217 
//! This is used if we are using the Gini impurity and the Hoeffding numeric
//! split.
220  GiniHoeffdingTreeType* giniHoeffdingTree;
221 
//! This is used if we are using the Gini impurity and the binary numeric
//! split.
224  GiniBinaryTreeType* giniBinaryTree;
225 
//! This is used if we are using the information gain and the Hoeffding
//! numeric split.
228  InfoHoeffdingTreeType* infoHoeffdingTree;
229 
//! This is used if we are using the information gain and the binary numeric
//! split.
232  InfoBinaryTreeType* infoBinaryTree;
233 };
234 
235 } // namespace tree
236 } // namespace mlpack
237 
238 #endif
InfoHoeffdingTreeType * infoHoeffdingTree
This is used if we are using the information gain and the Hoeffding numeric split.
Auxiliary information for a dataset, including mappings to/from strings and the datatype of each dimension.
The HoeffdingTree object represents all of the necessary information for a Hoeffding-bound-based decision tree.
Linear algebra utility functions, generally performed on matrices or vectors.
Definition: binarize.hpp:18
HoeffdingTree< InformationGain, BinaryDoubleNumericSplit, HoeffdingCategoricalSplit > InfoBinaryTreeType
Convenience typedef for INFO_BINARY tree type.
BinaryNumericSplit< FitnessFunction, double > BinaryDoubleNumericSplit
size_t NumNodes() const
Get the number of nodes in the tree.
FirstShim< T > CreateNVP(T &t, const std::string &name, typename std::enable_if_t< HasSerialize< T >::value > *=0)
Call this function to produce a name-value pair; this is similar to BOOST_SERIALIZATION_NVP(), but should be used for types that have a Serialize() function (or contain a type that has a Serialize() function) instead of a serialize() function.
HoeffdingNumericSplit< FitnessFunction, double > HoeffdingDoubleNumericSplit
Convenience typedef.
TreeType
This enumerates the four types of trees we can hold.
void Classify(const arma::mat &dataset, arma::Row< size_t > &predictions) const
Using the model, classify the given test points.
GiniHoeffdingTreeType * giniHoeffdingTree
This is used if we are using the Gini impurity and the Hoeffding numeric split.
HoeffdingTree< InformationGain, HoeffdingDoubleNumericSplit, HoeffdingCategoricalSplit > InfoHoeffdingTreeType
Convenience typedef for INFO_HOEFFDING tree type.
The standard information gain criterion, used for calculating gain in decision trees.
void BuildModel(const arma::mat &dataset, const data::DatasetInfo &datasetInfo, const arma::Row< size_t > &labels, const size_t numClasses, const bool batchTraining, const double successProbability, const size_t maxSamples, const size_t checkInterval, const size_t minSamples, const size_t bins, const size_t observationsBeforeBinning)
Train the model on the given dataset with the given labels.
This class is a serializable Hoeffding tree model that can hold four different types of Hoeffding trees.
HoeffdingTreeModel(const TreeType &type=GINI_HOEFFDING)
Construct the Hoeffding tree model, but don't initialize any tree.
HoeffdingTree< GiniImpurity, BinaryDoubleNumericSplit, HoeffdingCategoricalSplit > GiniBinaryTreeType
Convenience typedef for GINI_BINARY tree type.
This is the standard Hoeffding-bound categorical feature proposed in the paper below: ...
~HoeffdingTreeModel()
Clean up the given model.
void Serialize(Archive &ar, const unsigned int)
Serialize the model.
TreeType type
The type of tree we are using.
void Train(const arma::mat &dataset, const arma::Row< size_t > &labels, const bool batchTraining)
Train in streaming mode on the given dataset.
InfoBinaryTreeType * infoBinaryTree
This is used if we are using the information gain and the binary numeric split.
GiniBinaryTreeType * giniBinaryTree
This is used if we are using the Gini impurity and the binary numeric split.
HoeffdingTree< GiniImpurity, HoeffdingDoubleNumericSplit, HoeffdingCategoricalSplit > GiniHoeffdingTreeType
Convenience typedef for GINI_HOEFFDING tree type.
HoeffdingTreeModel & operator=(const HoeffdingTreeModel &other)
Copy the Hoeffding tree model from the given other model.