mlpack  master
spill_tree.hpp
Go to the documentation of this file.
1 
11 #ifndef MLPACK_CORE_TREE_SPILL_TREE_SPILL_TREE_HPP
12 #define MLPACK_CORE_TREE_SPILL_TREE_SPILL_TREE_HPP
13 
14 #include <mlpack/prereqs.hpp>
15 #include "../space_split/midpoint_space_split.hpp"
16 #include "../statistic.hpp"
17 
18 namespace mlpack {
19 namespace tree {
20 
66 template<typename MetricType,
67  typename StatisticType = EmptyStatistic,
68  typename MatType = arma::mat,
69  template<typename HyperplaneMetricType>
70  class HyperplaneType = AxisOrthogonalHyperplane,
71  template<typename SplitMetricType, typename SplitMatType>
72  class SplitType = MidpointSpaceSplit>
73 class SpillTree
74 {
75  public:
77  typedef MatType Mat;
79  typedef typename MatType::elem_type ElemType;
81  typedef typename HyperplaneType<MetricType>::BoundType BoundType;
82 
83  private:
92  size_t count;
95  arma::Col<size_t>* pointsIndex;
99  HyperplaneType<MetricType> hyperplane;
101  BoundType bound;
103  StatisticType stat;
105  ElemType parentDistance;
113  const MatType* dataset;
116 
121  template<typename RuleType, bool Defeatist = false>
123 
128  template<typename RuleType, bool Defeatist = false>
130 
131  public:
133  template<typename RuleType>
135 
137  template<typename RuleType>
139 
141  template<typename RuleType>
143 
145  template<typename RuleType>
147 
158  SpillTree(const MatType& data,
159  const double tau = 0,
160  const size_t maxLeafSize = 20,
161  const double rho = 0.7);
162 
174  SpillTree(MatType&& data,
175  const double tau = 0,
176  const size_t maxLeafSize = 20,
177  const double rho = 0.7);
178 
190  SpillTree(SpillTree* parent,
191  arma::Col<size_t>& points,
192  const double tau = 0,
193  const size_t maxLeafSize = 20,
194  const double rho = 0.7);
195 
202  SpillTree(const SpillTree& other);
203 
210  SpillTree(SpillTree&& other);
211 
217  template<typename Archive>
218  SpillTree(
219  Archive& ar,
221 
227  ~SpillTree();
228 
230  const BoundType& Bound() const { return bound; }
232  BoundType& Bound() { return bound; }
233 
235  const StatisticType& Stat() const { return stat; }
237  StatisticType& Stat() { return stat; }
238 
240  bool IsLeaf() const;
241 
243  SpillTree* Left() const { return left; }
245  SpillTree*& Left() { return left; }
246 
248  SpillTree* Right() const { return right; }
250  SpillTree*& Right() { return right; }
251 
253  SpillTree* Parent() const { return parent; }
255  SpillTree*& Parent() { return parent; }
256 
258  const MatType& Dataset() const { return *dataset; }
259 
261  bool Overlap() const { return overlappingNode; }
262 
264  const HyperplaneType<MetricType>& Hyperplane() const { return hyperplane; }
265 
267  MetricType Metric() const { return MetricType(); }
268 
270  size_t NumChildren() const;
271 
278  template<typename VecType>
279  size_t GetNearestChild(
280  const VecType& point,
282 
289  template<typename VecType>
290  size_t GetFurthestChild(
291  const VecType& point,
293 
300  size_t GetNearestChild(const SpillTree& queryNode);
301 
308  size_t GetFurthestChild(const SpillTree& queryNode);
309 
314  ElemType FurthestPointDistance() const;
315 
323  ElemType FurthestDescendantDistance() const;
324 
326  ElemType MinimumBoundDistance() const;
327 
330  ElemType ParentDistance() const { return parentDistance; }
333  ElemType& ParentDistance() { return parentDistance; }
334 
341  SpillTree& Child(const size_t child) const;
342 
343  SpillTree*& ChildPtr(const size_t child)
344  { return (child == 0) ? left : right; }
345 
347  size_t NumPoints() const;
348 
354  size_t NumDescendants() const;
355 
363  size_t Descendant(const size_t index) const;
364 
373  size_t Point(const size_t index) const;
374 
376  ElemType MinDistance(const SpillTree& other) const
377  {
378  return bound.MinDistance(other.Bound());
379  }
380 
382  ElemType MaxDistance(const SpillTree& other) const
383  {
384  return bound.MaxDistance(other.Bound());
385  }
386 
389  {
390  return bound.RangeDistance(other.Bound());
391  }
392 
394  template<typename VecType>
395  ElemType MinDistance(const VecType& point,
397  const
398  {
399  return bound.MinDistance(point);
400  }
401 
403  template<typename VecType>
404  ElemType MaxDistance(const VecType& point,
406  const
407  {
408  return bound.MaxDistance(point);
409  }
410 
412  template<typename VecType>
414  RangeDistance(const VecType& point,
415  typename std::enable_if_t<IsVector<VecType>::value>* = 0) const
416  {
417  return bound.RangeDistance(point);
418  }
419 
421  static bool HasSelfChildren() { return false; }
422 
424  void Center(arma::vec& center) { bound.Center(center); }
425 
426  private:
435  void SplitNode(arma::Col<size_t>& points,
436  const size_t maxLeafSize,
437  const double tau,
438  const double rho);
439 
450  bool SplitPoints(const double tau,
451  const double rho,
452  const arma::Col<size_t>& points,
453  arma::Col<size_t>& leftPoints,
454  arma::Col<size_t>& rightPoints);
455  protected:
462  SpillTree();
463 
465  friend class boost::serialization::access;
466 
467  public:
471  template<typename Archive>
472  void Serialize(Archive& ar, const unsigned int version);
473 };
474 
475 } // namespace tree
476 } // namespace mlpack
477 
478 // Include implementation.
479 #include "spill_tree_impl.hpp"
480 
481 // Include everything else, if necessary.
482 #include "../spill_tree.hpp"
483 
484 #endif
BoundType bound
The bound object for this node.
Definition: spill_tree.hpp:101
ElemType MaxDistance(const VecType &point, typename std::enable_if_t< IsVector< VecType >::value > *=0) const
Return the maximum distance to another point.
Definition: spill_tree.hpp:404
SpillTree *& Right()
Modify the right child of this node.
Definition: spill_tree.hpp:250
SpillTree * left
The left child node.
Definition: spill_tree.hpp:85
MatType::elem_type ElemType
The type of element held in MatType.
Definition: spill_tree.hpp:79
Linear algebra utility functions, generally performed on matrices or vectors.
Definition: binarize.hpp:18
SpillTree *& ChildPtr(const size_t child)
Definition: spill_tree.hpp:343
HyperplaneType< MetricType >::BoundType BoundType
The bound type.
Definition: spill_tree.hpp:81
SpillTree *& Parent()
Modify the parent of this node.
Definition: spill_tree.hpp:255
SpillTree *& Left()
Modify the left child of this node.
Definition: spill_tree.hpp:245
ElemType ParentDistance() const
Return the distance from the center of this node to the center of the parent node.
Definition: spill_tree.hpp:330
The core includes that mlpack expects; standard C++ includes and Armadillo.
math::RangeType< ElemType > RangeDistance(const VecType &point, typename std::enable_if_t< IsVector< VecType >::value > *=0) const
Return the minimum and maximum distance to another point.
Definition: spill_tree.hpp:414
A generic single-tree traverser for hybrid spill trees; see spill_single_tree_traverser.hpp for implementation.
StatisticType stat
Any extra data contained in the node.
Definition: spill_tree.hpp:103
const StatisticType & Stat() const
Return the statistic object for this node.
Definition: spill_tree.hpp:235
const HyperplaneType< MetricType > & Hyperplane() const
Get the Hyperplane instance.
Definition: spill_tree.hpp:264
bool SplitPoints(const double tau, const double rho, const arma::Col< size_t > &points, arma::Col< size_t > &leftPoints, arma::Col< size_t > &rightPoints)
Split the list of points.
arma::Col< size_t > * pointsIndex
The list of indexes of points contained in this node (non-null for leaf nodes).
Definition: spill_tree.hpp:95
const BoundType & Bound() const
Return the bound object for this node.
Definition: spill_tree.hpp:230
A hybrid spill tree is a variant of binary space trees in which the children of a node can "spill ove...
Definition: spill_tree.hpp:73
ElemType FurthestPointDistance() const
Return the furthest distance to a point held in this node.
size_t GetNearestChild(const VecType &point, typename std::enable_if_t< IsVector< VecType >::value > *=0)
Return the index of the nearest child node to the given query point (this is an efficient estimation ...
A generic dual-tree traverser for hybrid spill trees; see spill_dual_tree_traverser.hpp for implementation.
StatisticType & Stat()
Return the statistic object for this node.
Definition: spill_tree.hpp:237
ElemType FurthestDescendantDistance() const
Return the furthest possible descendant distance.
bool localDataset
If true, we own the dataset and need to destroy it in the destructor.
Definition: spill_tree.hpp:115
const MatType & Dataset() const
Get the dataset which the tree is built on.
Definition: spill_tree.hpp:258
MetricType Metric() const
Get the metric that the tree uses.
Definition: spill_tree.hpp:267
ElemType minimumBoundDistance
The minimum distance from the center to any edge of the bound.
Definition: spill_tree.hpp:110
ElemType MinDistance(const VecType &point, typename std::enable_if_t< IsVector< VecType >::value > *=0) const
Return the minimum distance to another point.
Definition: spill_tree.hpp:395
void SplitNode(arma::Col< size_t > &points, const size_t maxLeafSize, const double tau, const double rho)
Splits the current node, assigning its left and right children recursively.
HyperplaneBase< bound::HRectBound< MetricType >, AxisParallelProjVector > AxisOrthogonalHyperplane
AxisOrthogonalHyperplane represents a hyperplane orthogonal to an axis.
Definition: hyperplane.hpp:145
SpillTree * Parent() const
Gets the parent of this node.
Definition: spill_tree.hpp:253
ElemType parentDistance
The distance from the centroid of this node to the centroid of the parent.
Definition: spill_tree.hpp:105
SpillTree()
A default constructor.
size_t GetFurthestChild(const VecType &point, typename std::enable_if_t< IsVector< VecType >::value > *=0)
Return the index of the furthest child node to the given query point (this is an efficient estimation...
size_t count
The number of points of the dataset contained in this node (and its children).
Definition: spill_tree.hpp:92
size_t NumChildren() const
Return the number of children in this node.
SpillTree * Left() const
Gets the left child of this node.
Definition: spill_tree.hpp:243
size_t NumDescendants() const
Return the number of descendants of this node.
size_t NumPoints() const
Return the number of points in this node (0 if not a leaf).
ElemType & ParentDistance()
Modify the distance from the center of this node to the center of the parent node.
Definition: spill_tree.hpp:333
SpillTree * right
The right child node.
Definition: spill_tree.hpp:87
ElemType MinDistance(const SpillTree &other) const
Return the minimum distance to another node.
Definition: spill_tree.hpp:376
static bool HasSelfChildren()
Returns false: this tree type does not have self children.
Definition: spill_tree.hpp:421
SpillTree * parent
The parent node (NULL if this is the root of the tree).
Definition: spill_tree.hpp:89
ElemType furthestDescendantDistance
The worst possible distance to the furthest descendant, cached to speed things up.
Definition: spill_tree.hpp:108
SpillTree & Child(const size_t child) const
Return the specified child (0 will be left, 1 will be right).
bool Overlap() const
Distinguish overlapping nodes from non-overlapping nodes.
Definition: spill_tree.hpp:261
bool IsLeaf() const
Return whether or not this node is a leaf (true if it has no children).
ElemType MinimumBoundDistance() const
Return the minimum distance from the center of the node to any bound edge.
size_t Descendant(const size_t index) const
Return the index (with reference to the dataset) of a particular descendant of this node...
~SpillTree()
Deletes this node, deallocating the memory for the children and calling their destructors in turn...
void Serialize(Archive &ar, const unsigned int version)
Serialize the tree.
MatType Mat
So other classes can use TreeType::Mat.
Definition: spill_tree.hpp:77
size_t Point(const size_t index) const
Return the index (with reference to the dataset) of a particular point in this node.
math::RangeType< ElemType > RangeDistance(const SpillTree &other) const
Return the minimum and maximum distance to another node.
Definition: spill_tree.hpp:388
bool overlappingNode
Flag to distinguish overlapping nodes from non-overlapping nodes.
Definition: spill_tree.hpp:97
ElemType MaxDistance(const SpillTree &other) const
Return the maximum distance to another node.
Definition: spill_tree.hpp:382
BoundType & Bound()
Return the bound object for this node.
Definition: spill_tree.hpp:232
If value == true, then VecType is some sort of Armadillo vector or subview.
Definition: arma_traits.hpp:35
void Center(arma::vec &center)
Store the center of the bounding region in the given vector.
Definition: spill_tree.hpp:424
SpillTree * Right() const
Gets the right child of this node.
Definition: spill_tree.hpp:248
typename enable_if< B, T >::type enable_if_t
Definition: prereqs.hpp:59
const MatType * dataset
The dataset.
Definition: spill_tree.hpp:113
HyperplaneType< MetricType > hyperplane
Splitting hyperplane represented by this node.
Definition: spill_tree.hpp:99