mlpack  master
refined_start.hpp
Go to the documentation of this file.
1 
14 #ifndef MLPACK_METHODS_KMEANS_REFINED_START_HPP
15 #define MLPACK_METHODS_KMEANS_REFINED_START_HPP
16 
17 #include <mlpack/prereqs.hpp>
18 
19 namespace mlpack {
20 namespace kmeans {
21 
38 {
39  public:
/**
 * Create the RefinedStart object, optionally specifying the number of
 * samplings to perform and the percentage of the data to use for each
 * subsampling.
 *
 * @param samplings Number of samplings to perform (defaults to 100).
 * @param percentage Percentage of the data to use for each subsampling
 *     (defaults to 0.02).
 *
 * NOTE(review): the member-initializer list that follows the ':' is not
 * shown in this listing; presumably it copies both arguments into the
 * members of the same name -- confirm against the real header.
 */
45  RefinedStart(const size_t samplings = 100,
46  const double percentage = 0.02) :
48 
/**
 * Partition the given dataset into the given number of clusters according to
 * the random sampling scheme.  Only the declaration appears here; the
 * definition comes from the implementation header included at the end of
 * this file.
 *
 * @tparam MatType Type of the data matrix.
 * @param data Dataset to partition.
 * @param clusters Number of clusters to partition the dataset into.
 * @param centroids Non-const matrix reference; presumably receives the
 *     resulting centroids -- confirm against the implementation.
 */
59  template<typename MatType>
60  void Cluster(const MatType& data,
61  const size_t clusters,
62  arma::mat& centroids) const;
63 
/**
 * Overload of Cluster() that takes a row of size_t assignments instead of a
 * centroid matrix.  Only the declaration appears here; the definition comes
 * from the implementation header included at the end of this file.
 *
 * @tparam MatType Type of the data matrix.
 * @param data Dataset to partition.
 * @param clusters Number of clusters to partition the dataset into.
 * @param assignments Non-const reference; presumably receives one cluster
 *     index per data point -- TODO confirm against the implementation.
 */
75  template<typename MatType>
76  void Cluster(const MatType& data,
77  const size_t clusters,
78  arma::Row<size_t>& assignments) const;
79 
//! Get the number of samplings that will be performed.
81  size_t Samplings() const { return samplings; }
//! Modify the number of samplings that will be performed.
83  size_t& Samplings() { return samplings; }
84 
//! Get the percentage of the data used by each subsampling.
86  double Percentage() const { return percentage; }
//! Modify the percentage of the data used by each subsampling.
88  double& Percentage() { return percentage; }
89 
/**
 * Serialize the object.
 *
 * Both members are passed through the archive as name-value pairs, named
 * "samplings" and "percentage".
 *
 * @tparam Archive Type of the archive.
 * @param ar Archive that the name-value pairs are applied to with operator&.
 * @param version Unused; the parameter name is commented out.
 */
91  template<typename Archive>
92  void Serialize(Archive& ar, const unsigned int /* version */)
93  {
94  ar & data::CreateNVP(samplings, "samplings");
95  ar & data::CreateNVP(percentage, "percentage");
96  }
97 
98  private:
//! The number of samplings to perform.
100  size_t samplings;
//! The percentage of the data to use for each subsampling.
102  double percentage;
103 };
104 
105 } // namespace kmeans
106 } // namespace mlpack
107 
108 // Include implementation.
109 #include "refined_start_impl.hpp"
110 
111 #endif
size_t Samplings() const
Get the number of samplings that will be performed.
void Serialize(Archive &ar, const unsigned int)
Serialize the object.
Linear algebra utility functions, generally performed on matrices or vectors.
Definition: binarize.hpp:18
void Cluster(const MatType &data, const size_t clusters, arma::mat &centroids) const
Partition the given dataset into the given number of clusters according to the random sampling scheme...
RefinedStart(const size_t samplings=100, const double percentage=0.02)
Create the RefinedStart object, optionally specifying parameters for the number of samplings to perform and the percentage of the data to use for each subsampling.
The core includes that mlpack expects; standard C++ includes and Armadillo.
FirstShim< T > CreateNVP(T &t, const std::string &name, typename std::enable_if_t< HasSerialize< T >::value > *=0)
Call this function to produce a name-value pair; this is similar to BOOST_SERIALIZATION_NVP(), but should be used for types that have a Serialize() function (or contain a type that has a Serialize() function) instead of a serialize() function.
double & Percentage()
Modify the percentage of the data used by each subsampling.
double percentage
The percentage of the data to use for each subsampling.
double Percentage() const
Get the percentage of the data used by each subsampling.
size_t samplings
The number of samplings to perform.
A refined approach for choosing initial points for k-means clustering.
size_t & Samplings()
Modify the number of samplings that will be performed.