mlpack  master
discrete_distribution.hpp
Go to the documentation of this file.
1 
13 #ifndef MLPACK_CORE_DISTRIBUTIONS_DISCRETE_DISTRIBUTION_HPP
14 #define MLPACK_CORE_DISTRIBUTIONS_DISCRETE_DISTRIBUTION_HPP
15 
16 #include <mlpack/prereqs.hpp>
17 #include <mlpack/core/util/log.hpp>
19 
20 namespace mlpack {
21 namespace distribution {
22 
46 {
47  public:
52  probabilities(std::vector<arma::vec>(1)){ /* Nothing to do. */ }
53 
62  DiscreteDistribution(const size_t numObservations) :
63  probabilities(std::vector<arma::vec>(1,
64  arma::ones<arma::vec>(numObservations) / numObservations))
65  { /* Nothing to do. */ }
66 
75  DiscreteDistribution(const arma::Col<size_t>& numObservations)
76  {
77  for (size_t i = 0; i < numObservations.n_elem; i++)
78  {
79  const size_t numObs = size_t(numObservations[i]);
80  if (numObs <= 0)
81  {
82  std::ostringstream oss;
83  oss << "number of observations for dimension " << i << " is 0, but "
84  << "must be greater than 0";
85  throw std::invalid_argument(oss.str());
86  }
87  probabilities.push_back(arma::ones<arma::vec>(numObs) / numObs);
88  }
89  }
90 
97  DiscreteDistribution(const std::vector<arma::vec>& probabilities)
98  {
99  for (size_t i = 0; i < probabilities.size(); i++)
100  {
101  arma::vec temp = probabilities[i];
102  double sum = accu(temp);
103  if (sum > 0)
104  this->probabilities.push_back(temp / sum);
105  else
106  {
107  this->probabilities.push_back(arma::ones<arma::vec>(temp.n_elem)
108  / temp.n_elem);
109  }
110  }
111  }
112 
116  size_t Dimensionality() const { return probabilities.size(); }
117 
126  double Probability(const arma::vec& observation) const
127  {
128  double probability = 1.0;
129  // Ensure the observation has the same dimension with the probabilities
130  if (observation.n_elem != probabilities.size())
131  {
132  Log::Debug << "the obversation must has the same dimension with the probabilities"
133  << "the observation's dimension is" << observation.n_elem << "but the dimension of "
134  << "probabilities is" << probabilities.size() << std::endl;
135  return probability;
136  }
137  for (size_t dimension = 0; dimension < observation.n_elem; dimension++)
138  {
139  // Adding 0.5 helps ensure that we cast the floating point to a size_t
140  // correctly.
141  const size_t obs = size_t(observation(dimension) + 0.5);
142 
143  // Ensure that the observation is within the bounds.
144  if (obs >= probabilities[dimension].n_elem)
145  {
146  Log::Debug << "DiscreteDistribution::Probability(): received observation "
147  << obs << "; observation must be in [0, " << probabilities[dimension].n_elem
148  << "] for this distribution." << std::endl;
149  }
150  probability *= probabilities[dimension][obs];
151  }
152 
153  return probability;
154  }
155 
164  double LogProbability(const arma::vec& observation) const
165  {
166  // TODO: consider storing log probabilities instead?
167  return log(Probability(observation));
168  }
169 
/**
 * Return a randomly generated observation (a single observation, returned as
 * a vector).
 *
 * NOTE(review): the definition is not in this header; presumably the
 * observation is drawn according to the stored probabilities -- confirm
 * against the implementation.
 *
 * @return Random observation.
 */
177  arma::vec Random() const;
178 
/**
 * Estimate the probability distribution directly from the given
 * observations.
 *
 * NOTE(review): the definition is not in this header; presumably each column
 * of the matrix is one observation and each row one dimension -- confirm
 * against the implementation.
 *
 * @param observations Observations to estimate the distribution from.
 */
186  void Train(const arma::mat& observations);
187 
/**
 * Estimate the probability distribution from the given observations, taking
 * a second vector of per-observation probabilities into account.
 *
 * NOTE(review): the definition is not in this header; presumably
 * probabilities[i] weights the i-th observation -- confirm against the
 * implementation.
 *
 * @param observations Observations to estimate the distribution from.
 * @param probabilities Probabilities associated with the observations.
 */
197  void Train(const arma::mat& observations,
198  const arma::vec& probabilities);
199 
201  arma::vec& Probabilities(const size_t dim = 0) { return probabilities[dim]; }
203  const arma::vec& Probabilities(const size_t dim = 0) const
204  { return probabilities[dim]; }
205 
209  template<typename Archive>
210  void Serialize(Archive& ar, const unsigned int /* version */)
211  {
212  // We serialize the vector manually since there seem to be some problems
213  // with some boost versions.
214  size_t dimensionality;
215  dimensionality = probabilities.size();
216  ar & data::CreateNVP(dimensionality, "dimensionality");
217 
218  if (Archive::is_loading::value)
219  {
220  probabilities.clear();
221  probabilities.resize(dimensionality);
222  }
223 
224  for (size_t i = 0; i < dimensionality; ++i)
225  {
226  std::ostringstream oss;
227  oss << "probabilities" << i;
228  ar & data::CreateNVP(probabilities[i], oss.str());
229  }
230  }
231 
232  private:
235  std::vector<arma::vec> probabilities;
236 };
237 
238 } // namespace distribution
239 } // namespace mlpack
240 
241 #endif
Linear algebra utility functions, generally performed on matrices or vectors.
Definition: binarize.hpp:18
void Serialize(Archive &ar, const unsigned int)
Serialize the distribution.
arma::vec Random() const
Return a randomly generated observation (one-dimensional vector; one observation) according to the pr...
std::vector< arma::vec > probabilities
The probabilities for each dimension; each arma::vec represents the probabilities for the observation...
A discrete distribution where the only observations are discrete observations.
The core includes that mlpack expects; standard C++ includes and Armadillo.
FirstShim< T > CreateNVP(T &t, const std::string &name, typename std::enable_if_t< HasSerialize< T >::value > *=0)
Call this function to produce a name-value pair; this is similar to BOOST_SERIALIZATION_NVP(), but should be used for types that have a Serialize() function (or contain a type that has a Serialize() function) instead of a serialize() function.
size_t Dimensionality() const
Get the dimensionality of the distribution.
Definition: prereqs.hpp:56
double Probability(const arma::vec &observation) const
Return the probability of the given observation.
DiscreteDistribution()
Default constructor, which creates a distribution that has no observations.
DiscreteDistribution(const arma::Col< size_t > &numObservations)
Define the multidimensional discrete distribution as having numObservations possible observations...
const arma::vec & Probabilities(const size_t dim=0) const
Return the vector of probabilities for the given dimension (read-only).
void Train(const arma::mat &observations)
Estimate the probability distribution directly from the given observations.
DiscreteDistribution(const std::vector< arma::vec > &probabilities)
Define the multidimensional discrete distribution as having the given probabilities for each observat...
double LogProbability(const arma::vec &observation) const
Return the log probability of the given observation.
arma::vec & Probabilities(const size_t dim=0)
Modify the vector of probabilities for the given dimension.
static MLPACK_EXPORT util::NullOutStream Debug
MLPACK_EXPORT is required for global variables, so that they are properly exported by the Windows com...
Definition: log.hpp:79
Miscellaneous math random-related routines.
DiscreteDistribution(const size_t numObservations)
Define the discrete distribution as having numObservations possible observations. ...