mlpack  master
missing_policy.hpp
Go to the documentation of this file.
1 
12 #ifndef MLPACK_CORE_DATA_MAP_POLICIES_MISSING_POLICY_HPP
13 #define MLPACK_CORE_DATA_MAP_POLICIES_MISSING_POLICY_HPP
14 
15 #include <mlpack/prereqs.hpp>
16 #include <unordered_map>
17 #include <boost/bimap.hpp>
19 #include <limits>
20 
21 namespace mlpack {
22 namespace data {
31 {
32  public:
33  // typedef of MappedType
34  using MappedType = double;
35 
37  {
38  // Nothing to initialize here.
39  }
40 
48  explicit MissingPolicy(std::set<std::string> missingSet) :
49  missingSet(std::move(missingSet))
50  {
51  // Nothing to initialize here.
52  }
53 
68  template <typename MapType>
70  const size_t dimension,
71  MapType& maps,
72  std::vector<Datatype>& types)
73  {
74  // mute the unused parameter warning (does nothing here.)
75  (void)types;
76  // If this condition is true, either we have no mapping for the given string
77  // or we have no mappings for the given dimension at all. In either case,
78  // we create a mapping.
79  const double NaN = std::numeric_limits<double>::quiet_NaN();
80  if (missingSet.count(string) != 0 &&
81  (maps.count(dimension) == 0 ||
82  maps[dimension].first.left.count(string) == 0))
83  {
84  // This string does not exist yet.
85  typedef boost::bimap<std::string, MappedType>::value_type PairType;
86  maps[dimension].first.insert(PairType(string, NaN));
87 
88  size_t& numMappings = maps[dimension].second;
89  ++numMappings;
90  return NaN;
91  }
92  else
93  {
94  // This string already exists in the mapping or not included in
95  // the missingSet.
96  return NaN;
97  }
98  }
99 
116  template <typename eT, typename MapType>
117  void MapTokens(const std::vector<std::string>& tokens,
118  size_t& row,
119  arma::Mat<eT>& matrix,
120  MapType& maps,
121  std::vector<Datatype>& types)
122  {
123  // MissingPolicy allows double type matrix only, because it uses NaN.
124  static_assert(std::is_same<eT, double>::value, "You must use double type "
125  " matrix in order to apply MissingPolicy");
126 
127  std::stringstream token;
128  for (size_t i = 0; i != tokens.size(); ++i)
129  {
130  token.str(tokens[i]);
131  token>>matrix.at(row, i);
132  // if the token is not number, map it.
133  // or if token is a number, but is included in the missingSet, map it.
134  if (token.fail() || missingSet.find(tokens[i]) != std::end(missingSet))
135  {
136  const eT val = static_cast<eT>(this->MapString(tokens[i], row, maps,
137  types));
138  matrix.at(row, i) = val;
139  }
140  token.clear();
141  }
142  }
143 
144  private:
145  // Note that missingSet and maps are different.
146  // missingSet specifies which value/string should be mapped.
147  std::set<std::string> missingSet;
148 }; // class MissingPolicy
149 
150 } // namespace data
151 } // namespace mlpack
152 
153 #endif
Linear algebra utility functions, generally performed on matrices or vectors.
Definition: binarize.hpp:18
The core includes that mlpack expects; standard C++ includes and Armadillo.
MissingPolicy(std::set< std::string > missingSet)
Create the MissingPolicy object with the given missingSet.
Definition: prereqs.hpp:56
std::set< std::string > missingSet
MappedType MapString(const std::string &string, const size_t dimension, MapType &maps, std::vector< Datatype > &types)
Given the string and the dimension to which it belongs, and the maps and types supplied by the caller, returns the string's numeric mapping, which for MissingPolicy is always NaN.
test cpp RESULT_VARIABLE MEX_RESULT_TRASH OUTPUT_VARIABLE MEX_OUTPUT ERROR_VARIABLE MEX_ERROR_TRASH string(REGEX MATCH"Warning: You are using"MEX_WARNING"${MEX_OUTPUT}") if(MEX_WARNING) string(REGEX REPLACE".*using [a-zA-Z]* version \"([0-9.]*)[^\"]*\".*""\\1"OTHER_COMPILER_VERSION"$
Definition: CMakeLists.txt:18
void MapTokens(const std::vector< std::string > &tokens, size_t &row, arma::Mat< eT > &matrix, MapType &maps, std::vector< Datatype > &types)
MapTokens turns a vector of strings into numeric values and puts them into the given matrix; tokens that cannot be parsed as numbers, or that appear in missingSet, are stored as NaN.
MissingPolicy is used as a helper class for DatasetMapper.