15 #ifndef MLPACK_CORE_DATA_DATASET_INFO_HPP 16 #define MLPACK_CORE_DATA_DATASET_INFO_HPP 19 #include <unordered_map> 20 #include <boost/bimap.hpp> 35 template <
typename PolicyType>
63 const size_t dimension);
85 const size_t dimension);
99 template <
typename eT>
100 void MapTokens(
const std::vector<std::string>& tokens,
size_t& row,
101 arma::Mat<eT>& matrix);
125 template<
typename Archive>
133 const PolicyType&
Policy()
const;
139 void Policy(PolicyType&& policy);
146 using BiMapType = boost::bimap<std::string, typename PolicyType::MappedType>;
151 using MapType = std::unordered_map<size_t, std::pair<BiMapType, size_t>>;
167 #include "dataset_mapper_impl.hpp" Auxiliary information for a dataset, including mappings to/from strings and the datatype of each dimension.
PolicyType policy
The policy object tells the dataset mapper how the categorical values should be mapped.
Linear algebra utility functions, generally performed on matrices or vectors.
Datatype
The Datatype enum specifies the types of data mlpack algorithms can use.
Datatype Type(const size_t dimension) const
Return the type of a given dimension (numeric or categorical).
std::unordered_map< size_t, std::pair< BiMapType, size_t >> MapType
The core includes that mlpack expects; standard C++ includes and Armadillo.
FirstShim< T > CreateNVP(T &t, const std::string &name, typename std::enable_if_t< HasSerialize< T >::value > *=0)
Call this function to produce a name-value pair; this is similar to BOOST_SERIALIZATION_NVP(), but should be used for types that have a Serialize() function (or contain a type that has a Serialize() function) instead of a serialize() function.
size_t Dimensionality() const
Get the dimensionality of the DatasetMapper object (that is, how many dimensions it has information for).
DatasetMapper(const size_t dimensionality=0)
Create the DatasetMapper object with the given dimensionality.
MapType maps
The maps object stores pairs of strings and their numeric values.
std::vector< Datatype > types
Types of each dimension.
PolicyType::MappedType MapString(const std::string &string, const size_t dimension)
Given the string and the dimension to which it belongs, return its numeric mapping.
void MapTokens(const std::vector< std::string > &tokens, size_t &row, arma::Mat< eT > &matrix)
MapTokens turns a vector of strings into numeric variables and puts them into a given matrix...
size_t NumMappings(const size_t dimension) const
Get the number of mappings for a particular dimension.
const PolicyType & Policy() const
Return the policy of the mapper.
boost::bimap< std::string, typename PolicyType::MappedType > BiMapType
const std::string & UnmapString(const size_t value, const size_t dimension)
Return the string that corresponds to a given value in a given dimension.
test cpp RESULT_VARIABLE MEX_RESULT_TRASH OUTPUT_VARIABLE MEX_OUTPUT ERROR_VARIABLE MEX_ERROR_TRASH string(REGEX MATCH"Warning: You are using"MEX_WARNING"${MEX_OUTPUT}") if(MEX_WARNING) string(REGEX REPLACE".*using [a-zA-Z]* version \"([0-9.]*)[^\"]*\".*""\\1"OTHER_COMPILER_VERSION"$
void Serialize(Archive &ar, const unsigned int)
Serialize the dataset information.
PolicyType::MappedType UnmapValue(const std::string &string, const size_t dimension)
Return the value that corresponds to a given string in a given dimension.