From ec51b8530443a8ab6c3ee03fffc41cb0cff53b88 Mon Sep 17 00:00:00 2001 From: Thomas Purcell <purcell@fhi-berlin.mpg.de> Date: Sun, 31 May 2020 17:39:01 +0200 Subject: [PATCH] Added documentation --- configure.ac | 3 +- src/descriptor_identifier/Model/Model.hpp | 67 +++++++++-- src/descriptor_identifier/SISSORegressor.hpp | 105 +++++++++++++++--- src/feature_creation/node/FeatureNode.cpp | 4 +- src/feature_creation/node/FeatureNode.hpp | 2 +- src/feature_creation/node/Node.hpp | 2 +- .../node/operator_nodes/OperatorNode.cpp | 6 +- .../node/operator_nodes/OperatorNode.hpp | 1 - .../value_storage/nodes_value_containers.hpp | 67 ++++++++++- src/feature_creation/units/Unit.hpp | 94 +++++++++++++++- 10 files changed, 308 insertions(+), 43 deletions(-) diff --git a/configure.ac b/configure.ac index 65a1b3a0..be0643f9 100644 --- a/configure.ac +++ b/configure.ac @@ -72,8 +72,9 @@ elif test x${with_mpi} = xintel ; then elif test x${with_mpi} = xmvapich ; then echo "..mvapich requested.." elif test x${with_mpi} != xno ; then - AC_MSG_ERROR("Please specify MPI implementation (openmpi, intel, mvapich)") + AC_MSG_ERROR("Please specify MPI implementation: openmpi, intel, mvapich") fi + if test x${with_mpi} != xno; then ACX_MPI AC_PROG_CC([$MPICC]) diff --git a/src/descriptor_identifier/Model/Model.hpp b/src/descriptor_identifier/Model/Model.hpp index 9a3180c4..b4364916 100644 --- a/src/descriptor_identifier/Model/Model.hpp +++ b/src/descriptor_identifier/Model/Model.hpp @@ -3,35 +3,86 @@ #include <feature_creation/node/Node.hpp> +/** + * @brief Class to store the models found from SISSO + * + */ class Model { - int _n_samp; - int _n_dim; + int _n_samp; //!< The number of samples per feature + int _n_dim; //!< Dimension of the model - std::vector<std::shared_ptr<Node>> _feats; + std::vector<std::shared_ptr<Node>> _feats; //!< List of features in the model - std::unique_ptr<double[]> _coefs; - std::unique_ptr<double[]> _prop; - std::unique_ptr<double[]> _error; - 
std::unique_ptr<double[]> _D; + std::unique_ptr<double[]> _coefs; //!< Coefficients for the features + std::unique_ptr<double[]> _prop; //!< The property to be modeled + std::unique_ptr<double[]> _error; //!< The error of the model + std::unique_ptr<double[]> _D; //!< The Descriptor matrix - std::vector<double> _prop_est; + std::vector<double> _prop_est; //!< The estimated Property public: + /** + * @brief Constructor for the model + * + * @param prop The property + * @param feats The features for the model + */ Model(std::vector<double> prop, std::vector<std::shared_ptr<Node>> feats); + + /** + * @brief The copy constructor + * + * @param o The model to be copied + */ Model(Model& o); + + /** + * @brief The Move constructor + * + * @param o The Model to be moved + */ Model(Model&& o); + /** + * @brief Convert the model to a string + + * @return The string of the model + */ std::string toString() const; + + /** + * @brief Accessor function to _prop_est + */ inline std::vector<double>& predict(){return _prop_est;} + + /** + * @brief Copy the error into a new array + * + * @param res pointer to the beginning of the array + */ inline void copy_error(double* res){std::copy_n(_error.get(), _n_samp, res);} + + /** + * @brief The rmse of the model + */ inline double rmse(){return util_funcs::norm(_error.get(), _n_samp);} + + /** + * @brief The max Absolute error of the array + */ inline double max_ae() { return *std::max_element(_error.get(), _error.get() + _n_samp, [](double d1, double d2){return std::abs(d1) < std::abs(d2);}); } }; +/** + * @brief Print a model to a string stream + * + * @param outStream The output stream the model is to be printed + * @param model The model to be printed + */ std::ostream& operator<< (std::ostream& outStream, const Model& model); #endif \ No newline at end of file diff --git a/src/descriptor_identifier/SISSORegressor.hpp b/src/descriptor_identifier/SISSORegressor.hpp index 43bc2eb6..018e75e1 100644 --- 
a/src/descriptor_identifier/SISSORegressor.hpp +++ b/src/descriptor_identifier/SISSORegressor.hpp @@ -4,47 +4,124 @@ #include <feature_creation/feature_space/FeatureSpace.hpp> #include <descriptor_identifier/Model/Model.hpp> +/** + * @brief SISSO Regressor class, to find the best models, and store them + * + */ class SISSORegressor { protected: - std::shared_ptr<FeatureSpace> _feat_space; - std::shared_ptr<MPI_Interface> _mpi_comm; + std::shared_ptr<FeatureSpace> _feat_space; //!< Feature Space for the problem + std::shared_ptr<MPI_Interface> _mpi_comm; //!< MPI Communicator - int _n_samp; - int _n_dim; - int _lwork; - int _rank; + int _n_samp; //!< the number of samples per feature + int _n_dim; //!< Number of dimensions to calculate + int _lwork; //!< size of the work array + int _rank; //!< Ranks for the least squares problem - std::unique_ptr<double[]> _a; - std::unique_ptr<double[]> _b; - std::unique_ptr<double[]> _ones; - std::unique_ptr<double[]> _error; - std::unique_ptr<double[]> _work; - std::unique_ptr<double[]> _s; + std::unique_ptr<double[]> _a; //!< A matrix for least squares + std::unique_ptr<double[]> _b; //!< Solution array for least squares + std::unique_ptr<double[]> _ones; //!< Array of ones to copy over for least squares comparison + std::unique_ptr<double[]> _error; //!< Array to calculate the residuals for the models + std::unique_ptr<double[]> _work; //!< The work array for least squares problems + std::unique_ptr<double[]> _s; //!< The S array for least squares problems - std::vector<Model> _models; - std::vector<double> _prop; + std::vector<Model> _models; //!< List of models + std::vector<double> _prop; //!< Property array public: + /** + * @brief Constructor for the Regressor + * + * @param prop Property to model + * @param n_dim Maximum dimension of the model + */ SISSORegressor(std::shared_ptr<FeatureSpace> feat_space, std::vector<double> prop, int n_dim); + /** + * @brief Get the optimal size of the working array + * + * @param n_dim 
Dimension of the least squares matrix + * @return Optimal size of the working array + */ int get_opt_lwork(int n_dim); + + /** + * @brief Perform Least squares optimization + * + * @param inds Feature indexes to get the model of + * @param coeffs Coefficients for the model + */ void least_squares(std::vector<int>& inds, double* coeffs); + /** + * @brief Set the residual for the next step + * + * @param inds indexes of the selected features + * @param coeffs Coefficients of the model + */ void set_error(std::vector<int>& inds, double* coeffs); + + /** + * @brief Set the A matrix for the least squares problem + * + * @param inds indexes of the selected features + */ void set_a(std::vector<int>& inds); + /** + * @brief Fit the models + */ void fit(); + + /** + * @brief Perform the l0 normalization for a property or the residual + * + * @param prop Property to fit + * @param n_dim the dimensionality of the model + */ void l0_norm(std::vector<double>& prop, int n_dim); + /** + * @brief Accessor function for feat_space + */ inline std::shared_ptr<FeatureSpace> feat_space(){return _feat_space;} + + /** + * @brief Accessor function for prop + */ inline std::vector<double>& prop(){return _prop;} + + /** + * @brief Accessor function for models + */ inline std::vector<Model>& models(){return _models;} + + /** + * @brief Accessor function for n_samp + */ inline int n_samp(){return _n_samp;} + + /** + * @brief Accessor function for n_dim + */ inline int n_dim(){return _n_dim;} + + /** + * @brief Accessor function for lwork + */ inline int lwork(){return _lwork;} + + /** + * @brief Accessor function for rank + */ inline int rank(){return _rank;} + + /** + * @brief Accessor function for error + */ inline double* error(){return _error.get();} + }; #endif \ No newline at end of file diff --git a/src/feature_creation/node/FeatureNode.cpp b/src/feature_creation/node/FeatureNode.cpp index d5fc2d78..373bb586 100644 --- a/src/feature_creation/node/FeatureNode.cpp +++ 
b/src/feature_creation/node/FeatureNode.cpp @@ -4,7 +4,7 @@ FeatureNode::FeatureNode() {} FeatureNode::FeatureNode(int feat_ind, std::string expr, std::vector<double> value, Unit unit) : - Node<0>(feat_ind, value.size()), + Node(feat_ind, value.size()), _expr(expr), _unit(unit) { @@ -12,7 +12,7 @@ FeatureNode::FeatureNode(int feat_ind, std::string expr, std::vector<double> val } FeatureNode::FeatureNode(const FeatureNode &o) : - Node<0>(o) + Node(o) {} // BOOST_CLASS_EXPORT(FeatureNode) diff --git a/src/feature_creation/node/FeatureNode.hpp b/src/feature_creation/node/FeatureNode.hpp index 3eb2330f..68f0a1a5 100644 --- a/src/feature_creation/node/FeatureNode.hpp +++ b/src/feature_creation/node/FeatureNode.hpp @@ -102,7 +102,7 @@ public: * @param ar Archive representation of node */ template <typename Archive> - void serialize(Archive& ar) + void serialize(Archive& ar, const unsigned int version) { ar & boost::serialization::base_object<Node>(*this); ar & _expr; diff --git a/src/feature_creation/node/Node.hpp b/src/feature_creation/node/Node.hpp index 497561f0..fbc18b80 100644 --- a/src/feature_creation/node/Node.hpp +++ b/src/feature_creation/node/Node.hpp @@ -111,7 +111,7 @@ public: * @param ar Archive representation of node */ template <typename Archive> - void serialize(Archive& ar) + void serialize(Archive& ar, const unsigned int version) { ar & _n_samp; ar & _feat_ind; diff --git a/src/feature_creation/node/operator_nodes/OperatorNode.cpp b/src/feature_creation/node/operator_nodes/OperatorNode.cpp index 373aaf19..ff1808ca 100644 --- a/src/feature_creation/node/operator_nodes/OperatorNode.cpp +++ b/src/feature_creation/node/operator_nodes/OperatorNode.cpp @@ -1,9 +1,9 @@ #include <feature_creation/node/operator_nodes/OperatorNode.hpp> -OperatorNode() +OperatorNode::OperatorNode() {} -OperatorNode(std::vector<node_ptr> feats, int rung, int feat_ind) : +OperatorNode::OperatorNode(std::vector<node_ptr> feats, int rung, int feat_ind) : Node(feat_ind, 
feats[0]->n_samp()), _rung_offset(rung), _feats(feats) @@ -11,7 +11,7 @@ OperatorNode(std::vector<node_ptr> feats, int rung, int feat_ind) : set_feat_val_ptrs(); } -OperatorNode(const OperatorNode &o) : +OperatorNode::OperatorNode(const OperatorNode &o) : Node(o), _rung_offset(o._rung_offset), _feats(o._feats), diff --git a/src/feature_creation/node/operator_nodes/OperatorNode.hpp b/src/feature_creation/node/operator_nodes/OperatorNode.hpp index d4c480a8..e2cf435f 100644 --- a/src/feature_creation/node/operator_nodes/OperatorNode.hpp +++ b/src/feature_creation/node/operator_nodes/OperatorNode.hpp @@ -16,7 +16,6 @@ * @details * */ -template<size_t N> class OperatorNode: public Node { friend class boost::serialization::access; diff --git a/src/feature_creation/node/value_storage/nodes_value_containers.hpp b/src/feature_creation/node/value_storage/nodes_value_containers.hpp index c49437dc..d06c82ac 100644 --- a/src/feature_creation/node/value_storage/nodes_value_containers.hpp +++ b/src/feature_creation/node/value_storage/nodes_value_containers.hpp @@ -8,26 +8,81 @@ namespace node_value_arrs { - extern int N_SAMPLES; - extern int N_STORE_FEATURES; - extern int N_RUNGS_STORED; + extern int N_SAMPLES; //!< Number of samples in the nodes + extern int N_STORE_FEATURES; //!< Number of features with stored values + extern int N_RUNGS_STORED; //!< Number of rungs with values stored - extern std::unique_ptr<int[]> TEMP_STORAGE_REG; - extern std::unique_ptr<double[]> VALUES_ARR; - extern std::unique_ptr<double[]> TEMP_STORAGE_ARR; + extern std::unique_ptr<int[]> TEMP_STORAGE_REG; //!< Register to see which feature is stored in each slot + extern std::unique_ptr<double[]> VALUES_ARR; //!< Value of the stored features + extern std::unique_ptr<double[]> TEMP_STORAGE_ARR; //!< Array to temporarily store feature values + /** + * @brief Get the maximum number of new features for each rung + * + * @param new_op operator that will add new features + * @param n_current_features current 
number of features in the rung + * + * @return The maximum number of new features for the rung + */ int get_number_new_features(std::string new_op, int n_current_features); + /** + * @brief Get the maximum number of features to store + * + * @param allowed_operators list of allowed operators + * @param n_dims Number of dimensions to store + * @param n_feats number of features in Phi_0 + * @return The maximum number of features to store + */ int get_max_number_features(std::vector<std::string> allowed_operators, int n_dims, int n_feats); + /** + * @brief Set up the value arrays + * @details Take initial parameters and construct the feature arrays + * + * @param n_samples number of samples per feature + * @param n_dims Number of dimensions to store + * @param n_primary_feat number of primary features + * @param allowed_operators list of allowed operators + */ void setup_values_arr(int n_samples, int n_dims, int n_primary_feat, std::vector<std::string> allowed_operators); + /** + * @brief Get a reference slot/feature register + * + * @param ind Feature index + * @param offset Offset integer for TEMP_STORE_ARRAY + * + * @return The register element for a given feature index and offset + */ inline int& temp_storage_reg(int ind, int offset = 0){return TEMP_STORAGE_REG[(ind % N_STORE_FEATURES) + offset * N_STORE_FEATURES];} + /** + * @brief Access element of the permanent storage array + * + * @param feature_ind The feature index to access + * + * @return pointer to the feature's data array + */ inline double* access_value_arr(int feature_ind){return VALUES_ARR.get() + feature_ind*N_SAMPLES;} + /** + * @brief Access element of temporary storage array + * + * @param slot The slot of the temporary storage arrays + * + * @return pointer to the feature's temporary storage + */ inline double* access_temp_storage(int slot){return TEMP_STORAGE_ARR.get() + slot*N_SAMPLES;} + /** + * @brief Access the value_ptr to a feature + * + * @param ind Feature index + * @param offset the offset for the storage + * + * @return The value pointer + 
*/ inline double* get_value_ptr(int ind, int offset = 0) { if(ind < N_STORE_FEATURES) diff --git a/src/feature_creation/units/Unit.hpp b/src/feature_creation/units/Unit.hpp index 1e3e4f0b..9f360f9f 100644 --- a/src/feature_creation/units/Unit.hpp +++ b/src/feature_creation/units/Unit.hpp @@ -15,34 +15,111 @@ using StringRange = boost::iterator_range<std::string::const_iterator>; - +/** + * @brief Class to define the units of the features + */ class Unit { protected: - std::map<std::string, double> _dct; + std::map<std::string, double> _dct; //!< the dictionary describing the units <unit string, unit power> public: + /** + * @brief Base Constructor + * @details Creates a unit with no entries + */ Unit(); - Unit(std::map<std::string, double> _dct); + + /** + * @brief Constructor of the unit with the dictionary representation + * + * @param dct dictionary representation of the unit + */ Unit(std::map<std::string, double> dct); + + /** + * @brief Construct the unit based on a string + * @details Take a string representation of a unit, and build the Unit + * + * @param unit_str The string to build the unit + */ Unit(std::string unit_str); + /** + * @brief Copy Constructor + * + * @param o Unit to copy + */ Unit(const Unit &o); + /** + * @brief Convert the unit into a string + */ std::string toString() const; + /** + * @brief Multiply operator for units + * + * @param unit_2 The second unit to multiply by + * @return The product of this unit with unit_2 + */ Unit operator*(Unit unit_2); + + /** + * @brief Divide operator for units + * + * @param unit_2 The second unit to divide by + * @return The quotient of this unit with unit_2 + */ Unit operator/(Unit unit_2); + + /** + * @brief Exponentiation operator for units + * + * @param power power to exponentiate the unit + * @return The unit raised to the power + */ Unit operator^(double power); + + /** + * @brief Inverse operator for units + * + * @return The inverse of this unit + */ Unit inverse(); + /** + * @brief 
Determine if a second unit is equal to this one + * + * @param unit_2 The unit to compare against + * @return True if unit_2 equals this unit + */ bool equal(Unit unit_2); + + /** + * @brief Determine if a second unit is equal to this one + * + * @param unit_2 The unit to compare against + * @return True if unit_2 equals this unit + */ inline bool operator== (Unit unit_2){return equal(unit_2);} - inline bool operator!= (Unit unit_2){return !equal(unit_2);} - friend std::ostream& operator<< (std::ostream& outStream, const Unit& unit); + /** + * @brief Determine if a second unit is not equal to this one + * + * @param unit_2 The unit to compare against + * @return False if unit_2 equals this unit + */ + inline bool operator!= (Unit unit_2){return !equal(unit_2);} + /** + * @brief Accessor function to the dictionary + */ inline std::map<std::string, double> dct(){return _dct;} + /** + * @brief Function to serialize the data for MPI communication + */ template <typename Archive> void serialize(Archive& ar, const unsigned int version) { @@ -50,7 +127,12 @@ public: } }; - +/** + * @brief Operator to print the unit to a string stream + * + * @param outStream Stream to print the unit to + * @param unit Unit to print to the string stream + */ std::ostream& operator<< (std::ostream& outStream, const Unit& unit); #endif \ No newline at end of file -- GitLab