diff --git a/src/feature_creation/feature_space/FeatureSpace.cpp b/src/feature_creation/feature_space/FeatureSpace.cpp index 6dd2787f4dc3b808371901bc912932b030ccd286..07c09289182e203fa0e5f2c325e1fc9245bdf3a7 100644 --- a/src/feature_creation/feature_space/FeatureSpace.cpp +++ b/src/feature_creation/feature_space/FeatureSpace.cpp @@ -53,26 +53,6 @@ FeatureSpace::FeatureSpace( _scores.reserve(_phi.size()); } -FeatureSpace::FeatureSpace(FeatureSpace &o) : - _max_phi(o._max_phi), - _n_sis_select(o._n_sis_select), - _n_samp(o._n_samp), - _n_feat(o._n_feat), - _max_abs_feat_val(o._max_abs_feat_val), - _start_gen(o._start_gen), - _start_ind(o._start_ind), - _prop(o._prop), - _scores(o._scores), - _D(o._D), - _allowed_ops(o._allowed_ops), - _un_operators(o._un_operators), - _bin_operators(o._bin_operators), - _com_bin_operators(o._com_bin_operators), - _phi_selected(o._phi_selected), - _phi(o._phi), - _phi_0(o._phi_0) -{} - void FeatureSpace::generate_feature_space() { std::vector<double> scores(_phi.size()); @@ -179,6 +159,7 @@ void FeatureSpace::generate_feature_space() } std::cout << "DONE"<< std::endl; } + _n_feat = _phi.size(); } void FeatureSpace::project_r(double* prop) diff --git a/src/feature_creation/feature_space/FeatureSpace.hpp b/src/feature_creation/feature_space/FeatureSpace.hpp index cfb0e687bbf71b7ccb75537d57ac1c8510cba801..54cf09e6c2dc94d361d4a72dee68de1623618e5b 100644 --- a/src/feature_creation/feature_space/FeatureSpace.hpp +++ b/src/feature_creation/feature_space/FeatureSpace.hpp @@ -10,34 +10,47 @@ #include <iostream> // namespace mpi = boost::mpi; - +/** + * @brief Feature Space for SISSO calculations + * @details Stores and performs all feature calculations for SIS + * + */ class FeatureSpace { - std::shared_ptr<MPI_Interface> _mpi_comm; - int _max_phi; - int _n_sis_select; - int _n_samp; - int _n_feat; + std::shared_ptr<MPI_Interface> _mpi_comm; //!< MPI communicator + int _max_phi; //!< Maximum rung for the feature creation + int _n_sis_select; 
//!< Number of features to select for each dimension + int _n_samp; //!< Number of samples + int _n_feat; //!< Total number of features - double _max_abs_feat_val; + double _max_abs_feat_val; //!< Maximum absolute value for any feature - std::vector<int> _start_gen; - std::vector<int> _start_ind; + std::vector<int> _start_gen; //!< list of starting indices for each generation - std::vector<double> _prop; - std::vector<double> _scores; - std::vector<double> _D; + std::vector<double> _prop; //!< property to learn + std::vector<double> _scores; //!< projection scores for each feature + std::vector<double> _D; //!< matrix of selected features - std::vector<std::string> _allowed_ops; - std::vector<un_op_node_gen> _un_operators; - std::vector<bin_op_node_gen> _bin_operators; - std::vector<bin_op_node_gen> _com_bin_operators; + std::vector<std::string> _allowed_ops; //!< list of all allowed operator strings + std::vector<un_op_node_gen> _un_operators; //!< list of all unary operators + std::vector<bin_op_node_gen> _bin_operators; //!< list of all binary operators + std::vector<bin_op_node_gen> _com_bin_operators; //!< list of all commutable binary operators - std::vector<node_ptr> _phi_selected; - std::vector<node_ptr> _phi; - std::vector<node_ptr> _phi_0; + std::vector<node_ptr> _phi_selected; //!< selected features + std::vector<node_ptr> _phi; //!< all features + std::vector<node_ptr> _phi_0; //!< initial feature space public: + /** + * @brief Constructor for the feature space + * @details Constructs the feature space from an initial set of features (phi_0) and a list of allowed operators + * + * @param mpi_comm MPI communicator for the calculations + * @param allowed_ops list of allowed operators + * @param max_phi highest rung value for the calculation + * @param n_sis_select number of features to select during each SIS step + * @param max_abs_feat_val maximum absolute feature value + */ FeatureSpace( std::shared_ptr<MPI_Interface> mpi_comm, std::vector<node_ptr> phi_0, @@ 
-47,19 +60,59 @@ public: double max_abs_feat_val=1e27 ); - FeatureSpace(FeatureSpace &o); - + /** + * @brief Generate the full feature set from the allowed operators and initial feature set + * @details populates phi with all features from an initial set and the allowed operators + */ void generate_feature_space(); + /** + * @brief Accessor function for _phi_selected (returns a copy) + */ inline std::vector<node_ptr> phi_selected(){return _phi_selected;}; + + /** + * @brief Accessor function for _phi (returns a copy) + */ inline std::vector<node_ptr> phi(){return _phi;}; + + /** + * @brief Accessor function for _phi_0 (returns a copy) + */ inline std::vector<node_ptr> phi0(){return _phi_0;}; + + /** + * @brief Accessor function for _scores (returns a mutable reference) + */ inline std::vector<double>& scores(){return _scores;}; + + /** + * @brief Accessor function for _mpi_comm + */ inline std::shared_ptr<MPI_Interface> mpi_comm(){return _mpi_comm;} + + /** + * @brief Access the values of a selected feature + * @details Returns a pointer to element ind * _n_samp of _D + * + * @param ind index of the selected feature + */ inline double* D(int ind){return &_D[ind * _n_samp];} + /** + * @brief Calculate the projection scores for all features for a given property + * @details Calculate the projection score based on the Pearson correlation + * + * @param prop pointer to the property values to project onto (length presumably _n_samp; confirm against the implementation) + */ void project_r(double* prop); + /** + * @brief Perform SIS on a feature set with a specified property + * @details Perform sure-independence screening against the supplied property + * + * @param prop The property to calculate SIS from + */ void sis(std::vector<double>& prop); }; diff --git a/src/feature_creation/node/FeatureNode.cpp b/src/feature_creation/node/FeatureNode.cpp index 373bb586195ebd011949d389e49d349762092b2e..d5fc2d7801c2982da48ba2f30f7a13aadde9c24c 100644 --- a/src/feature_creation/node/FeatureNode.cpp +++ b/src/feature_creation/node/FeatureNode.cpp @@ -4,7 +4,7 @@ FeatureNode::FeatureNode() {} FeatureNode::FeatureNode(int feat_ind, std::string expr, 
std::vector<double> value, Unit unit) : - Node(feat_ind, value.size()), + Node(feat_ind, value.size()), _expr(expr), _unit(unit) { @@ -12,7 +12,7 @@ FeatureNode::FeatureNode(int feat_ind, std::string expr, std::vector<double> val } FeatureNode::FeatureNode(const FeatureNode &o) : - Node(o) + Node(o), _expr(o._expr), _unit(o._unit) {} // BOOST_CLASS_EXPORT(FeatureNode) diff --git a/src/feature_creation/node/FeatureNode.hpp b/src/feature_creation/node/FeatureNode.hpp index 76781110358836bfefac0efcb5954e282dd98be7..3eb2330fecddea24fde08a6f5c2ec463a8f6ed32 100644 --- a/src/feature_creation/node/FeatureNode.hpp +++ b/src/feature_creation/node/FeatureNode.hpp @@ -13,35 +13,96 @@ typedef std::function<double(double)> unary_op_func; typedef std::function<double(double, double)> binary_op_func; +/** + * @brief Node that describes the leaves of the operator graph (Initial features in Phi_0) + */ class FeatureNode: public Node { friend class boost::serialization::access; protected: - std::string _expr; - Unit _unit; + std::string _expr; //!< Expression of the feature + Unit _unit; //!< Unit for the feature public: + /** + * @brief Base Constructor + * @details This is only used for serialization + */ FeatureNode(); + + /** + * @brief Constructs a feature node + * + * @param feat_ind index of the feature + * @param expr Expression for the feature + * @param value Value of the feature for each sample + * @param unit Unit of the feature + */ FeatureNode(int feat_ind, std::string expr, std::vector<double> value, Unit unit); + + /** + * @brief Copy constructor + * @details Copies the base Node state together with _expr and _unit + * @param o Node to be copied + */ FeatureNode(const FeatureNode &o); + /** + * @brief Get the expression for the overall descriptor (From head node down) + */ inline std::string expr(){return _expr;} + + /** + * @brief Get the unit for the overall descriptor (From head node down) + */ inline Unit unit(){return _unit;} + + /** + * @brief Set the value for the feature (no-op for a FeatureNode) + */ inline void set_value(){return;} + + /** + * @brief Access the rung of the 
feature (Depth of the chart) + */ inline int rung(){return 0;} + + /** + * @brief Set up the feature value pointers (no-op for a FeatureNode) + */ inline void set_feat_val_ptrs(){return;} + + /** + * @brief Accessor function to the feature value pointers (always empty for a FeatureNode) + */ inline std::vector<double*> feat_value_ptrs(){return std::vector<double*>(0);} + + /** + * @brief Check if the feature contains any non-finite value (NaN or +/-inf) + */ inline bool is_nan(){return std::any_of(value_ptr(), value_ptr() + _n_samp, [](double d){return !std::isfinite(d);});} + + /** + * @brief Check if feature is constant (every value within 1e-12 of the mean) + */ inline bool is_const() { double mean = util_funcs::mean(value_ptr(), _n_samp); return std::all_of(value_ptr(), value_ptr() + _n_samp, [&mean](double d){return std::abs(d - mean) < 1e-12;}); } + /** + * @brief Accessor function to the value of the feature + */ inline double* value_ptr(){return node_value_arrs::get_value_ptr(_feat_ind);} + /** + * @brief Serialization function to send over MPI + * @details Boost.Serialization invokes serialize(ar, version), so the version parameter is part of the required signature + * @param ar Archive representation of node + */ template <typename Archive> - void serialize(Archive& ar, const unsigned int version) + void serialize(Archive& ar, const unsigned int version) { ar & boost::serialization::base_object<Node>(*this); ar & _expr; diff --git a/src/feature_creation/node/Node.hpp b/src/feature_creation/node/Node.hpp index 261150edc98ece424db1c9adc1e40c4c021d3274..497561f090a42642ef8f1f403b4e1fc10ce30b2a 100644 --- a/src/feature_creation/node/Node.hpp +++ b/src/feature_creation/node/Node.hpp @@ -17,39 +17,101 @@ typedef std::function<double(double)> unary_op_func; typedef std::function<double(double, double)> binary_op_func; - +/** + * @brief Base class for a Node + * @details Class used to describe a Node on the descriptor graph. Features are treated as an operation graph; these are the nodes on that graph. 
+ * + */ class Node { protected: - int _n_samp; - int _feat_ind; + int _n_samp; //!< Number of samples in the feature + int _feat_ind; //!< Index of the feature public: + /** + * @brief Base Constructor + * @details This is only used for serialization + */ Node(); - Node(int feat_ind, int n_samp); - Node(const Node &o); - - // bool equal(Node node_2); + /** + * @brief Constructor that specifies feature index and number of samples + * + * @param feat_ind index of the feature + * @param n_samp number of samples in the node + */ + Node(int feat_ind, int n_samp); - // inline bool operator== (Node node_2){return equal(node_2);} - // inline bool operator!= (Node node_2){return !equal(node_2);} + /** + * @brief Copy constructor + * + * @param o Node to be copied + */ + Node(const Node &o); + /** + * @brief Accessor function to get the number of samples + */ inline int n_samp(){return _n_samp;} + + /** + * @brief Accessor function to get the feature index (returned by mutable reference) + */ inline int& feat_ind(){return _feat_ind;} + /** + * @brief Get the expression for the overall descriptor (From head node down) + */ virtual std::string expr() = 0; + + /** + * @brief Get the unit for the overall descriptor (From head node down) + */ virtual Unit unit() = 0; + + /** + * @brief Set the value for the feature + */ virtual void set_value() = 0; + + /** + * @brief Access the rung of the feature (Depth of the chart) + */ virtual int rung() = 0; + + /** + * @brief Set up the feature value pointers + */ virtual void set_feat_val_ptrs() = 0; + + /** + * @brief Accessor function to the feature value pointers + */ virtual std::vector<double*> feat_value_ptrs() = 0; + + /** + * @brief Accessor function to the value of the feature + */ virtual double* value_ptr() = 0; + + /** + * @brief Check if the feature contains NaN + */ virtual bool is_nan() = 0; + + /** + * @brief Check if feature is constant + */ virtual bool is_const() = 0; + /** + * @brief Serialization function to send over MPI + * + * @param ar Archive 
representation of node + */ template <typename Archive> - void serialize(Archive& ar, const unsigned int version) + void serialize(Archive& ar, const unsigned int version) { ar & _n_samp; ar & _feat_ind; diff --git a/src/feature_creation/node/operator_nodes/OperatorNode.cpp b/src/feature_creation/node/operator_nodes/OperatorNode.cpp index ff1808cab47411381ce0ec3c8ba83f01f73a4ebe..373aaf19fdf40f783e85d245f9826a5729f4e912 100644 --- a/src/feature_creation/node/operator_nodes/OperatorNode.cpp +++ b/src/feature_creation/node/operator_nodes/OperatorNode.cpp @@ -1,9 +1,9 @@ #include <feature_creation/node/operator_nodes/OperatorNode.hpp> -OperatorNode::OperatorNode() +OperatorNode() {} -OperatorNode::OperatorNode(std::vector<node_ptr> feats, int rung, int feat_ind) : +OperatorNode(std::vector<node_ptr> feats, int rung, int feat_ind) : Node(feat_ind, feats[0]->n_samp()), _rung_offset(rung), _feats(feats) @@ -11,7 +11,7 @@ OperatorNode::OperatorNode(std::vector<node_ptr> feats, int rung, int feat_ind) set_feat_val_ptrs(); } -OperatorNode::OperatorNode(const OperatorNode &o) : +OperatorNode(const OperatorNode &o) : Node(o), _rung_offset(o._rung_offset), _feats(o._feats), diff --git a/src/feature_creation/node/operator_nodes/OperatorNode.hpp b/src/feature_creation/node/operator_nodes/OperatorNode.hpp index 5749bfc6c3fa31a2fb11b4a8f57a8de0fafc1da4..d4c480a87c0308abf8556ef1e3d25c4a910f7426 100644 --- a/src/feature_creation/node/operator_nodes/OperatorNode.hpp +++ b/src/feature_creation/node/operator_nodes/OperatorNode.hpp @@ -8,8 +8,15 @@ #include <boost/serialization/export.hpp> #include <boost/serialization/shared_ptr.hpp> #include <boost/serialization/split_member.hpp> -#include <boost/serialization/vector.hpp> +#include <boost/serialization/array.hpp> + +/** + * @brief Base class to describe operator nodes + * @details NOTE(review): the class is declared as a template on N here, while OperatorNode.cpp in this change defines the constructors without a template header or class qualifier; confirm the definitions are moved into this header + * NOTE(review): boost/serialization/vector.hpp was replaced by array.hpp although std::vector members remain; confirm none of them is serialized + */ +template<size_t N> class OperatorNode: public Node { friend class boost::serialization::access; @@ -20,11 +27,27 @@ protected: std::vector<double*> _feat_val_ptrs; public: + 
/** + * @brief Base Constructor + * @details This is only used for serialization + */ OperatorNode(); + + /** + * @brief Constructor + * @details Constructs an operator node with a set of features + * + * @param feats list of features that the operator will act on + * @param rung rung the feature is on (depth of the tree) + * @param feat_ind index of the feature + */ OperatorNode(std::vector<node_ptr> feats, int rung, int feat_ind); + /** + * @brief Copy constructor + * @param o OperatorNode to be copied + */ OperatorNode(const OperatorNode &o); - virtual std::string expr() = 0; virtual Unit unit() = 0; @@ -40,9 +63,19 @@ public: set_feat_val_ptrs(); } + /** + * @brief Accessor function to the feature value pointers + */ inline std::vector<double*> feat_value_ptrs(){return _feat_val_ptrs;} + + /** + * @brief Access the rung of the feature (Depth of the chart) + */ inline int rung(){return _rung_offset;} + /** + * @brief Accessor function to the value of the feature + */ inline double* value_ptr() { if((_rung_offset > node_value_arrs::N_RUNGS_STORED) && (node_value_arrs::temp_storage_reg(_feat_ind) != _feat_ind)) @@ -51,13 +84,23 @@ public: return node_value_arrs::get_value_ptr(_feat_ind); } + /** + * @brief Check if the feature contains any non-finite value (NaN or +/-inf) + */ inline bool is_nan(){return std::any_of(value_ptr(), value_ptr() + _n_samp, [](double d){return !std::isfinite(d);});} + + /** + * @brief Check if feature is constant (every value within 1e-12 of the mean) + */ inline bool is_const() { double mean = util_funcs::mean(value_ptr(), _n_samp); return std::all_of(value_ptr(), value_ptr() + _n_samp, [&mean](double d){return std::abs(d - mean) < 1e-12;}); } + /** + * @brief Set up the feature value pointers + */ inline void set_feat_val_ptrs() { _value_ptr = node_value_arrs::get_value_ptr(_feat_ind, 0); diff --git a/src/feature_creation/node/operator_nodes/allowed_ops.hpp b/src/feature_creation/node/operator_nodes/allowed_ops.hpp index 
6d03d379454e577ee7314271f3a464eaf50537f0..b4079cb978c114098e653aff30fb5a695297a144 100644 --- a/src/feature_creation/node/operator_nodes/allowed_ops.hpp +++ b/src/feature_creation/node/operator_nodes/allowed_ops.hpp @@ -27,8 +27,8 @@ typedef std::function<node_ptr(node_ptr, node_ptr, int, int)> bin_op_node_gen; namespace allowed_op_maps { - extern std::map<std::string, un_op_node_gen> unary_operator_map; - extern std::map<std::string, bin_op_node_gen> binary_operator_map; + extern std::map<std::string, un_op_node_gen> unary_operator_map; //!< map that converts a string into an operator node generator function for all unary operators + extern std::map<std::string, bin_op_node_gen> binary_operator_map; //!< map that converts a string into an operator node generator function for all binary operators void set_node_maps(); }; diff --git a/src/feature_creation/node/operator_nodes/functions.hpp b/src/feature_creation/node/operator_nodes/functions.hpp index df037093e2a2c5ac8844795aa3b251a6ecdf6396..ba31571c220a6ed84ce64b73e438828dd3e7194f 100644 --- a/src/feature_creation/node/operator_nodes/functions.hpp +++ b/src/feature_creation/node/operator_nodes/functions.hpp @@ -8,86 +8,205 @@ typedef std::function<void(int, std::vector<double*>&, double*)> op_func; namespace allowed_op_funcs { + /** + * @brief Function to perform the addition operation + * + * @param size size of the array to perform the output on + * @param inputs array of the pointers to the input arrays + * @param out pointer to the output array + */ inline void add(int size, std::vector<double*>& inputs, double* out) { std::transform(inputs[0], inputs[0] + size, inputs[1], out, std::plus<double>()); } + /** + * @brief Function to perform the subtraction operation + * + * @param size size of the array to perform the output on + * @param inputs array of the pointers to the input arrays + * @param out pointer to the output array + */ inline void sub(int size, std::vector<double*>& inputs, double* out) { 
std::transform(inputs[0], inputs[0] + size, inputs[1], out, std::minus<double>()); } + /** + * @brief Function to perform the absolute difference operation + * + * @param size size of the array to perform the output on + * @param inputs array of the pointers to the input arrays + * @param out pointer to the output array + */ inline void abs_diff(int size, std::vector<double*>& inputs, double* out) { std::transform(inputs[0], inputs[0] + size, inputs[1], out, [](double in_0, double in_1){return std::abs(in_0 - in_1);}); } + /** + * @brief Function to perform the multiply operation + * + * @param size size of the array to perform the output on + * @param inputs array of the pointers to the input arrays + * @param out pointer to the output array + */ inline void mult(int size, std::vector<double*>& inputs, double* out) { std::transform(inputs[0], inputs[0] + size, inputs[1], out, std::multiplies<double>()); } + /** + * @brief Function to perform the division operation + * + * @param size size of the array to perform the output on + * @param inputs array of the pointers to the input arrays + * @param out pointer to the output array + */ inline void div(int size, std::vector<double*>& inputs, double* out) { std::transform(inputs[0], inputs[0] + size, inputs[1], out, std::divides<double>()); } + /** + * @brief Function to perform the exponential operation + * + * @param size size of the array to perform the output on + * @param inputs array of the pointers to the input arrays + * @param out pointer to the output array + */ inline void exp(int size, std::vector<double*>& inputs, double* out) { std::transform(inputs[0], inputs[0] + size, out, [](double in_0){return std::exp(in_0);}); } + /** + * @brief Function to perform the negative exponential operation + * + * @param size size of the array to perform the output on + * @param inputs array of the pointers to the input arrays + * @param out pointer to the output array + */ inline void neg_exp(int size, 
std::vector<double*>& inputs, double* out) { std::transform(inputs[0], inputs[0] + size, out, [](double in_0){return std::exp(-1.0*in_0);}); } + /** + * @brief Function to perform the square operation + * + * @param size number of elements to process + * @param inputs array of the pointers to the input arrays + * @param out pointer to the output array + */ inline void sq(int size, std::vector<double*>& inputs, double* out) { std::transform(inputs[0], inputs[0] + size, out, [](double in_0){return std::pow(in_0, 2.0);}); } + /** + * @brief Function to perform the cube operation + * + * @param size number of elements to process + * @param inputs array of the pointers to the input arrays + * @param out pointer to the output array + */ inline void cb(int size, std::vector<double*>& inputs, double* out) { std::transform(inputs[0], inputs[0] + size, out, [](double in_0){return std::pow(in_0, 3.0);}); } + /** + * @brief Function to perform the sixth power operation + * + * @param size number of elements to process + * @param inputs array of the pointers to the input arrays + * @param out pointer to the output array + */ inline void sixth_pow(int size, std::vector<double*>& inputs, double* out) { std::transform(inputs[0], inputs[0] + size, out, [](double in_0){return std::pow(in_0, 6.0);}); } + /** + * @brief Function to perform the cube root operation + * NOTE(review): implemented as std::pow(x, 1.0/3.0), which yields NaN for negative x; std::cbrt would handle negative inputs + * @param size number of elements to process + * @param inputs array of the pointers to the input arrays + * @param out pointer to the output array + */ inline void cbrt(int size, std::vector<double*>& inputs, double* out) { std::transform(inputs[0], inputs[0] + size, out, [](double in_0){return std::pow(in_0, 1.0/3.0);}); } + /** + * @brief Function to perform the square root operation + * + * @param size number of elements to process + * @param inputs array of the pointers to the input arrays + * @param out pointer to the output array + */ inline void 
sqrt(int size, std::vector<double*>& inputs, double* out) { std::transform(inputs[0], inputs[0] + size, out, [](double in_0){return std::sqrt(in_0);}); } + /** + * @brief Function to perform the inverse operation + * + * @param size size of the array to perform the output on + * @param inputs array of the pointers to the input arrays + * @param out pointer to the output array + */ inline void inv(int size, std::vector<double*>& inputs, double* out) { std::transform(inputs[0], inputs[0] + size, out, [](double in_0){return 1.0 / in_0;}); } + /** + * @brief Function to perform the log operation + * + * @param size size of the array to perform the output on + * @param inputs array of the pointers to the input arrays + * @param out pointer to the output array + */ inline void log(int size, std::vector<double*>& inputs, double* out) { std::transform(inputs[0], inputs[0] + size, out, [](double in_0){return std::log(in_0);}); } + /** + * @brief Function to perform the sin operation + * + * @param size size of the array to perform the output on + * @param inputs array of the pointers to the input arrays + * @param out pointer to the output array + */ inline void sin(int size, std::vector<double*>& inputs, double* out) { std::transform(inputs[0], inputs[0] + size, out, [](double in_0){return std::sin(in_0);}); } + /** + * @brief Function to perform the cos operation + * + * @param size size of the array to perform the output on + * @param inputs array of the pointers to the input arrays + * @param out pointer to the output array + */ inline void cos(int size, std::vector<double*>& inputs, double* out) { std::transform(inputs[0], inputs[0] + size, out, [](double in_0){return std::cos(in_0);}); } + /** + * @brief Function to perform the absolute value operation + * + * @param size size of the array to perform the output on + * @param inputs array of the pointers to the input arrays + * @param out pointer to the output array + */ inline void abs(int size, std::vector<double*>& 
inputs, double* out) { std::transform(inputs[0], inputs[0] + size, out, [](double in_0){return std::abs(in_0);});