Commit f1d98ebb authored by Thomas Purcell
Browse files

add documentation to nodes/feature space

Documentation added to feature space and base node operators
parent ab2a5d4f
......@@ -53,26 +53,6 @@ FeatureSpace::FeatureSpace(
_scores.reserve(_phi.size());
}
/**
 * @brief Copy constructor
 * @details Member-wise copy of o. The MPI communicator handle is copied as well, so that
 *          mpi_comm() on the copy returns the same communicator as on the original instead
 *          of a default-constructed (empty) shared_ptr.
 *
 * @param o FeatureSpace to copy from (taken by non-const reference to match the declaration)
 */
FeatureSpace::FeatureSpace(FeatureSpace &o) :
    _mpi_comm(o._mpi_comm),
    _max_phi(o._max_phi),
    _n_sis_select(o._n_sis_select),
    _n_samp(o._n_samp),
    _n_feat(o._n_feat),
    _max_abs_feat_val(o._max_abs_feat_val),
    _start_gen(o._start_gen),
    _start_ind(o._start_ind),
    _prop(o._prop),
    _scores(o._scores),
    _D(o._D),
    _allowed_ops(o._allowed_ops),
    _un_operators(o._un_operators),
    _bin_operators(o._bin_operators),
    _com_bin_operators(o._com_bin_operators),
    // NOTE(review): the node_ptr vectors are copied element by element; whether the copy
    // shares the underlying nodes with o depends on what node_ptr is - confirm.
    _phi_selected(o._phi_selected),
    _phi(o._phi),
    _phi_0(o._phi_0)
{}
void FeatureSpace::generate_feature_space()
{
std::vector<double> scores(_phi.size());
......@@ -179,6 +159,7 @@ void FeatureSpace::generate_feature_space()
}
std::cout << "DONE"<< std::endl;
}
_n_feat = _phi.size();
}
void FeatureSpace::project_r(double* prop)
......
......@@ -10,34 +10,47 @@
#include <iostream>
// namespace mpi = boost::mpi;
/**
* @brief Feature Space for SISSO calculations
* @details Stores and performs all feature calculations for SIS
*
*/
class FeatureSpace
{
std::shared_ptr<MPI_Interface> _mpi_comm;
int _max_phi;
int _n_sis_select;
int _n_samp;
int _n_feat;
std::shared_ptr<MPI_Interface> _mpi_comm; //!< MPI communicator
int _max_phi; //!< Maximum rung for the feature creation
int _n_sis_select; //!< Number of features to select for each dimension
int _n_samp; //!< Number of samples
int _n_feat; //!< Total number of features
double _max_abs_feat_val;
double _max_abs_feat_val; //!< Maximum absolute value for any feature
std::vector<int> _start_gen;
std::vector<int> _start_ind;
std::vector<int> _start_gen; //!< list of starting index for each generation
std::vector<double> _prop;
std::vector<double> _scores;
std::vector<double> _D;
std::vector<double> _prop; //!< property to learn
std::vector<double> _scores; //!< projection scores for each feature
std::vector<double> _D; //!< matrix of selected features
std::vector<std::string> _allowed_ops;
std::vector<un_op_node_gen> _un_operators;
std::vector<bin_op_node_gen> _bin_operators;
std::vector<bin_op_node_gen> _com_bin_operators;
std::vector<std::string> _allowed_ops; //!< list of all allowed operators strings
std::vector<un_op_node_gen> _un_operators; //!< list of all unary operators
std::vector<bin_op_node_gen> _bin_operators; //!< list of all binary operators
std::vector<bin_op_node_gen> _com_bin_operators; //!< list of all commutable binary operators
std::vector<node_ptr> _phi_selected;
std::vector<node_ptr> _phi;
std::vector<node_ptr> _phi_0;
std::vector<node_ptr> _phi_selected; //!< selected features
std::vector<node_ptr> _phi; //!< all features
std::vector<node_ptr> _phi_0; //!< initial feature space
public:
/**
* @brief Constructor for the feature space
* @details Constructs the feature space from an initial set of features and a list of allowed operators
*
* @param mpi_comm MPI communicator for the calculations
* @param phi_0 initial set of features
* @param allowed_ops list of allowed operators
* @param max_phi highest rung value for the calculation
* @param n_sis_select number of features to select during each SIS step
* @param max_abs_feat_val maximum absolute feature value
*/
FeatureSpace(
std::shared_ptr<MPI_Interface> mpi_comm,
std::vector<node_ptr> phi_0,
......@@ -47,19 +60,59 @@ public:
double max_abs_feat_val=1e27
);
FeatureSpace(FeatureSpace &o);
/**
* @brief Generate the full feature set from the allowed operators and initial feature set
* @details populates phi with all features from an initial set and the allowed operators
*/
void generate_feature_space();
/**
 * @brief Get the selected features
 *
 * @return a copy of _phi_selected (the vector is returned by value)
 */
inline std::vector<node_ptr> phi_selected()
{
    return _phi_selected;
}
/**
 * @brief Get all features
 *
 * @return a copy of _phi (the vector is returned by value)
 */
inline std::vector<node_ptr> phi()
{
    return _phi;
}
/**
 * @brief Get the initial feature space
 *
 * @return a copy of _phi_0 (the vector is returned by value)
 */
inline std::vector<node_ptr> phi0()
{
    return _phi_0;
}
/**
 * @brief Get the projection scores
 *
 * @return a non-const reference to _scores, so callers can modify the stored scores in place
 */
inline std::vector<double>& scores()
{
    return _scores;
}
/**
 * @brief Get the MPI communicator
 *
 * @return a copy of the shared_ptr stored in _mpi_comm
 */
inline std::shared_ptr<MPI_Interface> mpi_comm()
{
    return _mpi_comm;
}
/**
 * @brief Access the values of a selected feature
 * @details _D is indexed as a flat array with _n_samp entries per selected feature;
 *          no bounds check is performed on ind
 *
 * @param ind index of the selected feature
 * @return pointer to element ind * _n_samp of _D
 */
inline double* D(int ind)
{
    const int offset = ind * _n_samp;
    return &_D[offset];
}
/**
* @brief calculate the projection scores for all features for a given property
* @details Calculate the projection score based on the Pearson correlation
*
* @param prop pointer to the first value of the property to project the features onto (presumably _n_samp values - confirm)
*/
void project_r(double* prop);
/**
* @brief Perform SIS on a feature set with a specified property
* @details Perform sure-independence screening of the feature set against the supplied property vector
*
* @param prop The property to calculate SIS from
*/
void sis(std::vector<double>& prop);
};
......
......@@ -4,7 +4,7 @@ FeatureNode::FeatureNode()
{}
FeatureNode::FeatureNode(int feat_ind, std::string expr, std::vector<double> value, Unit unit) :
Node(feat_ind, value.size()),
Node<0>(feat_ind, value.size()),
_expr(expr),
_unit(unit)
{
......@@ -12,7 +12,7 @@ FeatureNode::FeatureNode(int feat_ind, std::string expr, std::vector<double> val
}
FeatureNode::FeatureNode(const FeatureNode &o) :
Node(o)
Node<0>(o)
{}
// BOOST_CLASS_EXPORT(FeatureNode)
......@@ -13,35 +13,96 @@
//! Callable taking one double and returning a double
using unary_op_func = std::function<double(double)>;
//! Callable taking two doubles and returning a double
using binary_op_func = std::function<double(double, double)>;
/**
* @brief Node that describe the leaves of the operator graph (Initial features in Phi_0)
*/
class FeatureNode: public Node
{
friend class boost::serialization::access;
protected:
std::string _expr;
Unit _unit;
std::string _expr; //!< Expression of the feature
Unit _unit; //!< Unit for the feature
public:
/**
* @brief Base Constructor
* @details This is only used for serialization
*/
FeatureNode();
/**
* @brief Constructs a feature node
*
* @param feat_ind index of the feature
* @param expr Expression for the feature
* @param value Value of the feature for each sample
* @param unit Unit of the feature
*/
FeatureNode(int feat_ind, std::string expr, std::vector<double> value, Unit unit);
/**
* @brief Copy constructor
*
* @param o Node to be copied
*/
FeatureNode(const FeatureNode &o);
/**
 * @brief Get the expression of the feature
 *
 * @return a copy of the stored expression string _expr
 */
inline std::string expr()
{
    return _expr;
}
/**
 * @brief Get the unit of the feature
 *
 * @return a copy of the stored unit _unit
 */
inline Unit unit()
{
    return _unit;
}
/**
 * @brief Set the value for the feature
 * @details No-op for this node type: the body returns immediately without computing anything
 */
inline void set_value()
{
}
/**
 * @brief Access the rung of the feature (depth of the chart)
 *
 * @return always 0 for this node type
 */
inline int rung()
{
    return 0;
}
/**
 * @brief Set up the feature value pointers
 * @details No-op for this node type: the body returns immediately
 */
inline void set_feat_val_ptrs()
{
}
/**
 * @brief Accessor function to the feature value pointers
 *
 * @return always an empty vector for this node type
 */
inline std::vector<double*> feat_value_ptrs()
{
    std::vector<double*> no_ptrs(0);
    return no_ptrs;
}
/**
 * @brief Check if the feature contains a non-finite value
 * @details Scans the _n_samp values starting at value_ptr()
 *
 * @return true if std::isfinite is false for at least one value (NaN or +/-infinity)
 */
inline bool is_nan()
{
    const auto not_finite = [](double val){return !std::isfinite(val);};
    return std::any_of(value_ptr(), value_ptr() + _n_samp, not_finite);
}
/**
 * @brief Check if feature is constant
 * @details Computes the mean of the _n_samp values with util_funcs::mean and compares
 *          every value against it
 *
 * @return true if every value lies within 1e-12 of the mean
 */
inline bool is_const()
{
    const double avg = util_funcs::mean(value_ptr(), _n_samp);
    const auto near_avg = [avg](double val){return std::abs(val - avg) < 1e-12;};
    return std::all_of(value_ptr(), value_ptr() + _n_samp, near_avg);
}
/**
 * @brief Accessor function to the value of the feature
 *
 * @return the pointer that node_value_arrs::get_value_ptr returns for _feat_ind
 */
inline double* value_ptr()
{
    return node_value_arrs::get_value_ptr(_feat_ind);
}
/**
* @brief Serialization function to send over MPI
*
* @param ar Archive representation of node
*/
template <typename Archive>
void serialize(Archive& ar, const unsigned int version)
void serialize(Archive& ar)
{
ar & boost::serialization::base_object<Node>(*this);
ar & _expr;
......
......@@ -17,39 +17,101 @@
//! Callable taking one double and returning a double
using unary_op_func = std::function<double(double)>;
//! Callable taking two doubles and returning a double
using binary_op_func = std::function<double(double, double)>;
/**
* @brief Base class for a Node
* @details Class used to describe a Node on the descriptor graph. Features are treated as an operation graph, these are the nodes on that graph.
*
*/
class Node
{
protected:
int _n_samp;
int _feat_ind;
int _n_samp; //!< Number of samples in the feature
int _feat_ind; //!< Index of the feature
public:
/**
* @brief Base Constructor
* @details This is only used for serialization
*/
Node();
Node(int feat_ind, int n_samp);
Node(const Node &o);
// bool equal(Node node_2);
/**
* @brief Constructor that specifies feature index and number of samples
*
* @param feat_ind index of the feature
* @param n_samp number of samples in the node
*/
Node(int feat_ind, int n_samp);
// inline bool operator== (Node node_2){return equal(node_2);}
// inline bool operator!= (Node node_2){return !equal(node_2);}
/**
* @brief Copy constructor
*
* @param o Node to be copied
*/
Node(const Node &o);
/**
 * @brief Accessor function to get the number of samples
 *
 * @return the value of _n_samp
 */
inline int n_samp()
{
    return _n_samp;
}
/**
 * @brief Accessor function to get the feature index
 *
 * @return a non-const reference to _feat_ind, so callers can change the index through it
 */
inline int& feat_ind()
{
    return _feat_ind;
}
/**
* @brief Get the expression for the overall descriptor (From head node down)
*/
virtual std::string expr() = 0;
/**
* @brief Get the unit for the overall descriptor (From head node down)
*/
virtual Unit unit() = 0;
/**
* @brief Set the value for the feature
*/
virtual void set_value() = 0;
/**
* @brief Access the rung of the feature (Depth of the chart)
*/
virtual int rung() = 0;
/**
* @brief Set up the feature value pointers
*/
virtual void set_feat_val_ptrs() = 0;
/**
* @brief Accessor function to the feature value pointers
*/
virtual std::vector<double*> feat_value_ptrs() = 0;
/**
* @brief Accessor function to the value of the feature
*/
virtual double* value_ptr() = 0;
/**
* @brief Check if the feature contains NaN
*/
virtual bool is_nan() = 0;
/**
* @brief Check if feature is constant
*/
virtual bool is_const() = 0;
/**
* @brief Serialization function to send over MPI
*
* @param ar Archive representation of node
*/
template <typename Archive>
void serialize(Archive& ar, const unsigned int version)
void serialize(Archive& ar)
{
ar & _n_samp;
ar & _feat_ind;
......
#include <feature_creation/node/operator_nodes/OperatorNode.hpp>
OperatorNode::OperatorNode()
OperatorNode()
{}
OperatorNode::OperatorNode(std::vector<node_ptr> feats, int rung, int feat_ind) :
OperatorNode(std::vector<node_ptr> feats, int rung, int feat_ind) :
Node(feat_ind, feats[0]->n_samp()),
_rung_offset(rung),
_feats(feats)
......@@ -11,7 +11,7 @@ OperatorNode::OperatorNode(std::vector<node_ptr> feats, int rung, int feat_ind)
set_feat_val_ptrs();
}
OperatorNode::OperatorNode(const OperatorNode &o) :
OperatorNode(const OperatorNode &o) :
Node(o),
_rung_offset(o._rung_offset),
_feats(o._feats),
......
......@@ -8,8 +8,15 @@
#include <boost/serialization/export.hpp>
#include <boost/serialization/shared_ptr.hpp>
#include <boost/serialization/split_member.hpp>
#include <boost/serialization/vector.hpp>
#include <boost/serialization/array.hpp>
/**
* @brief Base class to describe operator nodes
* @details
*
*/
template<size_t N>
class OperatorNode: public Node
{
friend class boost::serialization::access;
......@@ -20,11 +27,27 @@ protected:
std::vector<double*> _feat_val_ptrs;
public:
/**
* @brief Base Constructor
* @details This is only used for serialization
*/
OperatorNode();
/**
* @brief Constructor
* @details Constructs an operator node with a set of features
*
* @param feats array of features that the operator will act on
* @param rung rung the feature is on (depth of the tree)
* @param feat_ind index of the feature
*/
OperatorNode(std::vector<node_ptr> feats, int rung, int feat_ind);
/**
* @brief Copy constructor
*
* @param o OperatorNode to be copied
*/
OperatorNode(const OperatorNode &o);
virtual std::string expr() = 0;
virtual Unit unit() = 0;
......@@ -40,9 +63,19 @@ public:
set_feat_val_ptrs();
}
/**
 * @brief Accessor function to the feature value pointers
 *
 * @return a copy of _feat_val_ptrs (the vector is returned by value)
 */
inline std::vector<double*> feat_value_ptrs()
{
    return _feat_val_ptrs;
}
/**
 * @brief Access the rung of the feature (depth of the chart)
 *
 * @return the value of _rung_offset
 */
inline int rung()
{
    return _rung_offset;
}
/**
* @brief Accessor function to the value of the feature
*/
inline double* value_ptr()
{
if((_rung_offset > node_value_arrs::N_RUNGS_STORED) && (node_value_arrs::temp_storage_reg(_feat_ind) != _feat_ind))
......@@ -51,13 +84,23 @@ public:
return node_value_arrs::get_value_ptr(_feat_ind);
}
/**
 * @brief Check if the feature contains a non-finite value
 * @details Scans the _n_samp values starting at value_ptr()
 *
 * @return true if std::isfinite is false for at least one value (NaN or +/-infinity)
 */
inline bool is_nan()
{
    const auto not_finite = [](double val){return !std::isfinite(val);};
    return std::any_of(value_ptr(), value_ptr() + _n_samp, not_finite);
}
/**
 * @brief Check if feature is constant
 * @details Computes the mean of the _n_samp values with util_funcs::mean and compares
 *          every value against it
 *
 * @return true if every value lies within 1e-12 of the mean
 */
inline bool is_const()
{
    const double avg = util_funcs::mean(value_ptr(), _n_samp);
    const auto near_avg = [avg](double val){return std::abs(val - avg) < 1e-12;};
    return std::all_of(value_ptr(), value_ptr() + _n_samp, near_avg);
}
/**
* @brief Set up the feature value pointers
*/
inline void set_feat_val_ptrs()
{
_value_ptr = node_value_arrs::get_value_ptr(_feat_ind, 0);
......
......@@ -27,8 +27,8 @@ typedef std::function<node_ptr(node_ptr, node_ptr, int, int)> bin_op_node_gen;
/**
 * @brief Lookup tables from operator strings to operator node generator functions
 */
namespace allowed_op_maps
{
    //! map that converts a string into an operator node generator function for all unary operators
    extern std::map<std::string, un_op_node_gen> unary_operator_map;

    //! map that converts a string into an operator node generator function for all binary operators
    extern std::map<std::string, bin_op_node_gen> binary_operator_map;

    // Declared here and defined elsewhere; presumably fills the two maps above - confirm against the definition
    void set_node_maps();
}
......
......@@ -8,86 +8,205 @@ typedef std::function<void(int, std::vector<double*>&, double*)> op_func;
namespace allowed_op_funcs
{
/**
 * @brief Element-wise addition of two input arrays
 *
 * @param size number of elements read from each input array and written to out
 * @param inputs pointers to the input arrays; inputs[0] and inputs[1] are the two summands
 * @param out pointer to the output array; out[i] receives inputs[0][i] + inputs[1][i]
 */
inline void add(int size, std::vector<double*>& inputs, double* out)
{
    const double* lhs = inputs[0];
    const double* rhs = inputs[1];
    for(int ii = 0; ii < size; ++ii)
    {
        out[ii] = lhs[ii] + rhs[ii];
    }
}
/**
 * @brief Element-wise subtraction of two input arrays
 *
 * @param size number of elements read from each input array and written to out
 * @param inputs pointers to the input arrays; inputs[1] is subtracted from inputs[0]
 * @param out pointer to the output array; out[i] receives inputs[0][i] - inputs[1][i]
 */
inline void sub(int size, std::vector<double*>& inputs, double* out)
{
    const double* lhs = inputs[0];
    const double* rhs = inputs[1];
    for(int ii = 0; ii < size; ++ii)
    {
        out[ii] = lhs[ii] - rhs[ii];
    }
}
/**
 * @brief Element-wise absolute difference of two input arrays
 *
 * @param size number of elements read from each input array and written to out
 * @param inputs pointers to the input arrays; inputs[0] and inputs[1] are compared
 * @param out pointer to the output array; out[i] receives |inputs[0][i] - inputs[1][i]|
 */
inline void abs_diff(int size, std::vector<double*>& inputs, double* out)
{
    const double* lhs = inputs[0];
    const double* rhs = inputs[1];
    for(int ii = 0; ii < size; ++ii)
    {
        out[ii] = std::abs(lhs[ii] - rhs[ii]);
    }
}
/**
 * @brief Element-wise multiplication of two input arrays
 *
 * @param size number of elements read from each input array and written to out
 * @param inputs pointers to the input arrays; inputs[0] and inputs[1] are the two factors
 * @param out pointer to the output array; out[i] receives inputs[0][i] * inputs[1][i]
 */
inline void mult(int size, std::vector<double*>& inputs, double* out)
{
    const double* lhs = inputs[0];
    const double* rhs = inputs[1];
    for(int ii = 0; ii < size; ++ii)
    {
        out[ii] = lhs[ii] * rhs[ii];
    }
}
/**
 * @brief Element-wise division of two input arrays
 *
 * @param size number of elements read from each input array and written to out
 * @param inputs pointers to the input arrays; inputs[0] is the numerator, inputs[1] the denominator
 * @param out pointer to the output array; out[i] receives inputs[0][i] / inputs[1][i]
 */
inline void div(int size, std::vector<double*>& inputs, double* out)
{
    const double* numer = inputs[0];
    const double* denom = inputs[1];
    for(int ii = 0; ii < size; ++ii)
    {
        out[ii] = numer[ii] / denom[ii];
    }
}
/**
 * @brief Element-wise exponential of one input array
 *
 * @param size number of elements read from the input array and written to out
 * @param inputs pointers to the input arrays; only inputs[0] is read
 * @param out pointer to the output array; out[i] receives std::exp(inputs[0][i])
 */
inline void exp(int size, std::vector<double*>& inputs, double* out)
{
    const double* src = inputs[0];
    for(int ii = 0; ii < size; ++ii)
    {
        out[ii] = std::exp(src[ii]);
    }
}
/**
 * @brief Element-wise negative exponential of one input array
 *
 * @param size number of elements read from the input array and written to out
 * @param inputs pointers to the input arrays; only inputs[0] is read
 * @param out pointer to the output array; out[i] receives std::exp(-1.0 * inputs[0][i])
 */
inline void neg_exp(int size, std::vector<double*>& inputs, double* out)
{
    const double* src = inputs[0];
    for(int ii = 0; ii < size; ++ii)
    {
        out[ii] = std::exp(-1.0 * src[ii]);
    }
}
/**
* @brief Function to perform the square operation