FeatureSpace.hpp 7.69 KB
Newer Older
Thomas Purcell's avatar
Thomas Purcell committed
1
2
3
#ifndef FEATURE_SPACE
#define FEATURE_SPACE

Thomas Purcell's avatar
Thomas Purcell committed
4
#include <mpi_interface/MPI_Interface.hpp>
Thomas Purcell's avatar
Thomas Purcell committed
5
#include <feature_creation/node/FeatureNode.hpp>
Thomas Purcell's avatar
Thomas Purcell committed
6
#include <feature_creation/node/ModelNode.hpp>
Thomas Purcell's avatar
Thomas Purcell committed
7
#include <feature_creation/node/operator_nodes/allowed_ops.hpp>
Thomas Purcell's avatar
Thomas Purcell committed
8
#include <feature_creation/node/value_storage/nodes_value_containers.hpp>
Thomas Purcell's avatar
Thomas Purcell committed
9
#include <utils/project.hpp>
Thomas Purcell's avatar
Thomas Purcell committed
10

Thomas Purcell's avatar
Thomas Purcell committed
11
#include <boost/serialization/shared_ptr.hpp>
12
#include <boost/filesystem.hpp>
Thomas Purcell's avatar
Thomas Purcell committed
13
#include <boost/python.hpp>
Thomas Purcell's avatar
Thomas Purcell committed
14

Thomas Purcell's avatar
Thomas Purcell committed
15
#include <iostream>
Thomas Purcell's avatar
Thomas Purcell committed
16
#include <iomanip>
Thomas Purcell's avatar
Thomas Purcell committed
17

18
19
20
namespace python = boost::python;
namespace np = boost::python::numpy;

21
22
23
24
25
/**
 * @brief Feature Space for SISSO calculations
 * @details Stores and performs all feature calculations for SIS
 *
 */
Thomas Purcell's avatar
Thomas Purcell committed
26
27
class FeatureSpace
{
28
    std::vector<node_ptr> _phi_selected; //!< selected features
29
30
31
32
33
34
35
36
37
38
    std::vector<node_ptr> _phi; //!< all features
    std::vector<node_ptr> _phi_0; //!< initial feature space

    std::vector<std::string> _allowed_ops; //!< list of all allowed operators strings
    std::vector<un_op_node_gen> _un_operators; //!< list of all unary operators
    std::vector<bin_op_node_gen> _com_bin_operators; //!< list of all commutable binary operators
    std::vector<bin_op_node_gen> _bin_operators; //!< list of all binary operators

    std::vector<double> _scores; //!< projection scores for each feature

Thomas Purcell's avatar
Thomas Purcell committed
39
    std::vector<int> _task_sizes; //!< The number of elements in each task
40
    std::vector<int> _start_gen; //!< list of starting index for each generation
41
    std::string _feature_space_file; //!< File to store infromation on the feature space
42

Thomas Purcell's avatar
Thomas Purcell committed
43
    std::function<void(double*, double*, std::vector<node_ptr>&, std::vector<int>&, int)> _project; //!< Function used for projection onto SIS
44
    std::shared_ptr<MPI_Interface> _mpi_comm; //!< MPi communicator
45
46
47
48

    double _l_bound; //!< lower bound for absolute value of the features
    double _u_bound; //!< upper bound for absolute value of the features

49
50
51
52
    int _max_phi; //!< Maximum rung for the feature creation
    int _n_sis_select; //!< Number of features to select for each dimensions
    int _n_samp; //!< Number of samples
    int _n_feat; //!< Total number of features
Thomas Purcell's avatar
Thomas Purcell committed
53
    int _n_rung_store; //!< Total rungs stored
54
    int _n_rung_generate; //!< Total number of rungs to generate on the fly
Thomas Purcell's avatar
Thomas Purcell committed
55
    int _max_temp_store;
Thomas Purcell's avatar
Thomas Purcell committed
56
public:
Thomas Purcell's avatar
Thomas Purcell committed
57

58
59
60
61
62
63
64
65
66
67
    /**
     * @brief Constructor for the feature space
     * @details constructs the feature space from an initial set of features and a list of allowed operatiors
     *
     * @param mpi_comm MPI communicator for the calculations
     * @param allowed_ops list of allowed operators
     * @param max_phi highest rung value for the calculation
     * @param n_sis_select number of features to select during each SIS step
     * @param max_abs_feat_val maximum absolute feature value
     */
Thomas Purcell's avatar
Thomas Purcell committed
68
    FeatureSpace(
Thomas Purcell's avatar
Thomas Purcell committed
69
        std::shared_ptr<MPI_Interface> mpi_comm,
Thomas Purcell's avatar
Thomas Purcell committed
70
71
        std::vector<node_ptr> phi_0,
        std::vector<std::string> allowed_ops,
72
        std::vector<double> prop,
Thomas Purcell's avatar
Thomas Purcell committed
73
        std::vector<int> task_sizes,
Thomas Purcell's avatar
Thomas Purcell committed
74
75
        int max_phi=1,
        int n_sis_select=1,
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
        int max_store_rung=-1,
        int n_rung_generate=0,
        double min_abs_feat_val=1e-50,
        double max_abs_feat_val=1e50
    );

    /**
     * @brief Constructor for the feature space
     * @details constructs the feature space from an initial set of features and a list of allowed operatiors
     *
     * @param mpi_comm MPI communicator for the calculations
     * @param allowed_ops list of allowed operators
     * @param max_phi highest rung value for the calculation
     * @param n_sis_select number of features to select during each SIS step
     * @param max_abs_feat_val maximum absolute feature value
     */
    FeatureSpace(
        python::list phi_0,
        python::list allowed_ops,
        python::list prop,
        python::list task_sizes,
        int max_phi=1,
        int n_sis_select=1,
        int max_store_rung=-1,
        int n_rung_generate=0,
        double min_abs_feat_val=1e-50,
        double max_abs_feat_val=1e50
    );

    /**
     * @brief Constructor for the feature space
     * @details constructs the feature space from an initial set of features and a list of allowed operatiors
     *
     * @param mpi_comm MPI communicator for the calculations
     * @param allowed_ops list of allowed operators
     * @param max_phi highest rung value for the calculation
     * @param n_sis_select number of features to select during each SIS step
     * @param max_abs_feat_val maximum absolute feature value
     */
    FeatureSpace(
        python::list phi_0,
        python::list allowed_ops,
        np::ndarray prop,
        python::list task_sizes,
        int max_phi=1,
        int n_sis_select=1,
        int max_store_rung=-1,
123
        int n_rung_generate=0,
Thomas Purcell's avatar
Thomas Purcell committed
124
125
        double min_abs_feat_val=1e-50,
        double max_abs_feat_val=1e50
Thomas Purcell's avatar
Thomas Purcell committed
126
127
    );

128
129
    void initialize_fs(std::vector<double> prop);

130
131
132
133
    /**
     * @brief Generate the full feature set from the allowed operators and initial feature set
     * @details populates phi with all features from an initial set and the allowed operators
     */
134
    void generate_feature_space(std::vector<double>& prop);
Thomas Purcell's avatar
Thomas Purcell committed
135

136
137
138
    /**
     * @brief Accessor function for _phi_selected
     */
139
    inline std::vector<node_ptr> phi_selected(){return _phi_selected;};
140

Thomas Purcell's avatar
Thomas Purcell committed
141
    boost::python::list phi_selected_py();
142

143
144
145
    /**
     * @brief Accessor function for _phi
     */
Thomas Purcell's avatar
Thomas Purcell committed
146
    inline std::vector<node_ptr> phi(){return _phi;};
147
148
149
150

    /**
     * @brief Accessor function for _phi_0
     */
Thomas Purcell's avatar
Thomas Purcell committed
151
    inline std::vector<node_ptr> phi0(){return _phi_0;};
152

Thomas Purcell's avatar
Thomas Purcell committed
153
    boost::python::list phi0_py();
154
155
156
    /**
     * @brief Accessor function for _scores
     */
157
158
159
    inline std::vector<double> scores(){return _scores;}

    inline np::ndarray scores_py(){return python_conv_utils::to_ndarray<double>(_scores);};
160
161
162
163

    /**
     * @brief Accessor function for _mpi_comm
     */
Thomas Purcell's avatar
Thomas Purcell committed
164
    inline std::shared_ptr<MPI_Interface> mpi_comm(){return _mpi_comm;}
165

Thomas Purcell's avatar
Thomas Purcell committed
166
    inline std::vector<int> task_sizes(){return _task_sizes;}
167

168
169
170
171
172
173
    inline boost::python::list task_sizes_py(){return python_conv_utils::to_list<int>(_task_sizes);};

    inline boost::python::list allowed_ops_py(){return python_conv_utils::to_list<std::string>(_allowed_ops);}

    inline boost::python::list start_gen_py(){return python_conv_utils::to_list<int>(_start_gen);}

174
175
    void generate_new_feats(std::vector<node_ptr>::iterator& feat, std::vector<node_ptr>& feat_set, int& feat_ind, double l_bound=1e-50, double u_bound=1e50);

176
    void project_generated(double* prop, int size, std::vector<node_ptr>& phi_selected, std::vector<double>& scores_selected, std::vector<double>& scores_comp);
177

178
    bool valid_score_against_past(double* val_ptr, double cur_score, std::vector<double> scores_past, std::vector<double>& scores_comp);
179
180

    bool valid_score_against_current(int end_check, double* val_ptr, double cur_score, std::vector<double>& scores_selected, std::vector<double>& scores_comp);
181
182
183
184
185
186
    /**
     * @brief Perform SIS on a feature set with a specified property
     * @details Perform sure-independence screening with either the correct property
     *
     * @param prop The property to calculate SIS from
     */
Thomas Purcell's avatar
Thomas Purcell committed
187
    void sis(std::vector<double>& prop);
188

189
190
191
192
193
194
195
196
197
198
199
200
    inline void sis(np::ndarray prop)
    {
        std::vector<double> prop_vec = python_conv_utils::from_ndarray<double>(prop);
        sis(prop_vec);
    }

    inline void sis(python::list prop)
    {
        std::vector<double> prop_vec = python_conv_utils::from_list<double>(prop);
        sis(prop_vec);
    }

201
202
203
204
205
206
207
208
    /**
     * @brief Is a feature in this process' _phi?
     *
     * @param ind index
     * @return True if feature is in this _phi
     */
    inline bool feat_in_phi(int ind){return (ind >= _phi[0]->feat_ind()) && (ind <= _phi.back()->feat_ind());}

Thomas Purcell's avatar
Thomas Purcell committed
209
    static void register_python();
Thomas Purcell's avatar
Thomas Purcell committed
210
211
212
};

#endif