FeatureSpace.hpp 8.31 KB
Newer Older
Thomas Purcell's avatar
Thomas Purcell committed
1
2
3
#ifndef FEATURE_SPACE
#define FEATURE_SPACE

Thomas Purcell's avatar
Thomas Purcell committed
4
#include <mpi_interface/MPI_Interface.hpp>
Thomas Purcell's avatar
Thomas Purcell committed
5
#include <feature_creation/node/FeatureNode.hpp>
Thomas Purcell's avatar
Thomas Purcell committed
6
#include <feature_creation/node/ModelNode.hpp>
Thomas Purcell's avatar
Thomas Purcell committed
7
#include <feature_creation/node/operator_nodes/allowed_ops.hpp>
Thomas Purcell's avatar
Thomas Purcell committed
8
#include <feature_creation/node/value_storage/nodes_value_containers.hpp>
Thomas Purcell's avatar
Thomas Purcell committed
9
#include <utils/project.hpp>
Thomas Purcell's avatar
Thomas Purcell committed
10

Thomas Purcell's avatar
Thomas Purcell committed
11
#include <boost/serialization/shared_ptr.hpp>
12
#include <boost/filesystem.hpp>
Thomas Purcell's avatar
Thomas Purcell committed
13

Thomas Purcell's avatar
Thomas Purcell committed
14
#include <iostream>
Thomas Purcell's avatar
Thomas Purcell committed
15
#include <iomanip>
Thomas Purcell's avatar
Thomas Purcell committed
16

17
18
19
20
// Short aliases for the Boost.Python namespaces; they are only defined when
// PY_BINDINGS is set, i.e. when the Python-facing members below are compiled in.
#ifdef PY_BINDINGS
    namespace np = boost::python::numpy;
    namespace py = boost::python;
#endif
21

22
23
24
25
26
/**
 * @brief Feature Space for SISSO calculations
 * @details Stores and performs all feature calculations for SIS
 *
 */
Thomas Purcell's avatar
Thomas Purcell committed
27
28
class FeatureSpace
{
29
    std::vector<node_ptr> _phi_selected; //!< selected features
30
31
32
33
34
35
36
37
38
39
    std::vector<node_ptr> _phi; //!< all features
    std::vector<node_ptr> _phi_0; //!< initial feature space

    std::vector<std::string> _allowed_ops; //!< list of all allowed operators strings
    std::vector<un_op_node_gen> _un_operators; //!< list of all unary operators
    std::vector<bin_op_node_gen> _com_bin_operators; //!< list of all commutable binary operators
    std::vector<bin_op_node_gen> _bin_operators; //!< list of all binary operators

    std::vector<double> _scores; //!< projection scores for each feature

Thomas Purcell's avatar
Thomas Purcell committed
40
    std::vector<int> _task_sizes; //!< The number of elements in each task
41
    std::vector<int> _start_gen; //!< list of starting index for each generation
42
    std::string _feature_space_file; //!< File to store infromation on the feature space
43

Thomas Purcell's avatar
Thomas Purcell committed
44
    std::function<void(double*, double*, std::vector<node_ptr>&, std::vector<int>&, int)> _project; //!< Function used for projection onto SIS
45
    std::shared_ptr<MPI_Interface> _mpi_comm; //!< MPi communicator
46
47
48
49

    double _l_bound; //!< lower bound for absolute value of the features
    double _u_bound; //!< upper bound for absolute value of the features

50
51
52
53
    int _max_phi; //!< Maximum rung for the feature creation
    int _n_sis_select; //!< Number of features to select for each dimensions
    int _n_samp; //!< Number of samples
    int _n_feat; //!< Total number of features
Thomas Purcell's avatar
Thomas Purcell committed
54
    int _n_rung_store; //!< Total rungs stored
55
    int _n_rung_generate; //!< Total number of rungs to generate on the fly
Thomas Purcell's avatar
Thomas Purcell committed
56
    int _max_temp_store;
Thomas Purcell's avatar
Thomas Purcell committed
57
public:
Thomas Purcell's avatar
Thomas Purcell committed
58

59
60
61
62
63
64
65
66
67
68
    /**
     * @brief Constructor for the feature space
     * @details constructs the feature space from an initial set of features and a list of allowed operatiors
     *
     * @param mpi_comm MPI communicator for the calculations
     * @param allowed_ops list of allowed operators
     * @param max_phi highest rung value for the calculation
     * @param n_sis_select number of features to select during each SIS step
     * @param max_abs_feat_val maximum absolute feature value
     */
Thomas Purcell's avatar
Thomas Purcell committed
69
    FeatureSpace(
Thomas Purcell's avatar
Thomas Purcell committed
70
        std::shared_ptr<MPI_Interface> mpi_comm,
Thomas Purcell's avatar
Thomas Purcell committed
71
72
        std::vector<node_ptr> phi_0,
        std::vector<std::string> allowed_ops,
73
        std::vector<double> prop,
Thomas Purcell's avatar
Thomas Purcell committed
74
        std::vector<int> task_sizes,
Thomas Purcell's avatar
Thomas Purcell committed
75
76
        int max_phi=1,
        int n_sis_select=1,
77
78
79
80
81
82
83
84
        int max_store_rung=-1,
        int n_rung_generate=0,
        double min_abs_feat_val=1e-50,
        double max_abs_feat_val=1e50
    );

    void initialize_fs(std::vector<double> prop);

85
86
87
88
    /**
     * @brief Generate the full feature set from the allowed operators and initial feature set
     * @details populates phi with all features from an initial set and the allowed operators
     */
89
    void generate_feature_space(std::vector<double>& prop);
Thomas Purcell's avatar
Thomas Purcell committed
90

91
92
93
    /**
     * @brief Accessor function for _phi_selected
     */
94
    inline std::vector<node_ptr> phi_selected(){return _phi_selected;};
95
96
97
98

    /**
     * @brief Accessor function for _phi
     */
Thomas Purcell's avatar
Thomas Purcell committed
99
    inline std::vector<node_ptr> phi(){return _phi;};
100
101
102
103

    /**
     * @brief Accessor function for _phi_0
     */
Thomas Purcell's avatar
Thomas Purcell committed
104
    inline std::vector<node_ptr> phi0(){return _phi_0;};
105
106
107
108

    /**
     * @brief Accessor function for _scores
     */
109
110
    inline std::vector<double> scores(){return _scores;}

111
112
113
    /**
     * @brief Accessor function for _mpi_comm
     */
Thomas Purcell's avatar
Thomas Purcell committed
114
    inline std::shared_ptr<MPI_Interface> mpi_comm(){return _mpi_comm;}
115

Thomas Purcell's avatar
Thomas Purcell committed
116
    inline std::vector<int> task_sizes(){return _task_sizes;}
117

118
119
120
121
122
123
124
125
126
    inline std::string feature_space_file(){return _feature_space_file;}
    inline double l_bound(){return _l_bound;}
    inline double u_bound(){return _u_bound;}
    inline int max_phi(){return _max_phi;}
    inline int n_sis_select(){return _n_sis_select;}
    inline int n_samp(){return _n_samp;}
    inline int n_feat(){return _n_feat;}
    inline int n_rung_store(){return _n_rung_store;}
    inline int n_rung_generate(){return _n_rung_generate;}
127

128
129
    void generate_new_feats(std::vector<node_ptr>::iterator& feat, std::vector<node_ptr>& feat_set, int& feat_ind, double l_bound=1e-50, double u_bound=1e50);

130
    void project_generated(double* prop, int size, std::vector<node_ptr>& phi_selected, std::vector<double>& scores_selected, std::vector<double>& scores_comp);
131

132
    bool valid_score_against_past(double* val_ptr, double cur_score, std::vector<double> scores_past, std::vector<double>& scores_comp);
133
134

    bool valid_score_against_current(int end_check, double* val_ptr, double cur_score, std::vector<double>& scores_selected, std::vector<double>& scores_comp);
135
136
137
138
139
140
    /**
     * @brief Perform SIS on a feature set with a specified property
     * @details Perform sure-independence screening with either the correct property
     *
     * @param prop The property to calculate SIS from
     */
Thomas Purcell's avatar
Thomas Purcell committed
141
    void sis(std::vector<double>& prop);
142
143
144
145
146
147
148
149
150

    /**
     * @brief Is a feature in this process' _phi?
     *
     * @param ind index
     * @return True if feature is in this _phi
     */
    inline bool feat_in_phi(int ind){return (ind >= _phi[0]->feat_ind()) && (ind <= _phi.back()->feat_ind());}

151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
    // Python Interface Functions
    #ifdef PY_BINDINGS
        /**
         * @brief Constructor for the feature space
         * @details constructs the feature space from an initial set of features and a list of allowed operatiors
         *
         * @param mpi_comm MPI communicator for the calculations
         * @param allowed_ops list of allowed operators
         * @param max_phi highest rung value for the calculation
         * @param n_sis_select number of features to select during each SIS step
         * @param max_abs_feat_val maximum absolute feature value
         */
        FeatureSpace(
            py::list phi_0,
            py::list allowed_ops,
            py::list prop,
            py::list task_sizes,
            int max_phi=1,
            int n_sis_select=1,
            int max_store_rung=-1,
            int n_rung_generate=0,
            double min_abs_feat_val=1e-50,
            double max_abs_feat_val=1e50
        );

        /**
         * @brief Constructor for the feature space
         * @details constructs the feature space from an initial set of features and a list of allowed operatiors
         *
         * @param mpi_comm MPI communicator for the calculations
         * @param allowed_ops list of allowed operators
         * @param max_phi highest rung value for the calculation
         * @param n_sis_select number of features to select during each SIS step
         * @param max_abs_feat_val maximum absolute feature value
         */
        FeatureSpace(
            py::list phi_0,
            py::list allowed_ops,
            np::ndarray prop,
            py::list task_sizes,
            int max_phi=1,
            int n_sis_select=1,
            int max_store_rung=-1,
            int n_rung_generate=0,
            double min_abs_feat_val=1e-50,
            double max_abs_feat_val=1e50
        );


        inline void sis(np::ndarray prop)
        {
            std::vector<double> prop_vec = python_conv_utils::from_ndarray<double>(prop);
            sis(prop_vec);
        }

        inline void sis(py::list prop)
        {
            std::vector<double> prop_vec = python_conv_utils::from_list<double>(prop);
            sis(prop_vec);
        }

        py::list phi_selected_py();
        py::list phi0_py();
        inline np::ndarray scores_py(){return python_conv_utils::to_ndarray<double>(_scores);};
        inline py::list task_sizes_py(){return python_conv_utils::to_list<int>(_task_sizes);};
        inline py::list allowed_ops_py(){return python_conv_utils::to_list<std::string>(_allowed_ops);}
        inline py::list start_gen_py(){return python_conv_utils::to_list<int>(_start_gen);}
    #endif
Thomas Purcell's avatar
Thomas Purcell committed
219
220
221
};

#endif