FeatureSpace.hpp 29 KB
Newer Older
1
// Copyright 2021 Thomas A. R. Purcell
2
//
3
4
5
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
6
//
7
//     http://www.apache.org/licenses/LICENSE-2.0
8
//
9
10
11
12
13
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
/** @file feature_creation/feature_space/FeatureSpace.hpp
Thomas Purcell's avatar
Thomas Purcell committed
15
 * @brief Create a feature space from an initial set of features and algebraic operators
16
17
18
19
20
21
22
 *
 *  Use an initial set of features and combine them to generate more complicated algebraical features. SIS is also performed here
 *
 *  @author Thomas A. R. Purcell (tpurcell)
 *  @bug No known bugs.
 */

Thomas Purcell's avatar
Thomas Purcell committed
23
24
25
#ifndef FEATURE_SPACE
#define FEATURE_SPACE

26
#include <boost/filesystem.hpp>
Thomas Purcell's avatar
Thomas Purcell committed
27

28
#include <utility>
Thomas Purcell's avatar
Thomas Purcell committed
29

Thomas Purcell's avatar
Thomas Purcell committed
30
#include "feature_creation/node/utils.hpp"
Thomas Purcell's avatar
Thomas Purcell committed
31
32
33
34
35

#include "mpi_interface/MPI_Interface.hpp"
#include "mpi_interface/MPI_ops.hpp"
#include "mpi_interface/serialize_tuple.h"

Thomas Purcell's avatar
Thomas Purcell committed
36
37
#include "utils/project.hpp"

38
39
40
41
#ifdef PY_BINDINGS
    namespace np = boost::python::numpy;
    namespace py = boost::python;
#endif
42

43
// DocString: cls_feat_space
44
45
46
47
48
/**
 * @brief Feature Space for SISSO calculations
 * @details Stores and performs all feature calculations for SIS
 *
 */
Thomas Purcell's avatar
Thomas Purcell committed
49
50
class FeatureSpace
{
Thomas Purcell's avatar
Thomas Purcell committed
51
52
53
    std::vector<node_ptr> _phi_selected; //!< A vector containing all of the selected features
    std::vector<node_ptr> _phi; //!< A vector containing all features generated (Not including those created on the Fly during SIS)
    const std::vector<node_ptr> _phi_0; //!< A vector containing all of the Primary features
54

55
    #ifdef PARAMETERIZE
Thomas Purcell's avatar
Thomas Purcell committed
56
57
58
59
60
61
62
63
    std::vector<node_ptr> _phi_reparam; //!< A vector containing the features created when reparameterizing using the residuals
    // NOTE(review): the description on _end_no_params is word-for-word the one on _start_rung_reparam; confirm what _end_no_params actually indexes
    std::vector<int> _end_no_params; //!< A vector containing the indexes of each rung where parameterized nodes start
    std::vector<int> _start_rung_reparam; //!< A vector containing the indexes of each rung where parameterized nodes start

    std::vector<un_param_op_node_gen> _un_param_operators; //!< Vector containing all parameterized unary operators with free parameters
    std::vector<bin_param_op_node_gen> _com_bin_param_operators; //!< Vector containing all parameterized commutable binary operators with free parameters
    std::vector<bin_param_op_node_gen> _bin_param_operators; //!< Vector containing all parameterized binary operators with free parameters
    std::vector<std::string> _allowed_param_ops; //!< Vector containing all allowed operators strings for operators with free parameters
64
    #endif
65

Thomas Purcell's avatar
Thomas Purcell committed
66
67
68
69
    std::vector<std::string> _allowed_ops; //!< Vector containing all allowed operators strings
    std::vector<un_op_node_gen> _un_operators; //!< Vector containing all unary operators
    std::vector<bin_op_node_gen> _com_bin_operators; //!< Vector containing all commutable binary operators
    std::vector<bin_op_node_gen> _bin_operators; //!< Vector containing all binary operators
70

Thomas Purcell's avatar
Thomas Purcell committed
71
72
    std::vector<double> _prop; //!< The value of the property vector for each training sample
    std::vector<double> _scores; //!< The projection scores for each feature
73

Thomas Purcell's avatar
Thomas Purcell committed
74
75
76
77
78
    const std::vector<int> _task_sizes; //!< Number of training samples per task
    std::vector<int> _start_rung; //!< Vector containing the indexes where each rung starts in _phi
    const std::string _project_type; //!< The type of LossFunction to use when projecting the features onto a property
    const std::string _feature_space_file; //!< File to output the computer readable representation of the selected features to
    const std::string _feature_space_summary_file; //!< File to output the human readable representation of the selected features to
79

Thomas Purcell's avatar
Thomas Purcell committed
80
81
    std::function<bool(const double*, const int, const double, const std::vector<double>&, const double, const int, const int)> _is_valid; //!< Function used to determine if a feature is too correlated to previously selected features
    std::function<bool(const double*, const int, const double, const std::vector<node_ptr>&, const std::vector<double>&, const double)> _is_valid_feat_list; //!< Function used to determine if a feature is too correlated to previously selected features within a given list
82

Thomas Purcell's avatar
Thomas Purcell committed
83
    std::shared_ptr<MPI_Interface> _mpi_comm; //!< the MPI communicator for the calculation
84

85
    const double _cross_cor_max; //!< Maximum cross-correlation used for selecting features
Thomas Purcell's avatar
Thomas Purcell committed
86
87
    const double _l_bound; //!< The lower bound for the maximum absolute value of the features
    const double _u_bound; //!< The upper bound for the maximum absolute value of the features
88

Thomas Purcell's avatar
Thomas Purcell committed
89
90
91
    int _n_rung_store; //!< The number of rungs to calculate and store the value of the features for all samples
    int _n_feat; //!< Total number of features in the feature space
    int _max_rung; //!< Maximum rung for the feature creation
92

Thomas Purcell's avatar
Thomas Purcell committed
93
94
95
    const int _n_sis_select; //!< Number of features to select during each SIS iteration
    const int _n_samp; //!< Number of samples in the training set
    const int _n_rung_generate; //!< Either 0 or 1, and is the number of rungs to generate on the fly during SIS
96

Thomas Purcell's avatar
Thomas Purcell committed
97
98
    int _max_param_depth; //!< The maximum depth in the binary expression tree to set non-linear optimization
    const bool _reparam_residual; //!< If True then reparameterize features using the residuals of each model
99

Thomas Purcell's avatar
Thomas Purcell committed
100
public:
Thomas Purcell's avatar
Thomas Purcell committed
101

102
    #ifdef PARAMETERIZE
103
    /**
Thomas Purcell's avatar
Thomas Purcell committed
104
     * @brief FeatureSpace constructor given a set of primary features and operators
105
106
     *
     * @param mpi_comm MPI communicator for the calculations
Thomas Purcell's avatar
Thomas Purcell committed
107
108
109
110
     * @param phi_0 The set of primary features
     * @param allowed_ops The list of allowed operators
     * @param allowed_param_ops The list of allowed operators to be used with non-linear optimization
     * @param prop List containing the property vector (training data only)
111
     * @param task_sizes The number of samples per task
Thomas Purcell's avatar
Thomas Purcell committed
112
113
114
115
116
117
118
119
120
121
     * @param project_type The type of loss function/projection operator to use
     * @param max_rung The maximum rung of the feature (Height of the binary expression tree -1)
     * @param n_sis_select The number of features to select during each SIS step
     * @param n_rung_store The number of rungs whose feature's data is always stored in memory
     * @param n_rung_generate Either 0 or 1, and is the number of rungs to generate on the fly during SIS
     * @param cross_corr_max The maximum allowed cross-correlation value between selected features
     * @param min_abs_feat_val The minimum allowed absolute feature value for a feature
     * @param max_abs_feat_val The maximum allowed absolute feature value for a feature
     * @param max_param_depth The maximum depth in the binary expression tree to set non-linear optimization
     * @param reparam_residual If True then reparameterize features using the residuals of each model
122
     */
Thomas Purcell's avatar
Thomas Purcell committed
123
    FeatureSpace(
Thomas Purcell's avatar
Thomas Purcell committed
124
        std::shared_ptr<MPI_Interface> mpi_comm,
Thomas Purcell's avatar
Thomas Purcell committed
125
126
        std::vector<node_ptr> phi_0,
        std::vector<std::string> allowed_ops,
Thomas Purcell's avatar
Thomas Purcell committed
127
        std::vector<std::string> allowed_param_ops,
128
        std::vector<double> prop,
Thomas Purcell's avatar
Thomas Purcell committed
129
        std::vector<int> task_sizes,
130
        std::string project_type="regression",
Thomas Purcell's avatar
Thomas Purcell committed
131
        int max_rung=1,
Thomas Purcell's avatar
Thomas Purcell committed
132
        int n_sis_select=1,
Thomas Purcell's avatar
Thomas Purcell committed
133
        int n_rung_store=-1,
134
        int n_rung_generate=0,
Thomas Purcell's avatar
Thomas Purcell committed
135
        double cross_corr_max=1.0,
136
        double min_abs_feat_val=1e-50,
137
        double max_abs_feat_val=1e50,
138
139
        int max_param_depth=-1,
        bool reparam_residual=false
140
    );
141
142
    #else
    /**
Thomas Purcell's avatar
Thomas Purcell committed
143
     * @brief FeatureSpace constructor given a set of primary features and operators
144
145
     *
     * @param mpi_comm MPI communicator for the calculations
Thomas Purcell's avatar
Thomas Purcell committed
146
147
148
     * @param phi_0 The set of primary features
     * @param allowed_ops The list of allowed operators
     * @param prop List containing the property vector (training data only)
149
     * @param task_sizes The number of samples per task
Thomas Purcell's avatar
Thomas Purcell committed
150
151
152
153
154
155
156
157
     * @param project_type The type of loss function/projection operator to use
     * @param max_rung The maximum rung of the feature (Height of the binary expression tree -1)
     * @param n_sis_select The number of features to select during each SIS step
     * @param n_rung_store The number of rungs whose feature's data is always stored in memory
     * @param n_rung_generate Either 0 or 1, and is the number of rungs to generate on the fly during SIS
     * @param cross_corr_max The maximum allowed cross-correlation value between selected features
     * @param min_abs_feat_val The minimum allowed absolute feature value for a feature
     * @param max_abs_feat_val The maximum allowed absolute feature value for a feature
158
159
160
161
162
163
164
165
     */
    FeatureSpace(
        std::shared_ptr<MPI_Interface> mpi_comm,
        std::vector<node_ptr> phi_0,
        std::vector<std::string> allowed_ops,
        std::vector<double> prop,
        std::vector<int> task_sizes,
        std::string project_type="regression",
Thomas Purcell's avatar
Thomas Purcell committed
166
        int max_rung=1,
167
        int n_sis_select=1,
Thomas Purcell's avatar
Thomas Purcell committed
168
        int n_rung_store=-1,
169
170
171
172
173
174
        int n_rung_generate=0,
        double cross_corr_max=1.0,
        double min_abs_feat_val=1e-50,
        double max_abs_feat_val=1e50
    );
    #endif
175
    /**
Thomas Purcell's avatar
Thomas Purcell committed
176
     * @brief Initialize members of the FeatureSpace using _prop
177
     */
Thomas Purcell's avatar
Thomas Purcell committed
178
    void initialize_fs();
179

180
    /**
Thomas Purcell's avatar
Thomas Purcell committed
181
     * @brief Populate the operator lists using _allowed_ops and _allowed_param_ops
182
183
184
185
     */
    void set_op_lists();

    /**
Thomas Purcell's avatar
Thomas Purcell committed
186
     * @brief Create SIS output files and write their headers
187
     */
188
    void initialize_fs_output_files() const;
Thomas Purcell's avatar
Thomas Purcell committed
189

190
    /**
Thomas Purcell's avatar
Thomas Purcell committed
191
     * @brief Populate _phi using _phi_0 and the allowed operators up to (_max_rung - _n_rung_generate)^th rung
192
     */
193
    void generate_feature_space();
Thomas Purcell's avatar
Thomas Purcell committed
194

195
    /**
Thomas Purcell's avatar
Thomas Purcell committed
196
     * @brief A vector containing all of the selected features
197
     */
198
    inline std::vector<node_ptr> phi_selected() const
    {
        // Returned by value, so the caller receives its own copy of _phi_selected
        return _phi_selected;
    }
199
200

    /**
Thomas Purcell's avatar
Thomas Purcell committed
201
     * @brief A vector containing all features generated (Not including those created on the Fly during SIS)
202
     */
203
    inline std::vector<node_ptr> phi() const
    {
        // Returned by value, so the caller receives its own copy of _phi
        return _phi;
    }
204
205

    /**
Thomas Purcell's avatar
Thomas Purcell committed
206
     * @brief A vector containing all of the Primary features
207
     */
208
    inline std::vector<node_ptr> phi0() const
    {
        // Returned by value, so the caller receives its own copy of _phi_0
        return _phi_0;
    }
209
210

    /**
Thomas Purcell's avatar
Thomas Purcell committed
211
     * @brief The projection scores for each feature in _phi
212
     */
213
    inline std::vector<double> scores() const
    {
        // Returned by value, so the caller receives its own copy of _scores
        return _scores;
    }
214

215
    /**
216
     * @brief The MPI Communicator
217
     */
218
    inline std::shared_ptr<MPI_Interface> mpi_comm() const
    {
        // Copies the shared_ptr, so the caller shares ownership of the same MPI_Interface object
        return _mpi_comm;
    }
219

220
    /**
Thomas Purcell's avatar
Thomas Purcell committed
221
     * @brief Number of training samples per task
222
     */
223
    inline std::vector<int> task_sizes() const
    {
        // Returned by value, so the caller receives its own copy of _task_sizes
        return _task_sizes;
    }
224

225
    // DocString: feat_space_feature_space_file
226
    /**
Thomas Purcell's avatar
Thomas Purcell committed
227
     * @brief Filename of the file to output the computer readable representation of the selected features to
228
     */
229
    inline std::string feature_space_file() const
    {
        // Returned by value: a copy of the stored _feature_space_file string
        return _feature_space_file;
    }
230

Thomas Purcell's avatar
Thomas Purcell committed
231
232
233
234
235
236
    // DocString: feat_space_feature_space_file
    /**
     * @brief Filename of the file to output the human readable representation of the selected features to
     */
    inline std::string feature_space_summary_file() const
    {
        // Returned by value: a copy of the stored _feature_space_summary_file string
        return _feature_space_summary_file;
    }

237
    // DocString: feat_space_l_bound
238
    /**
Thomas Purcell's avatar
Thomas Purcell committed
239
     * @brief The lower bound for the maximum absolute value of the features
240
     */
241
    inline double l_bound() const
    {
        // Read-only accessor for _l_bound
        return _l_bound;
    }
242

243
    // DocString: feat_space_u_bound
244
    /**
Thomas Purcell's avatar
Thomas Purcell committed
245
     * @brief The upper bound for the maximum absolute value of the features
246
     */
247
    inline double u_bound() const
    {
        // Read-only accessor for _u_bound
        return _u_bound;
    }
248

Thomas Purcell's avatar
Thomas Purcell committed
249
    // DocString: feat_space_max_rung
250
    /**
Thomas Purcell's avatar
Thomas Purcell committed
251
     * @brief The maximum rung for the feature creation
252
     */
Thomas Purcell's avatar
Thomas Purcell committed
253
    inline int max_rung() const
    {
        // Read-only accessor for _max_rung
        return _max_rung;
    }
254

255
    // DocString: feat_space_n_sis_select
256
    /**
Thomas Purcell's avatar
Thomas Purcell committed
257
     * @brief The number of features to select during each SIS iteration
258
     */
259
    inline int n_sis_select() const
    {
        // Read-only accessor for _n_sis_select
        return _n_sis_select;
    }
260

261
    // DocString: feat_space_n_samp
262
    /**
Thomas Purcell's avatar
Thomas Purcell committed
263
     * @brief The number of samples in the training set
264
     */
265
    inline int n_samp() const
    {
        // Read-only accessor for _n_samp
        return _n_samp;
    }
266

267
    // DocString: feat_space_n_feat
268
    /**
Thomas Purcell's avatar
Thomas Purcell committed
269
     * @brief The total number of features in the feature space
270
     */
271
    inline int n_feat() const
    {
        // Read-only accessor for _n_feat
        return _n_feat;
    }
272

273
    // DocString: feat_space_n_rung_store
274
    /**
Thomas Purcell's avatar
Thomas Purcell committed
275
     * @brief The number of rungs to calculate and store the value of the features for all samples
276
     */
277
    inline int n_rung_store() const
    {
        // Read-only accessor for _n_rung_store
        return _n_rung_store;
    }
278

279
    // DocString: feat_space_n_rung_generate
280
    /**
281
     * @brief Either 0 or 1, and is the number of rungs to generate on the fly during SIS
282
     */
283
    inline int n_rung_generate() const
    {
        // Read-only accessor for _n_rung_generate
        return _n_rung_generate;
    }
284

Thomas Purcell's avatar
Thomas Purcell committed
285
286
    /**
     * @brief Generate a new set of non-parameterized features from a single feature
Thomas Purcell's avatar
Thomas Purcell committed
287
     * @details Perform all valid algebraic operations on the passed feature and all features that appear before it in _phi.
Thomas Purcell's avatar
Thomas Purcell committed
288
289
     *
     * @param feat The feature to spawn new features from
Thomas Purcell's avatar
Thomas Purcell committed
290
291
     * @param feat_set The feature set to pull features from for binary operations
     * @param start The point in feat_set to begin pulling features from for binary operations
Thomas Purcell's avatar
Thomas Purcell committed
292
     * @param feat_ind starting index for the next feature generated
Thomas Purcell's avatar
Thomas Purcell committed
293
294
     * @param l_bound lower bound for the maximum absolute value of the feature
     * @param u_bound upper bound for the maximum absolute value of the feature
Thomas Purcell's avatar
Thomas Purcell committed
295
296
297
298
     */
    void generate_non_param_feats(
        std::vector<node_ptr>::iterator& feat,
        std::vector<node_ptr>& feat_set,
299
        const std::vector<node_ptr>::iterator& start,
Thomas Purcell's avatar
Thomas Purcell committed
300
301
302
303
304
        unsigned long int& feat_ind,
        const double l_bound=1e-50,
        const double u_bound=1e50
    );

305
#ifdef PARAMETERIZE
306
    /**
Thomas Purcell's avatar
Thomas Purcell committed
307
     * @brief Generate a new set of parameterized features from a single feature
Thomas Purcell's avatar
Thomas Purcell committed
308
     * @details Perform all valid algebraic operations on the passed feature and all features that appear before it in _phi.
309
310
     *
     * @param feat The feature to spawn new features from
Thomas Purcell's avatar
Thomas Purcell committed
311
312
     * @param feat_set The feature set to pull features from for binary operations
     * @param start The point in feat_set to begin pulling features from for binary operations
313
314
     * @param feat_ind starting index for the next feature generated
     * @param optimizer The object used to optimize the parameterized features
Thomas Purcell's avatar
Thomas Purcell committed
315
316
     * @param l_bound lower bound for the maximum absolute value of the feature
     * @param u_bound upper bound for the maximum absolute value of the feature
317
     */
Thomas Purcell's avatar
Thomas Purcell committed
318
    void generate_param_feats(
319
320
        std::vector<node_ptr>::iterator& feat,
        std::vector<node_ptr>& feat_set,
321
        const std::vector<node_ptr>::iterator& start,
322
323
        unsigned long int& feat_ind,
        std::shared_ptr<NLOptimizer> optimizer,
324
325
        const double l_bound=1e-50,
        const double u_bound=1e50
326
    );
Thomas Purcell's avatar
Thomas Purcell committed
327

328
    /**
Thomas Purcell's avatar
Thomas Purcell committed
329
     * @brief Generate a new set of parameterized features for the residuals
330
331
     *
     * @param feat The feature to spawn new features from
Thomas Purcell's avatar
Thomas Purcell committed
332
     * @param feat_set The feature set to pull features from for binary operations
333
     * @param feat_ind starting index for the next feature generated
Thomas Purcell's avatar
Thomas Purcell committed
334
     * @param optimizer The object used to optimize the parameterized features
Thomas Purcell's avatar
Thomas Purcell committed
335
336
     * @param l_bound lower bound for the maximum absolute value of the feature
     * @param u_bound upper bound for the maximum absolute value of the feature
337
     */
Thomas Purcell's avatar
Thomas Purcell committed
338
    void generate_reparam_feats(
339
340
341
        std::vector<node_ptr>::iterator& feat,
        std::vector<node_ptr>& feat_set,
        unsigned long int& feat_ind,
Thomas Purcell's avatar
Thomas Purcell committed
342
        std::shared_ptr<NLOptimizer> optimizer,
343
344
        const double l_bound=1e-50,
        const double u_bound=1e50
345
    );
Thomas Purcell's avatar
Thomas Purcell committed
346
347
348
349
350
351
352

    /**
     * @brief Generate reparameterized feature set
     *
     * @param prop The property to optimize against
     */
    void generate_reparam_feature_set(const std::vector<double>& prop);
353
#endif
354

355
    /**
Thomas Purcell's avatar
Thomas Purcell committed
356
     * @brief Generate the final rung of features on the fly and calculate their projection scores so that they can be selected by SIS.
357
     *
Thomas Purcell's avatar
Thomas Purcell committed
358
359
360
     * @param loss The LossFunction used to project over all of the features
     * @param phi_selected The set of features that would be selected excluding the final rung
     * @param scores_selected The projection scores of all features in phi_selected
361
     */
Thomas Purcell's avatar
Thomas Purcell committed
362
    void generate_and_project(std::shared_ptr<LossFunction> loss, std::vector<node_ptr>& phi_selected, std::vector<double>& scores_selected);
363

364
    /**
365
     * @brief Perform Sure-Independence Screening over the FeatureSpace. The features are ranked using a projection operator constructed using _project_type and the Property vector
366
     *
367
     * @param prop Vector containing the property vector (training data only)
368
     */
369
    void sis(const std::vector<double>& prop);
370

Thomas Purcell's avatar
Thomas Purcell committed
371
    /**
372
     * @brief Perform Sure-Independence Screening over the FeatureSpace. The features are ranked using a projection operator defined in loss
Thomas Purcell's avatar
Thomas Purcell committed
373
     *
374
     * @param loss The LossFunction used to project over all of the features
Thomas Purcell's avatar
Thomas Purcell committed
375
376
377
     */
    void sis(std::shared_ptr<LossFunction> loss);

378
    // DocString: feat_space_feat_in_phi
379
380
381
    /**
     * @brief Is a feature in this process' _phi?
     *
382
     * @param ind (int) The index of the feature
Thomas Purcell's avatar
Thomas Purcell committed
383
     *
384
     * @return True if feature is in this rank's _phi
385
     */
386
    inline bool feat_in_phi(int ind) const
    {
        // Reports whether ind lies in the closed range spanned by the feat_ind() of the first and last entries of _phi.
        // An empty _phi contains no features; return early because _phi[0] and _phi.back() are undefined on an empty vector.
        if(_phi.empty())
        {
            return false;
        }
        return (ind >= _phi[0]->feat_ind()) && (ind <= _phi.back()->feat_ind());
    }
387

388
389
390
391
    // DocString: feat_space_remove_feature
    /**
     * @brief Remove a feature from phi
     *
Thomas Purcell's avatar
Thomas Purcell committed
392
     * @param ind (int) index of feature to remove
393
     */
394
    void remove_feature(const int ind);
395

396
397
    // Python Interface Functions
    #ifdef PY_BINDINGS
398
    #ifdef PARAMETERIZE
Thomas Purcell's avatar
Thomas Purcell committed
399
400

    // DocString: feat_space_init_py_list
401
    /**
402
     * @brief FeatureSpace constructor given a set of primary features and operators
403
     *
404
405
406
407
408
     * @param phi_0 (list) The set of primary features
     * @param allowed_ops (list) The list of allowed operators
     * @param allowed_param_ops (list) The list of allowed operators to be used with non-linear optimization
     * @param prop (list) List containing the property vector (training data only)
     * @param project_type (str) The type of loss function/projection operator to use
Thomas Purcell's avatar
Thomas Purcell committed
409
     * @param max_rung (int) The maximum rung of the feature (Height of the binary expression tree -1)
410
     * @param n_sis_select (int) The number of features to select during each SIS step
Thomas Purcell's avatar
Thomas Purcell committed
411
     * @param n_rung_store (int) The number of rungs whose feature's data is always stored in memory
412
413
414
415
416
417
     * @param n_rung_generate (int) Either 0 or 1, and is the number of rungs to generate on the fly during SIS
     * @param cross_corr_max (double) The maximum allowed cross-correlation value between selected features
     * @param min_abs_feat_val (double) The minimum allowed absolute feature value for a feature
     * @param max_abs_feat_val (double) The maximum allowed absolute feature value for a feature
     * @param max_param_depth (int) The maximum depth in the binary expression tree to set non-linear optimization
     * @param reparam_residual (bool) If True then reparameterize features using the residuals of each model
418
419
420
421
422
423
424
     */
    FeatureSpace(
        py::list phi_0,
        py::list allowed_ops,
        py::list allowed_param_ops,
        py::list prop,
        std::string project_type="regression",
Thomas Purcell's avatar
Thomas Purcell committed
425
        int max_rung=1,
426
        int n_sis_select=1,
Thomas Purcell's avatar
Thomas Purcell committed
427
        int n_rung_store=-1,
428
429
430
431
        int n_rung_generate=0,
        double cross_corr_max=1.0,
        double min_abs_feat_val=1e-50,
        double max_abs_feat_val=1e50,
432
433
        int max_param_depth = -1,
        bool reparam_residual=false
434
435
    );

Thomas Purcell's avatar
Thomas Purcell committed
436
    // DocString: feat_space_init_np_array
437
    /**
438
     * @brief FeatureSpace constructor given a set of primary features and operators
439
     *
440
441
442
443
444
     * @param phi_0 (list) The set of primary features
     * @param allowed_ops (list) The list of allowed operators
     * @param allowed_param_ops (list) The list of allowed operators to be used with non-linear optimization
     * @param prop (np.ndarray) List containing the property vector (training data only)
     * @param project_type (str) The type of loss function/projection operator to use
Thomas Purcell's avatar
Thomas Purcell committed
445
     * @param max_rung (int) The maximum rung of the feature (Height of the binary expression tree -1)
446
     * @param n_sis_select (int) The number of features to select during each SIS step
Thomas Purcell's avatar
Thomas Purcell committed
447
     * @param n_rung_store (int) The number of rungs whose feature's data is always stored in memory
448
449
450
451
452
453
     * @param n_rung_generate (int) Either 0 or 1, and is the number of rungs to generate on the fly during SIS
     * @param cross_corr_max (double) The maximum allowed cross-correlation value between selected features
     * @param min_abs_feat_val (double) The minimum allowed absolute feature value for a feature
     * @param max_abs_feat_val (double) The maximum allowed absolute feature value for a feature
     * @param max_param_depth (int) The maximum depth in the binary expression tree to set non-linear optimization
     * @param reparam_residual (bool) If True then reparameterize features using the residuals of each model
454
455
456
457
458
459
460
     */
    FeatureSpace(
        py::list phi_0,
        py::list allowed_ops,
        py::list allowed_param_ops,
        np::ndarray prop,
        std::string project_type="regression",
Thomas Purcell's avatar
Thomas Purcell committed
461
        int max_rung=1,
462
        int n_sis_select=1,
Thomas Purcell's avatar
Thomas Purcell committed
463
        int n_rung_store=-1,
464
465
466
467
        int n_rung_generate=0,
        double cross_corr_max=1.0,
        double min_abs_feat_val=1e-50,
        double max_abs_feat_val=1e50,
468
469
        int max_param_depth = -1,
        bool reparam_residual=false
470
    );
Thomas Purcell's avatar
Thomas Purcell committed
471

472
    #else
Thomas Purcell's avatar
Thomas Purcell committed
473
474

    // DocString: feat_space_ini_no_param_py_list
475
    /**
476
     * @brief FeatureSpace constructor given a set of primary features and operators
477
     *
478
479
480
481
     * @param phi_0 (list) The set of primary features
     * @param allowed_ops (list) The list of allowed operators
     * @param prop (list) List containing the property vector (training data only)
     * @param project_type (str) The type of loss function/projection operator to use
Thomas Purcell's avatar
Thomas Purcell committed
482
     * @param max_rung (int) The maximum rung of the feature (Height of the binary expression tree -1)
483
     * @param n_sis_select (int) The number of features to select during each SIS step
Thomas Purcell's avatar
Thomas Purcell committed
484
     * @param n_rung_store (int) The number of rungs whose feature's data is always stored in memory
485
486
487
488
     * @param n_rung_generate (int) Either 0 or 1, and is the number of rungs to generate on the fly during SIS
     * @param cross_corr_max (double) The maximum allowed cross-correlation value between selected features
     * @param min_abs_feat_val (double) The minimum allowed absolute feature value for a feature
     * @param max_abs_feat_val (double) The maximum allowed absolute feature value for a feature
489
490
491
492
493
494
     */
    FeatureSpace(
        py::list phi_0,
        py::list allowed_ops,
        py::list prop,
        std::string project_type="regression",
Thomas Purcell's avatar
Thomas Purcell committed
495
        int max_rung=1,
496
        int n_sis_select=1,
Thomas Purcell's avatar
Thomas Purcell committed
497
        int n_rung_store=-1,
498
499
500
501
502
        int n_rung_generate=0,
        double cross_corr_max=1.0,
        double min_abs_feat_val=1e-50,
        double max_abs_feat_val=1e50
    );
503

Thomas Purcell's avatar
Thomas Purcell committed
504
    // DocString: feat_space_init_no_param_np_array
505
    /**
506
     * @brief FeatureSpace constructor given a set of primary features and operators
507
     *
508
509
510
511
     * @param phi_0 (list) The set of primary features
     * @param allowed_ops (list) The list of allowed operators
     * @param prop (np.ndarray) List containing the property vector (training data only)
     * @param project_type (str) The type of loss function/projection operator to use
Thomas Purcell's avatar
Thomas Purcell committed
512
     * @param max_rung (int) The maximum rung of the feature (Height of the binary expression tree -1)
513
     * @param n_sis_select (int) The number of features to select during each SIS step
Thomas Purcell's avatar
Thomas Purcell committed
514
     * @param n_rung_store (int) The number of rungs whose feature's data is always stored in memory
515
516
517
518
     * @param n_rung_generate (int) Either 0 or 1, and is the number of rungs to generate on the fly during SIS
     * @param cross_corr_max (double) The maximum allowed cross-correlation value between selected features
     * @param min_abs_feat_val (double) The minimum allowed absolute feature value for a feature
     * @param max_abs_feat_val (double) The maximum allowed absolute feature value for a feature
519
520
521
522
523
524
     */
    FeatureSpace( // declaration only: overload whose property vector is passed as a NumPy array
        py::list phi_0,
        py::list allowed_ops,
        np::ndarray prop,
        std::string project_type="regression",
        int max_rung=1,
        int n_sis_select=1,
        int n_rung_store=-1, // NOTE(review): -1 looks like a sentinel default; its handling lives in the definition - confirm
        int n_rung_generate=0,
        double cross_corr_max=1.0,
        double min_abs_feat_val=1e-50,
        double max_abs_feat_val=1e50
    );
    #endif

    // DocString: feat_space_init_file_np_array
    /**
     * @brief FeatureSpace constructor that uses a file containing postfix feature expressions to describe all features in Phi, and a primary feature set (cpp definition in <python/feature_creation/FeatureSpace.cpp>)
     *
     * @param feature_file (str) The file containing the postfix expressions of all features in the FeatureSpace
     * @param phi_0 (list) The set of primary features
     * @param prop (np.ndarray) Array containing the property vector (training data only)
     * @param task_sizes (list) The number of samples in the training data per task
     * @param project_type (str) The type of loss function/projection operator to use
     * @param n_sis_select (int) The number of features to select during each SIS step
     * @param cross_corr_max (double) The maximum allowed cross-correlation value between selected features
     */
    FeatureSpace( // declaration only: builds the space from a postfix-expression file; property passed as a NumPy array
        std::string feature_file,
        py::list phi_0,
        np::ndarray prop,
        py::list task_sizes,
        std::string project_type="regression",
        int n_sis_select=1,
        double cross_corr_max=1.0
    );

    // DocString: feat_space_init_file_py_list
    /**
     * @brief FeatureSpace constructor that uses a file containing postfix feature expressions to describe all features in Phi, and a primary feature set (cpp definition in <python/feature_creation/FeatureSpace.cpp>)
     *
     * @param feature_file (str) The file containing the postfix expressions of all features in the FeatureSpace
     * @param phi_0 (list) The set of primary features
     * @param prop (list) List containing the property vector (training data only)
     * @param task_sizes (list) The number of samples in the training data per task
     * @param project_type (str) The type of loss function/projection operator to use
     * @param n_sis_select (int) The number of features to select during each SIS step
     * @param cross_corr_max (double) The maximum allowed cross-correlation value between selected features
     */
    FeatureSpace( // declaration only: builds the space from a postfix-expression file; property passed as a Python list
        std::string feature_file,
        py::list phi_0,
        py::list prop,
        py::list task_sizes,
        std::string project_type="regression",
        int n_sis_select=1,
        double cross_corr_max=1.0
    );

    // DocString: feat_space_sis_arr
    /**
     * @brief Perform Sure-Independence Screening over the FeatureSpace. The features are ranked using a projection operator constructed using _project_type and the property vector
     *
     * @param prop (np.ndarray) Array containing the property vector (training data only)
     */
    inline void sis(np::ndarray prop)
    {
        // Convert the NumPy property array into a std::vector<double> first,
        // then hand it to the sis overload that accepts that vector.
        std::vector<double> property_values(python_conv_utils::from_ndarray<double>(prop));
        this->sis(property_values);
    }

    // DocString: feat_space_sis_list
    /**
     * @brief Perform Sure-Independence Screening over the FeatureSpace. The features are ranked using a projection operator constructed using _project_type and the property vector
     *
     * @param prop (list) List containing the property vector (training data only)
     */
    inline void sis(py::list prop)
    {
        // Convert the Python list into a std::vector<double> first,
        // then hand it to the sis overload that accepts that vector.
        std::vector<double> property_values(python_conv_utils::from_list<double>(prop));
        this->sis(property_values);
    }

    // DocString: feat_space_phi_selected_py
    /**
     * @brief A list containing all of the selected features
     */
    py::list phi_selected_py(); // declaration only: returns a Python list; no inline body is provided here

    // DocString: feat_space_phi0_py
    /**
Thomas Purcell's avatar
Thomas Purcell committed
611
     * @brief A list containing all features generated (Not including those created on the Fly during SIS)
612
     */
Thomas Purcell's avatar
Thomas Purcell committed
613
    py::list phi_py();
614
615
616

    // DocString: feat_space_phi_py
    /**
Thomas Purcell's avatar
Thomas Purcell committed
617
     * @brief A list containing all of the Primary features
618
     */
Thomas Purcell's avatar
Thomas Purcell committed
619
    py::list phi0_py();

    // DocString: feat_space_scores_py
    /**
     * @brief An array of all stored projection scores from SIS
     */
    inline np::ndarray scores_py()
    {
        // Expose _scores to Python by converting it with python_conv_utils::to_ndarray
        return python_conv_utils::to_ndarray<double>(_scores);
    }

    // DocString: feat_space_task_sizes_py
    /**
     * @brief A list of the number of samples in each task for the training data
     */
    inline py::list task_sizes_py()
    {
        // Expose _task_sizes to Python by converting it with python_conv_utils::to_list
        return python_conv_utils::to_list<int>(_task_sizes);
    }

    // DocString: feat_space_allowed_ops_py
    /**
     * @brief The list of allowed operators
     */
    inline py::list allowed_ops_py()
    {
        // Expose _allowed_ops to Python by converting it with python_conv_utils::to_list
        return python_conv_utils::to_list<std::string>(_allowed_ops);
    }

    // DocString: feat_space_start_rung_py
    /**
     * @brief A list containing the index of the first feature of each rung in the feature space.
     */
    inline py::list start_rung_py()
    {
        // Expose _start_rung to Python by converting it with python_conv_utils::to_list
        return python_conv_utils::to_list<int>(_start_rung);
    }

    // DocString: feat_space_get_feature
    /**
     * @brief Access the feature in _phi with an index ind
     *
     * @param ind (int) The index of the feature to get
     * @return A ModelNode of the feature at index ind
     */
    inline ModelNode get_feature(const int ind) const
    {
        // Build a ModelNode from the entry of _phi at position ind.
        // NOTE(review): ind is used unchecked; callers presumably guarantee it is in range - confirm
        return ModelNode(_phi[ind]);
    }
653
    #endif
Thomas Purcell's avatar
Thomas Purcell committed
654
655
};

656
#endif