FeatureSpace.hpp 28.8 KB
Newer Older
1
// Copyright 2021 Thomas A. R. Purcell
2
//
3
4
5
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
6
//
7
//     http://www.apache.org/licenses/LICENSE-2.0
8
//
9
10
11
12
13
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14

15
/** @file feature_creation/feature_space/FeatureSpace.hpp
16
 *  @brief Defines the class for creating/operating on a feature space in SISSO
17
 *
18
 *  @author Thomas A. R. Purcell (tpurcell90)
19
20
21
 *  @bug No known bugs.
 */

Thomas Purcell's avatar
Thomas Purcell committed
22
23
24
#ifndef FEATURE_SPACE
#define FEATURE_SPACE

25
#include <boost/filesystem.hpp>
Thomas Purcell's avatar
Thomas Purcell committed
26

27
#include <utility>
Thomas Purcell's avatar
Thomas Purcell committed
28

Thomas Purcell's avatar
Thomas Purcell committed
29
#include "feature_creation/node/utils.hpp"
Thomas Purcell's avatar
Thomas Purcell committed
30
31

#include "mpi_interface/MPI_Interface.hpp"
32
#include "mpi_interface/MPI_Ops.hpp"
Thomas Purcell's avatar
Thomas Purcell committed
33
34
#include "mpi_interface/serialize_tuple.h"

Thomas Purcell's avatar
Thomas Purcell committed
35
36
#include "utils/project.hpp"

37
38
39
40
#ifdef PY_BINDINGS
    namespace np = boost::python::numpy;
    namespace py = boost::python;
#endif
41

42
// DocString: cls_feat_space
43
/**
44
 * @brief Feature Space for SISSO calculations. It stores and performs all actions on the feature space for SISSO.
45
46
 *
 */
Thomas Purcell's avatar
Thomas Purcell committed
47
48
class FeatureSpace
{
Thomas Purcell's avatar
Thomas Purcell committed
49
50
51
    std::vector<node_ptr> _phi_selected; //!< A vector containing all of the selected features
    std::vector<node_ptr> _phi; //!< A vector containing all features generated (Not including those created on the Fly during SIS)
    const std::vector<node_ptr> _phi_0; //!< A vector containing all of the Primary features
52

53
    #ifdef PARAMETERIZE
Thomas Purcell's avatar
Thomas Purcell committed
54
55
56
57
58
59
60
61
    std::vector<node_ptr> _phi_reparam; //!< A vector containing the features created when reparameterizating using the residuals
    std::vector<int> _end_no_params; //!< A vector containing the indexes of each rung where parameterized nodes start
    std::vector<int> _start_rung_reparam; //!< A vector containing the indexes of each rung where parameterized nodes start

    std::vector<un_param_op_node_gen> _un_param_operators; //!< Vector containing all parameterized unary operators with free parameters
    std::vector<bin_param_op_node_gen> _com_bin_param_operators; //!< Vector containing all parameterized commutable binary operators with free parameters
    std::vector<bin_param_op_node_gen> _bin_param_operators; //!< Vector containing all parameterized binary operators with free parameters
    std::vector<std::string> _allowed_param_ops; //!< Vector containing all allowed operators strings for operators with free parameters
62
    #endif
63

Thomas Purcell's avatar
Thomas Purcell committed
64
65
66
67
    std::vector<std::string> _allowed_ops; //!< Vector containing all allowed operators strings
    std::vector<un_op_node_gen> _un_operators; //!< Vector containing all unary operators
    std::vector<bin_op_node_gen> _com_bin_operators; //!< Vector containing all commutable binary operators
    std::vector<bin_op_node_gen> _bin_operators; //!< Vector containing all binary operators
68

Thomas Purcell's avatar
Thomas Purcell committed
69
70
    std::vector<double> _prop; //!< The value of the property vector for each training sample
    std::vector<double> _scores; //!< The projection scores for each feature
71

Thomas Purcell's avatar
Thomas Purcell committed
72
73
74
75
76
    const std::vector<int> _task_sizes; //!< Number of training samples per task
    std::vector<int> _start_rung; //!< Vector containing the indexes where each rung starts in _phi
    const std::string _project_type; //!< The type of LossFunction to use when projecting the features onto a property
    const std::string _feature_space_file; //!< File to output the computer readable representation of the selected features to
    const std::string _feature_space_summary_file; //!< File to output the human readable representation of the selected features to
77

Thomas Purcell's avatar
Thomas Purcell committed
78
79
    std::function<bool(const double*, const int, const double, const std::vector<double>&, const double, const int, const int)> _is_valid; //!< Function used to determine of a feature is too correlated to previously selected features
    std::function<bool(const double*, const int, const double, const std::vector<node_ptr>&, const std::vector<double>&, const double)> _is_valid_feat_list; //!< Function used to determine of a feature is too correlated to previously selected features within a given list
80

Thomas Purcell's avatar
Thomas Purcell committed
81
    std::shared_ptr<MPI_Interface> _mpi_comm; //!< the MPI communicator for the calculation
82

83
    const double _cross_cor_max; //!< Maximum cross-correlation used for selecting features
Thomas Purcell's avatar
Thomas Purcell committed
84
85
    const double _l_bound; //!< The lower bound for the maximum absolute value of the features
    const double _u_bound; //!< The upper bound for the maximum absolute value of the features
86

Thomas Purcell's avatar
Thomas Purcell committed
87
88
89
    int _n_rung_store; //!< The number of rungs to calculate and store the value of the features for all samples
    int _n_feat; //!< Total number of features in the feature space
    int _max_rung; //!< Maximum rung for the feature creation
90

Thomas Purcell's avatar
Thomas Purcell committed
91
92
93
    const int _n_sis_select; //!< Number of features to select during each SIS iteration
    const int _n_samp; //!< Number of samples in the training set
    const int _n_rung_generate; //!< Either 0 or 1, and is the number of rungs to generate on the fly during SIS
94

Thomas Purcell's avatar
Thomas Purcell committed
95
96
    int _max_param_depth; //!< The maximum depth in the binary expression tree to set non-linear optimization
    const bool _reparam_residual; //!< If True then reparameterize features using the residuals of each model
97

Thomas Purcell's avatar
Thomas Purcell committed
98
public:
Thomas Purcell's avatar
Thomas Purcell committed
99

100
    #ifdef PARAMETERIZE
101
    /**
Thomas Purcell's avatar
Thomas Purcell committed
102
     * @brief FeatureSpace constructor given a set of primary features and operators
103
104
     *
     * @param mpi_comm MPI communicator for the calculations
Thomas Purcell's avatar
Thomas Purcell committed
105
106
107
108
     * @param phi_0 The set of primary features
     * @param allowed_ops The list of allowed operators
     * @param allowed_param_ops The list of allowed operators to be used with non-linear optimization
     * @param prop List containing the property vector (training data only)
109
     * @param task_sizes The number of samples per task
Thomas Purcell's avatar
Thomas Purcell committed
110
111
112
113
114
115
116
117
118
119
     * @param project_type The type of loss function/projection operator to use
     * @param max_rung The maximum rung of the feature (Height of the binary expression tree -1)
     * @param n_sis_select The number of features to select during each SIS step
     * @param n_rung_store The number of rungs whose feature's data is always stored in memory
     * @param n_rung_generate Either 0 or 1, and is the number of rungs to generate on the fly during SIS
     * @param cross_corr_max The maximum allowed cross-correlation value between selected features
     * @param min_abs_feat_val The minimum allowed absolute feature value for a feature
     * @param max_abs_feat_val The maximum allowed absolute feature value for a feature
     * @param max_param_depth The maximum depth in the binary expression tree to set non-linear optimization
     * @param reparam_residual If True then reparameterize features using the residuals of each model
120
     */
Thomas Purcell's avatar
Thomas Purcell committed
121
    FeatureSpace(
Thomas Purcell's avatar
Thomas Purcell committed
122
        std::shared_ptr<MPI_Interface> mpi_comm,
Thomas Purcell's avatar
Thomas Purcell committed
123
124
        std::vector<node_ptr> phi_0,
        std::vector<std::string> allowed_ops,
Thomas Purcell's avatar
Thomas Purcell committed
125
        std::vector<std::string> allowed_param_ops,
126
        std::vector<double> prop,
Thomas Purcell's avatar
Thomas Purcell committed
127
        std::vector<int> task_sizes,
128
        std::string project_type="regression",
Thomas Purcell's avatar
Thomas Purcell committed
129
        int max_rung=1,
Thomas Purcell's avatar
Thomas Purcell committed
130
        int n_sis_select=1,
Thomas Purcell's avatar
Thomas Purcell committed
131
        int n_rung_store=-1,
132
        int n_rung_generate=0,
Thomas Purcell's avatar
Thomas Purcell committed
133
        double cross_corr_max=1.0,
134
        double min_abs_feat_val=1e-50,
135
        double max_abs_feat_val=1e50,
136
137
        int max_param_depth=-1,
        bool reparam_residual=false
138
    );
139
140
    #else
    /**
Thomas Purcell's avatar
Thomas Purcell committed
141
     * @brief FeatureSpace constructor given a set of primary features and operators
142
143
     *
     * @param mpi_comm MPI communicator for the calculations
Thomas Purcell's avatar
Thomas Purcell committed
144
145
146
     * @param phi_0 The set of primary features
     * @param allowed_ops The list of allowed operators
     * @param prop List containing the property vector (training data only)
147
     * @param task_sizes The number of samples per task
Thomas Purcell's avatar
Thomas Purcell committed
148
149
150
151
152
153
154
155
     * @param project_type The type of loss function/projection operator to use
     * @param max_rung The maximum rung of the feature (Height of the binary expression tree -1)
     * @param n_sis_select The number of features to select during each SIS step
     * @param n_rung_store The number of rungs whose feature's data is always stored in memory
     * @param n_rung_generate Either 0 or 1, and is the number of rungs to generate on the fly during SIS
     * @param cross_corr_max The maximum allowed cross-correlation value between selected features
     * @param min_abs_feat_val The minimum allowed absolute feature value for a feature
     * @param max_abs_feat_val The maximum allowed absolute feature value for a feature
156
157
158
159
160
161
162
163
     */
    FeatureSpace(
        std::shared_ptr<MPI_Interface> mpi_comm,
        std::vector<node_ptr> phi_0,
        std::vector<std::string> allowed_ops,
        std::vector<double> prop,
        std::vector<int> task_sizes,
        std::string project_type="regression",
Thomas Purcell's avatar
Thomas Purcell committed
164
        int max_rung=1,
165
        int n_sis_select=1,
Thomas Purcell's avatar
Thomas Purcell committed
166
        int n_rung_store=-1,
167
168
169
170
171
172
        int n_rung_generate=0,
        double cross_corr_max=1.0,
        double min_abs_feat_val=1e-50,
        double max_abs_feat_val=1e50
    );
    #endif
173
    /**
Thomas Purcell's avatar
Thomas Purcell committed
174
     * @brief Initialize members of the FeatureSpace using _prop
175
     */
Thomas Purcell's avatar
Thomas Purcell committed
176
    void initialize_fs();
177

178
    /**
Thomas Purcell's avatar
Thomas Purcell committed
179
     * @brief Populate the operator lists using _allowed_ops and _allowed_param_ops
180
181
182
183
     */
    void set_op_lists();

    /**
Thomas Purcell's avatar
Thomas Purcell committed
184
     * @brief Create SIS output files and write their headers
185
     */
186
    void initialize_fs_output_files() const;
Thomas Purcell's avatar
Thomas Purcell committed
187

188
    /**
Thomas Purcell's avatar
Thomas Purcell committed
189
     * @brief Populate _phi using _phi_0 and the allowed operators up to (_max_rung - _n_rung_generate)^th rung
190
     */
191
    void generate_feature_space();
Thomas Purcell's avatar
Thomas Purcell committed
192

193
    /**
Thomas Purcell's avatar
Thomas Purcell committed
194
     * @brief A vector containing all of the selected features
195
     */
196
    inline std::vector<node_ptr> phi_selected() const
    {
        // Returned by value: the caller receives its own copy of the selected-feature vector
        return _phi_selected;
    }
197
198

    /**
Thomas Purcell's avatar
Thomas Purcell committed
199
     * @brief A vector containing all features generated (Not including those created on the Fly during SIS)
200
     */
201
    inline std::vector<node_ptr> phi() const
    {
        // Returned by value: the caller receives its own copy of the generated-feature vector
        return _phi;
    }
202
203

    /**
Thomas Purcell's avatar
Thomas Purcell committed
204
     * @brief A vector containing all of the Primary features
205
     */
206
    inline std::vector<node_ptr> phi0() const
    {
        // Returned by value: the caller receives its own copy of the primary-feature vector
        return _phi_0;
    }
207
208

    /**
Thomas Purcell's avatar
Thomas Purcell committed
209
     * @brief The projection scores for each feature in _phi
210
     */
211
    inline std::vector<double> scores() const
    {
        // Returned by value: the caller receives a copy of the stored projection scores
        return _scores;
    }
212

213
    /**
214
     * @brief The MPI Communicator
215
     */
216
    inline std::shared_ptr<MPI_Interface> mpi_comm() const
    {
        // Copies the shared_ptr, so the caller shares ownership of the same communicator object
        return _mpi_comm;
    }
217

218
    /**
Thomas Purcell's avatar
Thomas Purcell committed
219
     * @brief Number of training samples per task
220
     */
221
    inline std::vector<int> task_sizes() const
    {
        // Returned by value: the caller receives a copy of the per-task sample counts
        return _task_sizes;
    }
222

223
    // DocString: feat_space_feature_space_file
224
    /**
Thomas Purcell's avatar
Thomas Purcell committed
225
     * @brief Filename of the file to output the computer readable representation of the selected features to
226
     */
227
    inline std::string feature_space_file() const
    {
        // Returns a copy of the stored file name
        return _feature_space_file;
    }
228

Thomas Purcell's avatar
Thomas Purcell committed
229
230
231
232
233
234
    // DocString: feat_space_feature_space_summary_file
    /**
     * @brief Filename of the file to output the human readable representation of the selected features to
     */
    inline std::string feature_space_summary_file() const
    {
        // Returns a copy of the stored summary file name
        return _feature_space_summary_file;
    }

235
    // DocString: feat_space_l_bound
236
    /**
Thomas Purcell's avatar
Thomas Purcell committed
237
     * @brief The lower bound for the maximum absolute value of the features
238
     */
239
    inline double l_bound() const
    {
        // Read-only access to _l_bound
        return _l_bound;
    }
240

241
    // DocString: feat_space_u_bound
242
    /**
Thomas Purcell's avatar
Thomas Purcell committed
243
     * @brief The upper bound for the maximum absolute value of the features
244
     */
245
    inline double u_bound() const
    {
        // Read-only access to _u_bound
        return _u_bound;
    }
246

Thomas Purcell's avatar
Thomas Purcell committed
247
    // DocString: feat_space_max_rung
248
    /**
Thomas Purcell's avatar
Thomas Purcell committed
249
     * @brief The maximum rung for the feature creation
250
     */
Thomas Purcell's avatar
Thomas Purcell committed
251
    inline int max_rung() const
    {
        // Read-only access to _max_rung
        return _max_rung;
    }
252

253
    // DocString: feat_space_n_sis_select
254
    /**
Thomas Purcell's avatar
Thomas Purcell committed
255
     * @brief The number of features to select during each SIS iteration
256
     */
257
    inline int n_sis_select() const
    {
        // Read-only access to _n_sis_select
        return _n_sis_select;
    }
258

259
    // DocString: feat_space_n_samp
260
    /**
Thomas Purcell's avatar
Thomas Purcell committed
261
     * @brief The number of samples in the training set
262
     */
263
    inline int n_samp() const
    {
        // Read-only access to _n_samp
        return _n_samp;
    }
264

265
    // DocString: feat_space_n_feat
266
    /**
Thomas Purcell's avatar
Thomas Purcell committed
267
     * @brief The total number of features in the feature space
268
     */
269
    inline int n_feat() const
    {
        // Read-only access to _n_feat
        return _n_feat;
    }
270

271
    // DocString: feat_space_n_rung_store
272
    /**
Thomas Purcell's avatar
Thomas Purcell committed
273
     * @brief The number of rungs to calculate and store the value of the features for all samples
274
     */
275
    inline int n_rung_store() const
    {
        // Read-only access to _n_rung_store
        return _n_rung_store;
    }
276

277
    // DocString: feat_space_n_rung_generate
278
    /**
279
     * @brief Either 0 or 1, and is the number of rungs to generate on the fly during SIS
280
     */
281
    inline int n_rung_generate() const
    {
        // Read-only access to _n_rung_generate
        return _n_rung_generate;
    }
282

Thomas Purcell's avatar
Thomas Purcell committed
283
284
    /**
     * @brief Generate a new set of non-parameterized features from a single feature
Thomas Purcell's avatar
Thomas Purcell committed
285
     * @details Perform all valid algebraic operations on the passed feature and all features that appear before it in _phi.
Thomas Purcell's avatar
Thomas Purcell committed
286
287
     *
     * @param feat The feature to spawn new features from
Thomas Purcell's avatar
Thomas Purcell committed
288
289
     * @param feat_set The feature set to pull features from for binary operations
     * @param start The point in feat_set to begin pulling features from for binary operations
Thomas Purcell's avatar
Thomas Purcell committed
290
     * @param feat_ind starting index for the next feature generated
Thomas Purcell's avatar
Thomas Purcell committed
291
292
     * @param l_bound lower bound for the maximum absolute value of the feature
     * @param u_bound upper bound for the maximum absolute value of the feature
Thomas Purcell's avatar
Thomas Purcell committed
293
294
295
296
     */
    void generate_non_param_feats(
        std::vector<node_ptr>::iterator& feat,
        std::vector<node_ptr>& feat_set,
297
        const std::vector<node_ptr>::iterator& start,
Thomas Purcell's avatar
Thomas Purcell committed
298
299
300
301
302
        unsigned long int& feat_ind,
        const double l_bound=1e-50,
        const double u_bound=1e50
    );

303
#ifdef PARAMETERIZE
304
    /**
Thomas Purcell's avatar
Thomas Purcell committed
305
     * @brief Generate a new set of parameterized features from a single feature
Thomas Purcell's avatar
Thomas Purcell committed
306
     * @details Perform all valid algebraic operations on the passed feature and all features that appear before it in _phi.
307
308
     *
     * @param feat The feature to spawn new features from
Thomas Purcell's avatar
Thomas Purcell committed
309
310
     * @param feat_set The feature set to pull features from for binary operations
     * @param start The point in feat_set to begin pulling features from for binary operations
311
312
     * @param feat_ind starting index for the next feature generated
     * @param optimizer The object used to optimize the parameterized features
Thomas Purcell's avatar
Thomas Purcell committed
313
314
     * @param l_bound lower bound for the maximum absolute value of the feature
     * @param u_bound upper bound for the maximum absolute value of the feature
315
     */
Thomas Purcell's avatar
Thomas Purcell committed
316
    void generate_param_feats(
317
318
        std::vector<node_ptr>::iterator& feat,
        std::vector<node_ptr>& feat_set,
319
        const std::vector<node_ptr>::iterator& start,
320
321
        unsigned long int& feat_ind,
        std::shared_ptr<NLOptimizer> optimizer,
322
323
        const double l_bound=1e-50,
        const double u_bound=1e50
324
    );
Thomas Purcell's avatar
Thomas Purcell committed
325

326
    /**
Thomas Purcell's avatar
Thomas Purcell committed
327
     * @brief Generate a new set of parameterized features for the residuals
328
329
     *
     * @param feat The feature to spawn new features from
Thomas Purcell's avatar
Thomas Purcell committed
330
     * @param feat_set The feature set to pull features from for binary operations
331
     * @param feat_ind starting index for the next feature generated
Thomas Purcell's avatar
Thomas Purcell committed
332
     * @param optimizer The object used to optimize the parameterized features
Thomas Purcell's avatar
Thomas Purcell committed
333
334
     * @param l_bound lower bound for the maximum absolute value of the feature
     * @param u_bound upper bound for the maximum absolute value of the feature
335
     */
Thomas Purcell's avatar
Thomas Purcell committed
336
    void generate_reparam_feats(
337
338
339
        std::vector<node_ptr>::iterator& feat,
        std::vector<node_ptr>& feat_set,
        unsigned long int& feat_ind,
Thomas Purcell's avatar
Thomas Purcell committed
340
        std::shared_ptr<NLOptimizer> optimizer,
341
342
        const double l_bound=1e-50,
        const double u_bound=1e50
343
    );
Thomas Purcell's avatar
Thomas Purcell committed
344
345
346
347
348
349
350

    /**
     * @brief Generate reparameterized feature set
     *
     * @param prop The property to optimize against
     */
    void generate_reparam_feature_set(const std::vector<double>& prop);
351
#endif
352

353
    /**
Thomas Purcell's avatar
Thomas Purcell committed
354
     * @brief Generate the final rung of features on the fly and calculate their projection scores so that they can be selected by SIS.
355
     *
Thomas Purcell's avatar
Thomas Purcell committed
356
357
358
     * @param loss The LossFunction used to project over all of the features
     * @param phi_selected The set of features that would be selected excluding the final rung
     * @param scores_selected The projection scores of all features in phi_selected
359
     */
Thomas Purcell's avatar
Thomas Purcell committed
360
    void generate_and_project(std::shared_ptr<LossFunction> loss, std::vector<node_ptr>& phi_selected, std::vector<double>& scores_selected);
361

362
    /**
363
     * @brief Perform Sure-Independence Screening over the FeatureSpace. The features are ranked using a projection operator constructed using _project_type and the Property vector
364
     *
365
     * @param prop Vector containing the property vector (training data only)
366
     */
367
    void sis(const std::vector<double>& prop);
368

Thomas Purcell's avatar
Thomas Purcell committed
369
    /**
370
     * @brief Perform Sure-Independence Screening over the FeatureSpace. The features are ranked using a projection operator defined in loss
Thomas Purcell's avatar
Thomas Purcell committed
371
     *
372
     * @param loss The LossFunction used to project over all of the features
Thomas Purcell's avatar
Thomas Purcell committed
373
374
375
     */
    void sis(std::shared_ptr<LossFunction> loss);

376
    // DocString: feat_space_feat_in_phi
377
378
379
    /**
     * @brief Is a feature in this process' _phi?
     *
380
     * @param ind (int) The index of the feature
Thomas Purcell's avatar
Thomas Purcell committed
381
     *
382
     * @return True if feature is in this rank's _phi
383
     */
384
    inline bool feat_in_phi(int ind) const
    {
        // An empty _phi contains no features. Checking first also avoids the
        // undefined behavior of _phi[0] / _phi.back() on an empty vector.
        if(_phi.empty())
        {
            return false;
        }

        // Membership is decided by the index range spanned by the first and last
        // stored features (NOTE(review): relies on _phi being ordered by feat_ind — confirm).
        return (ind >= _phi[0]->feat_ind()) && (ind <= _phi.back()->feat_ind());
    }
385

386
387
388
389
    // DocString: feat_space_remove_feature
    /**
     * @brief Remove a feature from phi
     *
Thomas Purcell's avatar
Thomas Purcell committed
390
     * @param ind (int) index of feature to remove
391
     */
392
    void remove_feature(const int ind);
393

394
395
    // Python Interface Functions
    #ifdef PY_BINDINGS
396
    #ifdef PARAMETERIZE
Thomas Purcell's avatar
Thomas Purcell committed
397
398

    // DocString: feat_space_init_py_list
399
    /**
400
     * @brief FeatureSpace constructor given a set of primary features and operators
401
     *
402
403
404
405
406
     * @param phi_0 (list) The set of primary features
     * @param allowed_ops (list) The list of allowed operators
     * @param allowed_param_ops (list) The list of allowed operators to be used with non-linear optimization
     * @param prop (list) List containing the property vector (training data only)
     * @param project_type (str) The type of loss function/projection operator to use
Thomas Purcell's avatar
Thomas Purcell committed
407
     * @param max_rung (int) The maximum rung of the feature (Height of the binary expression tree -1)
408
     * @param n_sis_select (int) The number of features to select during each SIS step
Thomas Purcell's avatar
Thomas Purcell committed
409
     * @param n_rung_store (int) The number of rungs whose feature's data is always stored in memory
410
411
412
413
414
415
     * @param n_rung_generate (int) Either 0 or 1, and is the number of rungs to generate on the fly during SIS
     * @param cross_corr_max (double) The maximum allowed cross-correlation value between selected features
     * @param min_abs_feat_val (double) The minimum allowed absolute feature value for a feature
     * @param max_abs_feat_val (double) The maximum allowed absolute feature value for a feature
     * @param max_param_depth (int) The maximum depth in the binary expression tree to set non-linear optimization
     * @param reparam_residual (bool) If True then reparameterize features using the residuals of each model
416
417
418
419
420
421
422
     */
    FeatureSpace(
        py::list phi_0,
        py::list allowed_ops,
        py::list allowed_param_ops,
        py::list prop,
        std::string project_type="regression",
Thomas Purcell's avatar
Thomas Purcell committed
423
        int max_rung=1,
424
        int n_sis_select=1,
Thomas Purcell's avatar
Thomas Purcell committed
425
        int n_rung_store=-1,
426
427
428
429
        int n_rung_generate=0,
        double cross_corr_max=1.0,
        double min_abs_feat_val=1e-50,
        double max_abs_feat_val=1e50,
430
431
        int max_param_depth = -1,
        bool reparam_residual=false
432
433
    );

Thomas Purcell's avatar
Thomas Purcell committed
434
    // DocString: feat_space_init_np_array
435
    /**
436
     * @brief FeatureSpace constructor given a set of primary features and operators
437
     *
438
439
440
441
442
     * @param phi_0 (list) The set of primary features
     * @param allowed_ops (list) The list of allowed operators
     * @param allowed_param_ops (list) The list of allowed operators to be used with non-linear optimization
     * @param prop (np.ndarray) List containing the property vector (training data only)
     * @param project_type (str) The type of loss function/projection operator to use
Thomas Purcell's avatar
Thomas Purcell committed
443
     * @param max_rung (int) The maximum rung of the feature (Height of the binary expression tree -1)
444
     * @param n_sis_select (int) The number of features to select during each SIS step
Thomas Purcell's avatar
Thomas Purcell committed
445
     * @param n_rung_store (int) The number of rungs whose feature's data is always stored in memory
446
447
448
449
450
451
     * @param n_rung_generate (int) Either 0 or 1, and is the number of rungs to generate on the fly during SIS
     * @param cross_corr_max (double) The maximum allowed cross-correlation value between selected features
     * @param min_abs_feat_val (double) The minimum allowed absolute feature value for a feature
     * @param max_abs_feat_val (double) The maximum allowed absolute feature value for a feature
     * @param max_param_depth (int) The maximum depth in the binary expression tree to set non-linear optimization
     * @param reparam_residual (bool) If True then reparameterize features using the residuals of each model
452
453
454
455
456
457
458
     */
    FeatureSpace(
        py::list phi_0,
        py::list allowed_ops,
        py::list allowed_param_ops,
        np::ndarray prop,
        std::string project_type="regression",
Thomas Purcell's avatar
Thomas Purcell committed
459
        int max_rung=1,
460
        int n_sis_select=1,
Thomas Purcell's avatar
Thomas Purcell committed
461
        int n_rung_store=-1,
462
463
464
465
        int n_rung_generate=0,
        double cross_corr_max=1.0,
        double min_abs_feat_val=1e-50,
        double max_abs_feat_val=1e50,
466
467
        int max_param_depth = -1,
        bool reparam_residual=false
468
    );
Thomas Purcell's avatar
Thomas Purcell committed
469

470
    #else
Thomas Purcell's avatar
Thomas Purcell committed
471
472

    // DocString: feat_space_ini_no_param_py_list
473
    /**
474
     * @brief FeatureSpace constructor given a set of primary features and operators
475
     *
476
477
478
479
     * @param phi_0 (list) The set of primary features
     * @param allowed_ops (list) The list of allowed operators
     * @param prop (list) List containing the property vector (training data only)
     * @param project_type (str) The type of loss function/projection operator to use
Thomas Purcell's avatar
Thomas Purcell committed
480
     * @param max_rung (int) The maximum rung of the feature (Height of the binary expression tree -1)
481
     * @param n_sis_select (int) The number of features to select during each SIS step
Thomas Purcell's avatar
Thomas Purcell committed
482
     * @param n_rung_store (int) The number of rungs whose feature's data is always stored in memory
483
484
485
486
     * @param n_rung_generate (int) Either 0 or 1, and is the number of rungs to generate on the fly during SIS
     * @param cross_corr_max (double) The maximum allowed cross-correlation value between selected features
     * @param min_abs_feat_val (double) The minimum allowed absolute feature value for a feature
     * @param max_abs_feat_val (double) The maximum allowed absolute feature value for a feature
487
488
489
490
491
492
     */
    FeatureSpace(
        py::list phi_0,
        py::list allowed_ops,
        py::list prop,
        std::string project_type="regression",
Thomas Purcell's avatar
Thomas Purcell committed
493
        int max_rung=1,
494
        int n_sis_select=1,
Thomas Purcell's avatar
Thomas Purcell committed
495
        int n_rung_store=-1,
496
497
498
499
500
        int n_rung_generate=0,
        double cross_corr_max=1.0,
        double min_abs_feat_val=1e-50,
        double max_abs_feat_val=1e50
    );
501

Thomas Purcell's avatar
Thomas Purcell committed
502
    // DocString: feat_space_init_no_param_np_array
503
    /**
504
     * @brief FeatureSpace constructor given a set of primary features and operators
505
     *
506
507
508
509
     * @param phi_0 (list) The set of primary features
     * @param allowed_ops (list) The list of allowed operators
     * @param prop (np.ndarray) List containing the property vector (training data only)
     * @param project_type (str) The type of loss function/projection operator to use
Thomas Purcell's avatar
Thomas Purcell committed
510
     * @param max_rung (int) The maximum rung of the feature (Height of the binary expression tree -1)
511
     * @param n_sis_select (int) The number of features to select during each SIS step
Thomas Purcell's avatar
Thomas Purcell committed
512
     * @param n_rung_store (int) The number of rungs whose feature's data is always stored in memory
513
514
515
516
     * @param n_rung_generate (int) Either 0 or 1, and is the number of rungs to generate on the fly during SIS
     * @param cross_corr_max (double) The maximum allowed cross-correlation value between selected features
     * @param min_abs_feat_val (double) The minimum allowed absolute feature value for a feature
     * @param max_abs_feat_val (double) The maximum allowed absolute feature value for a feature
517
518
519
520
521
522
     */
    FeatureSpace(
        py::list phi_0,
        py::list allowed_ops,
        np::ndarray prop,
        std::string project_type="regression",
Thomas Purcell's avatar
Thomas Purcell committed
523
        int max_rung=1,
524
        int n_sis_select=1,
Thomas Purcell's avatar
Thomas Purcell committed
525
        int n_rung_store=-1,
526
527
528
529
530
531
        int n_rung_generate=0,
        double cross_corr_max=1.0,
        double min_abs_feat_val=1e-50,
        double max_abs_feat_val=1e50
    );
    #endif
Thomas Purcell's avatar
Thomas Purcell committed
532

533
    // DocString: feat_space_init_file_np_array
534
    /**
535
     * @brief FeatureSpace constructor that uses a file containing postfix feature expressions to describe all features in Phi, and a primary feature setn <python/feature_creation/FeatureSpace.cpp>)
536
     *
537
538
539
540
541
542
543
     * @param feature_file (str) The file containing the postfix expressions of all features in the FeatureSpace
     * @param phi_0 (list) The set of primary features
     * @param prop (np.ndarray) List containing the property vector (training data only)
     * @param task_sizes (list) The number of samples in the training data per task
     * @param project_type (str) The type of loss function/projection operator to use
     * @param n_sis_select (int) The number of features to select during each SIS step
     * @param cross_corr_max (double) The maximum allowed cross-correlation value between selected features
544
545
546
547
548
549
     */
    FeatureSpace(
        std::string feature_file,
        py::list phi_0,
        np::ndarray prop,
        py::list task_sizes,
Thomas Purcell's avatar
Thomas Purcell committed
550
        std::string project_type="regression",
551
552
553
554
        int n_sis_select=1,
        double cross_corr_max=1.0
    );

555
    // DocString: feat_space_init_file_py_list
556
    /**
557
     * @brief FeatureSpace constructor that uses a file containing postfix feature expressions to describe all features in Phi, and a primary feature setn <python/feature_creation/FeatureSpace.cpp>)
558
     *
559
560
561
562
563
564
565
     * @param feature_file (str) The file containing the postfix expressions of all features in the FeatureSpace
     * @param phi_0 (list) The set of primary features
     * @param prop (list) List containing the property vector (training data only)
     * @param task_sizes (list) The number of samples in the training data per task
     * @param project_type (str) The type of loss function/projection operator to use
     * @param n_sis_select (int) The number of features to select during each SIS step
     * @param cross_corr_max (double) The maximum allowed cross-correlation value between selected features
566
567
568
569
570
571
     */
    FeatureSpace(
        std::string feature_file,
        py::list phi_0,
        py::list prop,
        py::list task_sizes,
Thomas Purcell's avatar
Thomas Purcell committed
572
        std::string project_type="regression",
573
574
575
576
577
578
        int n_sis_select=1,
        double cross_corr_max=1.0
    );

    // DocString: feat_space_sis_arr
    /**
     * @brief Perform Sure-Independence Screening over the FeatureSpace. The features are ranked using a projection operator constructed using _project_type and the Property vector
     *
     * @param prop (np.ndarray) Array containing the property vector (training data only)
     */
    inline void sis(np::ndarray prop)
    {
        // Convert the NumPy array into a std::vector<double> and hand it to the
        // sis overload that accepts that vector.
        std::vector<double> property_values(python_conv_utils::from_ndarray<double>(prop));
        sis(property_values);
    }

    // DocString: feat_space_sis_list
    /**
     * @brief Perform Sure-Independence Screening over the FeatureSpace. The features are ranked using a projection operator constructed using _project_type and the Property vector
     *
     * @param prop (list) List containing the property vector (training data only)
     */
    inline void sis(py::list prop)
    {
        // Convert the Python list into a std::vector<double> and hand it to the
        // sis overload that accepts that vector.
        std::vector<double> property_values(python_conv_utils::from_list<double>(prop));
        sis(property_values);
    }

    // DocString: feat_space_phi_selected_py
    /**
     * @brief A list containing all of the selected features
     */
    py::list phi_selected_py();

    // DocString: feat_space_phi0_py
    /**
Thomas Purcell's avatar
Thomas Purcell committed
609
     * @brief A list containing all features generated (Not including those created on the Fly during SIS)
610
     */
Thomas Purcell's avatar
Thomas Purcell committed
611
    py::list phi_py();
612
613
614

    // DocString: feat_space_phi_py
    /**
Thomas Purcell's avatar
Thomas Purcell committed
615
     * @brief A list containing all of the Primary features
616
     */
Thomas Purcell's avatar
Thomas Purcell committed
617
    py::list phi0_py();
618
619
620

    // DocString: feat_space_scores_py
    /**
     * @brief An array of all stored projection scores from SIS
     */
    inline np::ndarray scores_py()
    {
        // Export the _scores member as a NumPy array of doubles
        return python_conv_utils::to_ndarray<double>(_scores);
    }

    // DocString: feat_space_task_sizes_py
    /**
     * @brief A list of the number of samples in each task for the training data
     */
    inline py::list task_sizes_py()
    {
        // Export the _task_sizes member as a Python list of ints
        return python_conv_utils::to_list<int>(_task_sizes);
    }

    // DocString: feat_space_allowed_ops_py
    /**
     * @brief The list of allowed operators
     */
    inline py::list allowed_ops_py()
    {
        // Export the _allowed_ops member as a Python list of strings
        return python_conv_utils::to_list<std::string>(_allowed_ops);
    }

Thomas Purcell's avatar
Thomas Purcell committed
637
    // DocString: feat_space_start_rung_py
638
    /**
639
     * @brief A list containing the index of the first feature of each rung in the feature space.
640
     */
Thomas Purcell's avatar
Thomas Purcell committed
641
    inline py::list start_rung_py(){return python_conv_utils::to_list<int>(_start_rung);}
642
643
644

    // DocString: feat_space_get_feature
    /**
645
     * @brief Access the feature in _phi with an index ind
646
     *
647
     * @param ind (int) The index of the feature to get
648
649
     * @return A ModelNode of the feature at index ind
     */
650
    inline ModelNode get_feature(const int ind) const {return ModelNode(_phi[ind]);}
651
    #endif
Thomas Purcell's avatar
Thomas Purcell committed
652
653
};

654
#endif