FeatureSpace.cpp 39.2 KB
Newer Older
Thomas Purcell's avatar
Thomas Purcell committed
1
#include "feature_creation/feature_space/FeatureSpace.hpp"
2

Thomas Purcell's avatar
Thomas Purcell committed
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
BOOST_CLASS_EXPORT_GUID(FeatureNode, "FeatureNode")
BOOST_CLASS_EXPORT_GUID(AddNode, "AddNode")
BOOST_CLASS_EXPORT_GUID(SubNode, "SubNode")
BOOST_CLASS_EXPORT_GUID(AbsDiffNode, "AbsDiffNode")
BOOST_CLASS_EXPORT_GUID(MultNode, "MultNode")
BOOST_CLASS_EXPORT_GUID(DivNode, "DivNode")
BOOST_CLASS_EXPORT_GUID(SqNode, "SqNode")
BOOST_CLASS_EXPORT_GUID(SqrtNode, "SqrtNode")
BOOST_CLASS_EXPORT_GUID(CbNode, "CbNode")
BOOST_CLASS_EXPORT_GUID(CbrtNode, "CbrtNode")
BOOST_CLASS_EXPORT_GUID(SixPowNode, "SixPowNode")
BOOST_CLASS_EXPORT_GUID(ExpNode, "ExpNode")
BOOST_CLASS_EXPORT_GUID(NegExpNode, "NegExpNode")
BOOST_CLASS_EXPORT_GUID(LogNode, "LogNode")
BOOST_CLASS_EXPORT_GUID(AbsNode, "AbsNode")
BOOST_CLASS_EXPORT_GUID(InvNode, "InvNode")
BOOST_CLASS_EXPORT_GUID(SinNode, "SinNode")
BOOST_CLASS_EXPORT_GUID(CosNode, "CosNode")
Thomas Purcell's avatar
Thomas Purcell committed
21

22
#ifdef PARAMETERIZE
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
BOOST_CLASS_EXPORT_GUID(AddParamNode, "AddParamNode")
BOOST_CLASS_EXPORT_GUID(SubParamNode, "SubParamNode")
BOOST_CLASS_EXPORT_GUID(AbsDiffParamNode, "AbsDiffParamNode")
BOOST_CLASS_EXPORT_GUID(MultParamNode, "MultParamNode")
BOOST_CLASS_EXPORT_GUID(DivParamNode, "DivParamNode")
BOOST_CLASS_EXPORT_GUID(SqParamNode, "SqParamNode")
BOOST_CLASS_EXPORT_GUID(SqrtParamNode, "SqrtParamNode")
BOOST_CLASS_EXPORT_GUID(CbParamNode, "CbParamNode")
BOOST_CLASS_EXPORT_GUID(CbrtParamNode, "CbrtParamNode")
BOOST_CLASS_EXPORT_GUID(SixPowParamNode, "SixPowParamNode")
BOOST_CLASS_EXPORT_GUID(ExpParamNode, "ExpParamNode")
BOOST_CLASS_EXPORT_GUID(NegExpParamNode, "NegExpParamNode")
BOOST_CLASS_EXPORT_GUID(LogParamNode, "LogParamNode")
BOOST_CLASS_EXPORT_GUID(AbsParamNode, "AbsParamNode")
BOOST_CLASS_EXPORT_GUID(InvParamNode, "InvParamNode")
BOOST_CLASS_EXPORT_GUID(SinParamNode, "SinParamNode")
BOOST_CLASS_EXPORT_GUID(CosParamNode, "CosParamNode")
Thomas Purcell's avatar
Thomas Purcell committed
40

Thomas Purcell's avatar
Thomas Purcell committed
41
FeatureSpace::FeatureSpace(
Thomas Purcell's avatar
Thomas Purcell committed
42
    std::shared_ptr<MPI_Interface> mpi_comm,
Thomas Purcell's avatar
Thomas Purcell committed
43
44
    std::vector<node_ptr> phi_0,
    std::vector<std::string> allowed_ops,
Thomas Purcell's avatar
Thomas Purcell committed
45
    std::vector<std::string> allowed_param_ops,
46
    std::vector<double> prop,
Thomas Purcell's avatar
Thomas Purcell committed
47
    std::vector<int> task_sizes,
48
    std::string project_type,
Thomas Purcell's avatar
Thomas Purcell committed
49
50
    int max_phi,
    int n_sis_select,
Thomas Purcell's avatar
Thomas Purcell committed
51
    int max_store_rung,
52
    int n_rung_generate,
Thomas Purcell's avatar
Thomas Purcell committed
53
    double cross_corr_max,
Thomas Purcell's avatar
Thomas Purcell committed
54
    double min_abs_feat_val,
55
    double max_abs_feat_val,
56
57
    int max_param_depth,
    bool reparam_residual
Thomas Purcell's avatar
Thomas Purcell committed
58
):
59
60
    _phi(phi_0),
    _phi_0(phi_0),
61
    _allowed_param_ops(allowed_param_ops),
62
    _allowed_ops(allowed_ops),
63
    _prop(prop),
64
    _scores(phi_0.size(), 0.0),
Thomas Purcell's avatar
Thomas Purcell committed
65
    _task_sizes(task_sizes),
66
    _start_gen(1, 0),
67
    _feature_space_file("feature_space/selected_features.txt"),
Thomas Purcell's avatar
Thomas Purcell committed
68
    _feature_space_summary_file("feature_space/SIS_summary.txt"),
Thomas Purcell's avatar
Thomas Purcell committed
69
    _project_type(project_type),
Thomas Purcell's avatar
Thomas Purcell committed
70
    _mpi_comm(mpi_comm),
Thomas Purcell's avatar
Thomas Purcell committed
71
    _cross_cor_max(cross_corr_max),
72
73
    _l_bound(min_abs_feat_val),
    _u_bound(max_abs_feat_val),
Thomas Purcell's avatar
Thomas Purcell committed
74
75
76
77
    _max_phi(max_phi),
    _n_sis_select(n_sis_select),
    _n_samp(phi_0[0]->n_samp()),
    _n_feat(phi_0.size()),
78
    _n_rung_store(max_store_rung),
79
    _n_rung_generate(n_rung_generate),
80
81
    _max_param_depth(max_param_depth),
    _reparam_residual(reparam_residual)
Thomas Purcell's avatar
Thomas Purcell committed
82
{
Thomas Purcell's avatar
Thomas Purcell committed
83
    initialize_fs();
84
}
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
#else
FeatureSpace::FeatureSpace(
    std::shared_ptr<MPI_Interface> mpi_comm,
    std::vector<node_ptr> phi_0,
    std::vector<std::string> allowed_ops,
    std::vector<double> prop,
    std::vector<int> task_sizes,
    std::string project_type,
    int max_phi,
    int n_sis_select,
    int max_store_rung,
    int n_rung_generate,
    double cross_corr_max,
    double min_abs_feat_val,
    double max_abs_feat_val
):
    _phi(phi_0),
    _phi_0(phi_0),
    _allowed_ops(allowed_ops),
    _prop(prop),
    _scores(phi_0.size(), 0.0),
    _task_sizes(task_sizes),
    _start_gen(1, 0),
    _feature_space_file("feature_space/selected_features.txt"),
    _feature_space_summary_file("feature_space/SIS_summary.txt"),
    _project_type(project_type),
    _mpi_comm(mpi_comm),
    _cross_cor_max(cross_corr_max),
    _l_bound(min_abs_feat_val),
    _u_bound(max_abs_feat_val),
    _max_phi(max_phi),
    _n_sis_select(n_sis_select),
    _n_samp(phi_0[0]->n_samp()),
    _n_feat(phi_0.size()),
    _n_rung_store(max_store_rung),
120
121
122
    _n_rung_generate(n_rung_generate),
    _max_param_depth(0),
    _reparam_residual(false)
123
124
125
126
{
    initialize_fs();
}
#endif
127

Thomas Purcell's avatar
Thomas Purcell committed
128
void FeatureSpace::initialize_fs()
129
{
130
    #ifdef PARAMETERIZE
131
132
133
134
135
136
137
138
139
    if(_max_param_depth == -1)
    {
        _max_param_depth = _max_phi;
    }
    if((_max_param_depth < 0) || (_max_param_depth > _max_phi))
    {
        throw std::logic_error("Invalid parameter depth.");
    }
    nlopt_wrapper::MAX_PARAM_DEPTH = _max_param_depth;
140
141
    #endif

142
    if(_n_rung_store == -1)
143
    {
144
        _n_rung_store = _max_phi - 1;
145
    }
146
    if(_n_rung_generate > 1)
147
    {
148
        throw std::logic_error("A maximum of one rung can be generated on the fly.");
149
    }
150
    else if(_max_phi - _n_rung_generate < _n_rung_store)
151
    {
152
        throw std::logic_error("Requesting to store more rungs than what can be pre-generated.");
153
    }
154

Thomas Purcell's avatar
Thomas Purcell committed
155
156
157
    node_value_arrs::set_task_sz_train(_task_sizes);
    int n_max_ops = 0;
    for(int rr = 0; rr < _max_phi - _n_rung_store; ++rr)
158
    {
Thomas Purcell's avatar
Thomas Purcell committed
159
        n_max_ops += std::pow(2, rr);
160
    }
161

162
    initialize_fs_output_files();
163
164
    project_funcs::set_project_fxn(_project_type, _task_sizes.size(), _project, _project_no_omp);
    comp_feats::set_is_valid_fxn(_project_type, _cross_cor_max, _n_samp, _is_valid, _is_valid_feat_list);
165
    set_op_lists();
166

167
    double start = omp_get_wtime();
168
    generate_feature_space();
169
170
171
    _mpi_comm->barrier();
    double duration = omp_get_wtime() - start;
    if(_mpi_comm->rank() == 0)
172
    {
173
        std::cout << "time to generate feat sapce: " << duration << " s" << std::endl;
174
    }
175

176
    mpi_reduce_op::set_op(_project_type, _cross_cor_max, _n_sis_select);
177

178
179
180
    _scores.reserve(_phi.size());
    _scores.resize(_phi.size());
}
181

182
183
void FeatureSpace::set_op_lists()
{
184
    for(auto & op : _allowed_ops)
Thomas Purcell's avatar
Thomas Purcell committed
185
    {
Thomas Purcell's avatar
Thomas Purcell committed
186
        if((op.compare("add") == 0) || (op.compare("mult") == 0) || (op.compare("abs_diff") == 0) || (op.compare("sub") == 0))
187
        {
Thomas Purcell's avatar
Thomas Purcell committed
188
            _com_bin_operators.push_back(allowed_op_maps::binary_operator_map[op]);
189
        }
Thomas Purcell's avatar
Thomas Purcell committed
190
        else if((op.compare("div") == 0))
191
        {
Thomas Purcell's avatar
Thomas Purcell committed
192
            _bin_operators.push_back(allowed_op_maps::binary_operator_map[op]);
193
        }
Thomas Purcell's avatar
Thomas Purcell committed
194
        else
195
        {
Thomas Purcell's avatar
Thomas Purcell committed
196
            _un_operators.push_back(allowed_op_maps::unary_operator_map[op]);
197
        }
Thomas Purcell's avatar
Thomas Purcell committed
198
    }
199
200

    #ifdef PARAMETERIZE
201
202
203
    for(auto& op : _allowed_param_ops)
    {
        if((op.compare("add") == 0) || (op.compare("sub") == 0) || (op.compare("abs_diff") == 0))
204
        {
205
206
207
208
209
210
211
212
213
            _com_bin_param_operators.push_back(allowed_op_maps::binary_param_operator_map[op]);
        }
        else if((op.compare("div") == 0) || (op.compare("mult") == 0))
        {
            _bin_param_operators.push_back(allowed_op_maps::binary_param_operator_map[op]);
        }
        else
        {
            _un_param_operators.push_back(allowed_op_maps::unary_param_operator_map[op]);
214
        }
215
    }
216
    #endif
217
}
218

219
void FeatureSpace::initialize_fs_output_files() const
220
{
Thomas Purcell's avatar
Thomas Purcell committed
221
    if(_mpi_comm->rank() == 0)
222
223
224
225
226
227
228
229
230
231
    {
        std::ofstream out_file_stream = std::ofstream();
        std::ofstream sum_file_stream = std::ofstream();
        out_file_stream.open(_feature_space_file);
        sum_file_stream.open(_feature_space_summary_file);
        out_file_stream << std::setw(14) <<std::left << "# FEAT_ID" << "Feature Postfix Expression (RPN)" << std::endl;
        sum_file_stream << std::setw(14) <<std::left << "# FEAT_ID" << std::setw(24) << std::left << "Score" << "Feature Expression" << std::endl;
        out_file_stream.close();
        sum_file_stream.close();
    }
232
}
233

Thomas Purcell's avatar
Thomas Purcell committed
234
#ifdef PARAMETERIZE
235
236
237
238
239
void FeatureSpace::generate_new_feats(
    std::vector<node_ptr>::iterator& feat,
    std::vector<node_ptr>& feat_set,
    unsigned long int& feat_ind,
    std::shared_ptr<NLOptimizer> optimizer,
240
241
    const double l_bound,
    const double u_bound
242
243
)
{
244

245
246
    unsigned long int phi_ind = feat - _phi.begin();
    feat_set.reserve(feat_set.size() + _un_operators.size() + phi_ind * (_com_bin_operators.size() + 2 * _bin_operators.size()));
247

248
249
250
251
    for(auto& op : _un_operators)
    {
        op(feat_set, *feat, feat_ind, l_bound, u_bound);
    }
252

253
254
255
    for(auto& op : _com_bin_operators)
    {
        for(auto feat_2 = _phi.begin(); feat_2 < feat; ++feat_2)
256
        {
257
            op(feat_set, *feat, *feat_2, feat_ind, l_bound, u_bound);
Thomas Purcell's avatar
Thomas Purcell committed
258
        }
259
    }
Thomas Purcell's avatar
Thomas Purcell committed
260

261
262
263
    for(auto& op : _bin_operators)
    {
        for(auto feat_2 = _phi.begin(); feat_2 < feat; ++feat_2)
Thomas Purcell's avatar
Thomas Purcell committed
264
        {
265
266
            op(feat_set, *feat, *feat_2, feat_ind, l_bound, u_bound);
            op(feat_set, *feat_2, *feat, feat_ind, l_bound, u_bound);
267
        }
268
269
270
271
272
    }
    for(auto& op : _un_param_operators)
    {
        op(feat_set, *feat, feat_ind, l_bound, u_bound, optimizer);
    }
273

274
275
276
    for(auto& op : _com_bin_param_operators)
    {
        for(auto feat_2 = _phi.begin(); feat_2 != feat; ++feat_2)
277
        {
278
            op(feat_set, *feat, *feat_2, feat_ind, l_bound, u_bound, optimizer);
279
        }
Thomas Purcell's avatar
Thomas Purcell committed
280
    }
281
282
283
    for(auto& op : _bin_param_operators)
    {
        for(auto feat_2 = _phi.begin(); feat_2 != feat; ++feat_2)
Thomas Purcell's avatar
Thomas Purcell committed
284
        {
285
286
            op(feat_set, *feat, *feat_2, feat_ind, l_bound, u_bound, optimizer);
            op(feat_set, *feat_2, *feat, feat_ind, l_bound, u_bound, optimizer);
Thomas Purcell's avatar
Thomas Purcell committed
287
        }
288
289
290
291
292
293
294
    }
}
#else
void FeatureSpace::generate_new_feats(
    std::vector<node_ptr>::iterator& feat,
    std::vector<node_ptr>& feat_set,
    unsigned long int& feat_ind,
295
296
    const double l_bound,
    const double u_bound
297
298
299
300
301
302
303
304
305
)
{
    unsigned long int phi_ind = feat - _phi.begin();
    feat_set.reserve(feat_set.size() + _un_operators.size() + phi_ind * (_com_bin_operators.size() + 2 * _bin_operators.size()));

    for(auto& op : _un_operators)
    {
        op(feat_set, *feat, feat_ind, l_bound, u_bound);
    }
Thomas Purcell's avatar
Thomas Purcell committed
306

307
308
309
    for(auto& op : _com_bin_operators)
    {
        for(auto feat_2 = _phi.begin(); feat_2 < feat; ++feat_2)
Thomas Purcell's avatar
Thomas Purcell committed
310
        {
311
            op(feat_set, *feat, *feat_2, feat_ind, l_bound, u_bound);
Thomas Purcell's avatar
Thomas Purcell committed
312
        }
313
    }
Thomas Purcell's avatar
Thomas Purcell committed
314

315
316
317
    for(auto& op : _bin_operators)
    {
        for(auto feat_2 = _phi.begin(); feat_2 < feat; ++feat_2)
Thomas Purcell's avatar
Thomas Purcell committed
318
        {
319
320
            op(feat_set, *feat, *feat_2, feat_ind, l_bound, u_bound);
            op(feat_set, *feat_2, *feat, feat_ind, l_bound, u_bound);
Thomas Purcell's avatar
Thomas Purcell committed
321
322
        }
    }
323
}
Thomas Purcell's avatar
Thomas Purcell committed
324
#endif
325

326
void FeatureSpace::generate_feature_space()
Thomas Purcell's avatar
Thomas Purcell committed
327
{
328
329
    double u_bound = 1e50;
    double l_bound = 1e-50;
330
331
    std::vector<int> inds;

332
    for(int nn = 1; nn <= _max_phi - _n_rung_generate; ++nn)
Thomas Purcell's avatar
Thomas Purcell committed
333
    {
Thomas Purcell's avatar
Thomas Purcell committed
334
        node_value_arrs::clear_temp_reg();
335
336
337
338
339
        if(nn == _max_phi)
        {
            u_bound = _u_bound;
            l_bound = _l_bound;
        }
Thomas Purcell's avatar
Thomas Purcell committed
340
        std::vector<node_ptr> next_phi;
Thomas Purcell's avatar
Thomas Purcell committed
341
        _n_feat = _phi.size();
342

343
        unsigned long int feat_ind = _phi.size();
Thomas Purcell's avatar
Thomas Purcell committed
344

345
        node_value_arrs::clear_temp_reg();
346
        double start = omp_get_wtime();
Thomas Purcell's avatar
Thomas Purcell committed
347
        #ifdef PARAMETERIZE
348
349
350
351
        #pragma omp parallel firstprivate(feat_ind, l_bound, u_bound)
        {
            std::vector<node_ptr> next_phi_private;
            std::shared_ptr<NLOptimizer> optimizer = nlopt_wrapper::get_optimizer(_project_type, _task_sizes, _prop, _max_phi, _max_param_depth);
Thomas Purcell's avatar
Thomas Purcell committed
352

353
354
355
356
            #pragma omp for schedule(dynamic)
            for(auto feat_1 = _phi.begin() + _start_gen.back() + _mpi_comm->rank(); feat_1 < _phi.end(); feat_1 += _mpi_comm->size())
            {
                generate_new_feats(feat_1, next_phi_private, feat_ind, optimizer, l_bound, u_bound);
Thomas Purcell's avatar
Thomas Purcell committed
357
            }
358
359
360
361

            #pragma omp critical
            next_phi.insert(next_phi.end(), next_phi_private.begin(), next_phi_private.end());
        }
Thomas Purcell's avatar
Thomas Purcell committed
362
        #else
363
364
365
366
367
        #pragma omp parallel firstprivate(feat_ind, l_bound, u_bound)
        {
            std::vector<node_ptr> next_phi_private;
            #pragma omp for schedule(dynamic)
            for(auto feat_1 = _phi.begin() + _start_gen.back() + _mpi_comm->rank(); feat_1 < _phi.end(); feat_1 += _mpi_comm->size())
Thomas Purcell's avatar
Thomas Purcell committed
368
            {
369
                generate_new_feats(feat_1, next_phi_private, feat_ind, l_bound, u_bound);
Thomas Purcell's avatar
Thomas Purcell committed
370
            }
371
372
373
374

            #pragma omp critical
            next_phi.insert(next_phi.end(), next_phi_private.begin(), next_phi_private.end());
        }
Thomas Purcell's avatar
Thomas Purcell committed
375
        #endif
376
        _start_gen.push_back(_phi.size());
Thomas Purcell's avatar
Thomas Purcell committed
377
        node_value_arrs::clear_temp_reg();
Thomas Purcell's avatar
Bug Fix    
Thomas Purcell committed
378
        if((nn < _max_phi) || (nn <= _n_rung_store) || (_mpi_comm->size() == 1))
379
        {
380
381
382
            int new_phi_size;
            int phi_size_start = _phi.size();
            if(_mpi_comm->rank() == 0)
383
            {
384
385
386
387
                std::vector<std::vector<node_ptr>> next_phi_gathered;
                mpi::gather(*_mpi_comm, next_phi, next_phi_gathered, 0);
                feat_ind = _phi.size();
                for(auto& next_phi_vec : next_phi_gathered)
388
                {
389
                    _phi.insert(_phi.end(), next_phi_vec.begin(), next_phi_vec.end());
390
                }
391
                new_phi_size = _phi.size();
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406

                // Sort the features to ensure consistent feature spaces for all MPI/OpenMP configurations
                std::sort(
                    _phi.begin() + _start_gen.back(),
                    _phi.end(),
                    [feat_ind](node_ptr n1, node_ptr n2){return n1->sort_score(feat_ind) < n2->sort_score(feat_ind);}
                );

                // Reindex sorted features
                std::for_each(
                    _phi.begin() + _start_gen.back(),
                    _phi.end(),
                    [&feat_ind](node_ptr n){n->reindex(feat_ind); ++feat_ind;}
                );

407
                mpi::broadcast(*_mpi_comm, new_phi_size, 0);
Thomas Purcell's avatar
Thomas Purcell committed
408

409
                for(int bb = 0; bb <= (new_phi_size - phi_size_start) / 10000; ++bb)
410
                {
411
                    mpi::broadcast(*_mpi_comm, &_phi[phi_size_start + bb * 10000], std::min(10000, new_phi_size - phi_size_start - bb * 10000), 0);
412
                }
413
            }
414
415
416
417
418
419
420
            else
            {
                mpi::gather(*_mpi_comm, next_phi, 0);
                mpi::broadcast(*_mpi_comm, new_phi_size, 0);
                _phi.resize(new_phi_size);

                for(int bb = 0; bb <= (new_phi_size - phi_size_start) / 10000; ++bb)
421
                {
422
                    mpi::broadcast(*_mpi_comm, &_phi[phi_size_start + bb * 10000], std::min(10000, new_phi_size - phi_size_start - bb * 10000), 0);
423
                }
424
            }
Thomas Purcell's avatar
Bug Fix    
Thomas Purcell committed
425
426

            if(phi_size_start == new_phi_size)
427
428
429
            {
                throw std::logic_error("No features created during this rung (" + std::to_string(nn) + ")");
            }
Thomas Purcell's avatar
Bug Fix    
Thomas Purcell committed
430

Thomas Purcell's avatar
Bug Fix    
Thomas Purcell committed
431
            node_value_arrs::clear_temp_reg();
432
433
434
435
            if(nn < _max_phi)
            {
                // Remove identical features
                _scores.resize(_phi.size());
436
                _mpi_comm->barrier();
437
                project_funcs::project_r(_prop.data(), _scores.data(), _phi, _task_sizes, 1);
438
                _scores.erase(_scores.begin(), _scores.begin() + _start_gen[_start_gen.size() - 1]);
439
                inds = util_funcs::argsort<double>(_scores);
440
441
442

                std::vector<int> del_inds;

Thomas Purcell's avatar
Bug Fix    
Thomas Purcell committed
443
                _mpi_comm->barrier();
444
445
                node_value_arrs::clear_temp_reg();
                for(int sc = 0; sc < _scores.size() - 1; ++sc)
446
                {
447
448
                    #ifdef PARAMETERIZE
                    if(_phi[inds[sc] + _start_gen.back()]->n_params() > 0)
449
                    {
450
                        continue;
451
                    }
452
                    #endif
453

454
                    if(_scores[inds[sc]] > -1e-10)
455
                    {
456
457
458
459
460
461
462
                        double base_val = std::abs(
                            util_funcs::r(
                                _phi[_start_gen.back() + inds[sc]]->value_ptr(),
                                _phi[_start_gen.back() + inds[sc]]->value_ptr(),
                                _n_samp
                            )
                        );
463
                        for(int sc2 = sc + 1; sc2 < _scores.size(); ++sc2)
464
                        {
465
466
467
468
469
470
471
472
473
                            double comp = std::abs(
                                base_val - std::abs(
                                    util_funcs::r(
                                        _phi[_start_gen.back() + inds[sc]]->value_ptr(),
                                        _phi[_start_gen.back() + inds[sc2]]->value_ptr(0, true),
                                        _n_samp
                                    )
                                )
                            );
474
                            if(comp < 1e-10)
475
                            {
476
477
                                del_inds.push_back(-1 * (inds[sc] + _start_gen.back()));
                                break;
478
479
480
                            }
                        }
                    }
481
482
                    else if(_scores[inds[sc + 1]] - _scores[inds[sc]] < 1e-10)
                    {
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
                        double base_val = std::abs(
                            util_funcs::r(
                                _phi[_start_gen.back() + inds[sc]]->value_ptr(),
                                _phi[_start_gen.back() + inds[sc]]->value_ptr(),
                                _n_samp
                            )
                        );
                        double comp = std::abs(
                            base_val - std::abs(
                                util_funcs::r(
                                    _phi[_start_gen.back() + inds[sc]]->value_ptr(),
                                    _phi[_start_gen.back() + inds[sc + 1]]->value_ptr(0, true),
                                    _n_samp
                                )
                            )
                        );
499
                        if(comp < 1e-10)
500
                        {
501
                            del_inds.push_back(-1 * (inds[sc] + _start_gen.back()));
502
                        }
503
                    }
504
505
                }

506
                inds = util_funcs::argsort<int>(del_inds);
507
                for(int ii = 0; ii < inds.size(); ++ii)
508
                {
509
                    _phi.erase(_phi.begin() - del_inds[inds[ii]]);
510
                }
511
512
513

                // Reindex
                for(int ff = _start_gen.back(); ff < _phi.size(); ++ff)
514
                {
515
                    _phi[ff]->reindex(ff);
516
                }
517
            }
518
519
            node_value_arrs::clear_temp_reg();

520
521
            if(nn <= _n_rung_store)
            {
522
                node_value_arrs::resize_values_arr(nn, _phi.size(), (nn != _max_phi));
Thomas Purcell's avatar
Thomas Purcell committed
523

Thomas Purcell's avatar
Thomas Purcell committed
524
                for(int ff = _start_gen.back(); ff < _phi.size(); ++ff)
525
                {
526
                    _phi[ff]->set_value();
527
528
                    _phi[ff]->set_test_value();
                }
529
            }
530
        }
531
        else
532
        {
533
534
535
536
537
538
539
540
541
542
543
            std::vector<size_t> next_phi_sizes(_mpi_comm->size());
            if(_mpi_comm->rank() == 0)
            {
                mpi::gather(*_mpi_comm, next_phi.size(), next_phi_sizes.data(), 0);
                mpi::broadcast(*_mpi_comm, next_phi_sizes.data(), next_phi_sizes.size(), 0);
            }
            else
            {
                mpi::gather(*_mpi_comm, next_phi.size(), 0);
                mpi::broadcast(*_mpi_comm, next_phi_sizes.data(), next_phi_sizes.size(), 0);
            }
Thomas Purcell's avatar
Thomas Purcell committed
544

545
            size_t n_feat = std::accumulate(next_phi_sizes.begin(), next_phi_sizes.end(), 0);
Thomas Purcell's avatar
Bug Fix    
Thomas Purcell committed
546
            if(n_feat == 0)
547
548
549
            {
                throw std::logic_error("No features created during this rung (" + std::to_string(nn) + ")");
            }
Thomas Purcell's avatar
Bug Fix    
Thomas Purcell committed
550

551
552
            size_t n_feat_rank = n_feat / _mpi_comm->size();
            size_t n_feat_below_rank = _mpi_comm->rank() * n_feat_rank;
553
            size_t n_feat_added = 0;
554
555
556
557
558
559
560
561
562
            if(_mpi_comm->rank() < n_feat % _mpi_comm->size())
            {
                ++n_feat_rank;
                n_feat_below_rank += _mpi_comm->rank();
            }
            else
            {
                n_feat_below_rank += n_feat % _mpi_comm->size();
            }
Thomas Purcell's avatar
Thomas Purcell committed
563

564
            while((n_feat_added < n_feat_rank) && (next_phi.size() > 0))
565
566
567
            {
                _phi.push_back(next_phi.back());
                next_phi.pop_back();
568
                ++n_feat_added;
569
            }
Thomas Purcell's avatar
Thomas Purcell committed
570

571
            // This can be calculated without an all_gather, using it to not introduce too many things at one time
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
            std::vector<size_t> next_phi_needed(_mpi_comm->size());
            std::vector<size_t> next_phi_excess(_mpi_comm->size());
            if(_mpi_comm->rank() == 0)
            {
                mpi::gather(*_mpi_comm, next_phi.size(), next_phi_excess.data(), 0);
                mpi::gather(*_mpi_comm, n_feat_rank - n_feat_added, next_phi_needed.data(), 0);

                mpi::broadcast(*_mpi_comm, next_phi_excess.data(), next_phi_excess.size(), 0);
                mpi::broadcast(*_mpi_comm, next_phi_needed.data(), next_phi_needed.size(), 0);
            }
            else
            {
                mpi::gather(*_mpi_comm, next_phi.size(), 0);
                mpi::gather(*_mpi_comm, n_feat_rank - n_feat_added, 0);

                mpi::broadcast(*_mpi_comm, next_phi_excess.data(), next_phi_excess.size(), 0);
                mpi::broadcast(*_mpi_comm, next_phi_needed.data(), next_phi_needed.size(), 0);
            }
590
591
592
593
594
595
596
597
598
599

            std::vector<size_t> send_sizes(next_phi_sizes.size(), 0);
            std::vector<size_t> recv_sizes(next_phi_sizes.size(), 0);
            // Is this rank sending or receiving?
            if(next_phi_excess[_mpi_comm->rank()] > 0)
            {
                size_t total_sent = std::accumulate(next_phi_excess.begin(), next_phi_excess.begin() + _mpi_comm->rank(), 0);
                size_t prev_sent_recv = 0;
                size_t send_size = 0;
                int ind = 0;
Thomas Purcell's avatar
Thomas Purcell committed
600
                while((prev_sent_recv <= total_sent) && (ind < _mpi_comm->size()))
601
602
603
604
                {
                    prev_sent_recv += next_phi_needed[ind];
                    ++ind;
                }
Thomas Purcell's avatar
Thomas Purcell committed
605
                send_size = std::min(next_phi.size(), prev_sent_recv - total_sent);
Thomas Purcell's avatar
Thomas Purcell committed
606
                send_sizes[ind-1] = send_size;
607
608
609
610
611
612
                total_sent = send_size;
                while((total_sent < next_phi.size()) && (ind < _mpi_comm->size()))
                {
                    send_size = std::min(next_phi.size() - total_sent, next_phi_needed[ind]);
                    send_sizes[ind] = send_size;
                    total_sent += send_size;
613
                    ++ind;
614
                }
Thomas Purcell's avatar
Thomas Purcell committed
615

616
617
618
619
620
621
622
623
624
625
626
627
628
629
                total_sent = 0;
                for(int pp = 0; pp < send_sizes.size(); ++pp)
                {
                    if(send_sizes[pp] == 0)
                        continue;

                    std::vector<node_ptr> to_send(send_sizes[pp]);
                    std::copy_n(next_phi.begin() + total_sent, send_sizes[pp], to_send.begin());
                    _mpi_comm->send(pp, _mpi_comm->cantorTagGen(_mpi_comm->rank(), pp, 1, 0), to_send);
                    total_sent += send_sizes[pp];
                }
            }
            else
            {
Thomas Purcell's avatar
Thomas Purcell committed
630
                size_t total_recv = std::accumulate(next_phi_needed.begin(), next_phi_needed.begin() + _mpi_comm->rank(), 0);
631
632
633
                size_t prev_recv_sent = 0;
                size_t recv_size = 0;
                int ind = 0;
Thomas Purcell's avatar
Thomas Purcell committed
634
                while((prev_recv_sent <= total_recv) && (ind < _mpi_comm->size()))
635
636
637
638
                {
                    prev_recv_sent += next_phi_excess[ind];
                    ++ind;
                }
Thomas Purcell's avatar
Thomas Purcell committed
639
                recv_size = std::min(n_feat_rank - n_feat_added, prev_recv_sent - total_recv);
Thomas Purcell's avatar
Thomas Purcell committed
640
                recv_sizes[ind-1] = recv_size;
641
                total_recv = recv_size;
Thomas Purcell's avatar
Thomas Purcell committed
642
                while((total_recv < n_feat_rank - n_feat_added) && (ind < _mpi_comm->size()))
643
                {
Thomas Purcell's avatar
Thomas Purcell committed
644
                    recv_size = std::min(n_feat_rank - n_feat_added - total_recv, next_phi_excess[ind]);
645
646
                    recv_sizes[ind] = recv_size;
                    total_recv += recv_size;
647
                    ++ind;
648
649
650
651
652
653
                }

                total_recv = 0;
                for(int pp = 0; pp < recv_sizes.size(); ++pp)
                {
                    if(recv_sizes[pp] == 0)
654
                    {
655
                        continue;
656
                    }
657
658

                    std::vector<node_ptr> to_recv;
Thomas Purcell's avatar
Thomas Purcell committed
659
                    _mpi_comm->recv(pp, _mpi_comm->cantorTagGen(pp, _mpi_comm->rank(), 1, 0), to_recv);
660
                    for(auto& feat : to_recv)
661
                    {
662
                        _phi.push_back(feat);
663
                    }
664
665
666
                }
            }

667
            #pragma omp parallel for
Thomas Purcell's avatar
Thomas Purcell committed
668
669
670
671
            for(int ff = _start_gen.back(); ff < _phi.size(); ++ff)
            {
                _phi[ff]->reindex(ff + n_feat_below_rank, ff);
                _phi[ff]->set_value();
672
                _phi[ff]->set_test_value();
673
            }
674
        }
675
    }
676
    _n_feat = _phi.size();
Thomas Purcell's avatar
Thomas Purcell committed
677
678
}

679
void FeatureSpace::project_generated(const double* prop, const int size, std::vector<node_ptr>& phi_sel, std::vector<double>& scores_sel)
680
{
681
    std::vector<double> scores_sel_all(node_value_arrs::N_SELECTED);
682
683
    if(node_value_arrs::N_SELECTED > _n_sis_select)
    {
684
685
        scores_sel_all.resize(_phi_selected.size());
        _project(prop, scores_sel_all.data(), _phi_selected, _task_sizes, size / _n_samp);
686
    }
687
    std::copy_n(scores_sel.data(), scores_sel.size(), &scores_sel_all[node_value_arrs::N_SELECTED - _n_sis_select]);
688

689
690
691
    int worst_score_ind = std::max_element(scores_sel.begin(), scores_sel.end()) - scores_sel.begin();
    double worst_score = scores_sel[worst_score_ind];

692
    #pragma omp parallel firstprivate(worst_score, worst_score_ind, prop, size, scores_sel_all)
693
    {
694
695
        std::vector<node_ptr> phi_sel_private(phi_sel);
        std::vector<double> scores_sel_private(scores_sel);
696
        int index_base = _phi.size() + _n_sis_select * (omp_get_thread_num() + _mpi_comm->size());
697

Thomas Purcell's avatar
Thomas Purcell committed
698
        #ifdef PARAMETERIZE
699
700
701
702
703
704
705
706
707
708
709
        std::shared_ptr<NLOptimizer> optimizer;
        if(_reparam_residual)
        {
            std::vector<double> prop_vec(size, 0.0);
            std::copy_n(prop, size, prop_vec.data());
            optimizer = nlopt_wrapper::get_optimizer(_project_type, _task_sizes, prop_vec, _max_phi, _max_param_depth);
        }
        else
        {
            optimizer = nlopt_wrapper::get_optimizer(_project_type, _task_sizes, _prop, _max_phi, _max_param_depth);
        }
Thomas Purcell's avatar
Thomas Purcell committed
710
711
        #endif

712
        #pragma omp for schedule(dynamic)
713
        for(auto feat = _phi.begin() + _start_gen.back() + _mpi_comm->rank(); feat < _phi.end(); feat += _mpi_comm->size())
714
        {
715
            unsigned long int feat_ind = _phi.size() + _n_sis_select * (omp_get_num_threads() + _mpi_comm->size());
716

717
            node_value_arrs::clear_temp_reg_thread();
718
            std::vector<node_ptr> generated_phi;
Thomas Purcell's avatar
Bug fix    
Thomas Purcell committed
719
720
721

            bool is_sel = (*feat)->selected();
            (*feat)->set_selected(false);
Thomas Purcell's avatar
Thomas Purcell committed
722
            #ifdef PARAMETERIZE
723
            generate_new_feats(feat, generated_phi, feat_ind, optimizer, _l_bound, _u_bound);
Thomas Purcell's avatar
Thomas Purcell committed
724
            #else
725
            generate_new_feats(feat, generated_phi, feat_ind, _l_bound, _u_bound);
Thomas Purcell's avatar
Thomas Purcell committed
726
            #endif
Thomas Purcell's avatar
Bug fix    
Thomas Purcell committed
727
            (*feat)->set_selected(is_sel);
Thomas Purcell's avatar
Thomas Purcell committed
728

729
            if(generated_phi.size() == 0)
730
            {
731
                continue;
732
            }
733

Thomas Purcell's avatar
Bug fix    
Thomas Purcell committed
734
            node_value_arrs::clear_temp_reg_thread();
735
736
            std::vector<double> scores(generated_phi.size());
            _project_no_omp(prop, scores.data(), generated_phi, _task_sizes, size / _n_samp);
737

738
            std::vector<int> inds = util_funcs::argsort<double>(scores);
739

740
741
            int ii = 0;
            while((ii < inds.size()) && (scores[inds[ii]] < -1.0))
742
            {
743
                ++ii;
744
            }
745

Thomas Purcell's avatar
Thomas Purcell committed
746
            while((ii < inds.size()) && ((scores[inds[ii]] < worst_score) || (phi_sel_private.size() < _n_sis_select)))
747
            {
748
                double cur_score = scores[inds[ii]];
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
                bool valid_feat = _is_valid(
                    generated_phi[inds[ii]]->value_ptr(0),
                    _n_samp,
                    _cross_cor_max,
                    scores_sel_all,
                    cur_score,
                    node_value_arrs::N_SELECTED - _n_sis_select,
                    0
                );
                valid_feat = valid_feat && _is_valid_feat_list(
                    generated_phi[inds[ii]]->value_ptr(0),
                    _n_samp,
                    _cross_cor_max,
                    phi_sel_private,
                    scores_sel_private,
                    cur_score
                );
                if(valid_feat)
767
                {
768
                    if(scores_sel_private.size() == _n_sis_select)
769
                    {
770
                        generated_phi[inds[ii]]->reindex(index_base + worst_score_ind);
Thomas Purcell's avatar
Thomas Purcell committed
771
                        generated_phi[inds[ii]]->set_value();
772
773
774
775
776
                        phi_sel_private[worst_score_ind] = generated_phi[inds[ii]];
                        scores_sel_private[worst_score_ind] = cur_score;
                    }
                    else
                    {
777
                        generated_phi[inds[ii]]->reindex(index_base + scores_sel_private.size());
Thomas Purcell's avatar
Thomas Purcell committed
778
                        generated_phi[inds[ii]]->set_value();
779
780
                        phi_sel_private.push_back(generated_phi[inds[ii]]);
                        scores_sel_private.push_back(cur_score);
781
                    }
782
783
                    worst_score_ind = std::max_element(scores_sel_private.begin(), scores_sel_private.end()) - scores_sel_private.begin();
                    worst_score = scores_sel_private[worst_score_ind];
784
                }
785
                ++ii;
786
787
            }
        }
Thomas Purcell's avatar
Thomas Purcell committed
788

789
790
        #pragma omp critical
        {
791
            index_base = _phi.size() + _n_sis_select * _mpi_comm->rank();
792
793
794
            worst_score_ind = std::max_element(scores_sel.begin(), scores_sel.end()) - scores_sel.begin();
            for(int sc = 0; sc < scores_sel_private.size(); ++sc)
            {
795
796
797
798
                if(
                    ((phi_sel.size() < _n_sis_select) || (scores_sel_private[sc] < scores_sel[worst_score_ind])) &&
                    _is_valid_feat_list(phi_sel_private[sc]->value_ptr(), _n_samp, _cross_cor_max, phi_sel, scores_sel, scores_sel_private[sc])
                )
799
                {
Thomas Purcell's avatar
Thomas Purcell committed
800
801
802
803
                    if(phi_sel.size() == _n_sis_select)
                    {
                        scores_sel[worst_score_ind] = scores_sel_private[sc];
                        phi_sel[worst_score_ind] = phi_sel_private[sc];
804
805
806
807
                        if(phi_sel[worst_score_ind]->rung() == _max_phi)
                        {
                            phi_sel[worst_score_ind]->reindex(index_base + worst_score_ind);
                        }
Thomas Purcell's avatar
Thomas Purcell committed
808
809
810
811
812
                    }
                    else
                    {
                        scores_sel.push_back(scores_sel_private[sc]);
                        phi_sel.push_back(phi_sel_private[sc]);
813
814
815
816
                        if(phi_sel.back()->rung() == _max_phi)
                        {
                            phi_sel.back()->reindex(index_base + phi_sel.size());
                        }
Thomas Purcell's avatar
Thomas Purcell committed
817
                    }
818
                    worst_score_ind = std::max_element(scores_sel.begin(), scores_sel.end()) - scores_sel.begin();
819
820
821
822
823
824
                }
            }
        }
    }
}

825
void FeatureSpace::sis(const std::vector<double>& prop)
Thomas Purcell's avatar
Thomas Purcell committed
826
{
827
828
    // Reparameterize for the residuals
#ifdef PARAMETERIZE
829
    if(_reparam_residual && (_phi_selected.size() > 0))
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
    {
        // Make a hard copy of the previously selected features
        for(int ff = _phi_selected.size() - _n_sis_select; ff < _phi_selected.size(); ++ff)
        {
            _phi_selected[ff] = _phi_selected[ff]->hard_copy();
        }

        // Reparameterize based on residuals
        #pragma omp parallel
        {
            std::shared_ptr<NLOptimizer> optimizer = nlopt_wrapper::get_optimizer(_project_type, _task_sizes, prop, _max_phi, _max_param_depth);
            #pragma omp for schedule(dynamic)
            for(int ff = _start_gen[1]; ff < _phi.size(); ++ff)
            {
                if(_phi[ff]->n_params() > 0)
                {
                    _phi[ff]->get_parameters(optimizer);
                }
            }
        }
    }
#endif
852
    // Create output directories if needed
853
854
855
    boost::filesystem::path p(_feature_space_file.c_str());
    boost::filesystem::create_directories(p.remove_filename());

856
    // Set up output file generation
857
858
    std::ofstream out_file_stream = std::ofstream();
    out_file_stream.open(_feature_space_file, std::ios::app);
Thomas Purcell's avatar
Bug Fix    
Thomas Purcell committed
859

Thomas Purcell's avatar
Thomas Purcell committed
860
861
    std::ofstream sum_file_stream = std::ofstream();
    sum_file_stream.open(_feature_space_summary_file, std::ios::app);
862

863
    // Set up selection data structures
864
    std::vector<node_ptr> phi_sel;
Thomas Purcell's avatar
Bug Fix    
Thomas Purcell committed
865
    std::vector<double> scores_sel(_n_sis_select, std::numeric_limits<double>::infinity());
866
    phi_sel.reserve(_n_sis_select);
Thomas Purcell's avatar
Thomas Purcell committed
867

868
869
    int cur_feat = node_value_arrs::N_SELECTED;
    node_value_arrs::resize_d_matrix_arr(_n_sis_select);
870
    _phi_selected.reserve(_phi_selected.size() + _n_sis_select);
Thomas Purcell's avatar