// Copyright 2021 Thomas A. R. Purcell

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at

//     http://www.apache.org/licenses/LICENSE-2.0

// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "feature_creation/feature_space/FeatureSpace.hpp"

BOOST_CLASS_EXPORT_GUID(FeatureNode, "FeatureNode")
BOOST_CLASS_EXPORT_GUID(AddNode, "AddNode")
BOOST_CLASS_EXPORT_GUID(SubNode, "SubNode")
BOOST_CLASS_EXPORT_GUID(AbsDiffNode, "AbsDiffNode")
BOOST_CLASS_EXPORT_GUID(MultNode, "MultNode")
BOOST_CLASS_EXPORT_GUID(DivNode, "DivNode")
BOOST_CLASS_EXPORT_GUID(SqNode, "SqNode")
BOOST_CLASS_EXPORT_GUID(SqrtNode, "SqrtNode")
BOOST_CLASS_EXPORT_GUID(CbNode, "CbNode")
BOOST_CLASS_EXPORT_GUID(CbrtNode, "CbrtNode")
BOOST_CLASS_EXPORT_GUID(SixPowNode, "SixPowNode")
BOOST_CLASS_EXPORT_GUID(ExpNode, "ExpNode")
BOOST_CLASS_EXPORT_GUID(NegExpNode, "NegExpNode")
BOOST_CLASS_EXPORT_GUID(LogNode, "LogNode")
BOOST_CLASS_EXPORT_GUID(AbsNode, "AbsNode")
BOOST_CLASS_EXPORT_GUID(InvNode, "InvNode")
BOOST_CLASS_EXPORT_GUID(SinNode, "SinNode")
BOOST_CLASS_EXPORT_GUID(CosNode, "CosNode")

#ifdef PARAMETERIZE
BOOST_CLASS_EXPORT_GUID(AddParamNode, "AddParamNode")
BOOST_CLASS_EXPORT_GUID(SubParamNode, "SubParamNode")
BOOST_CLASS_EXPORT_GUID(AbsDiffParamNode, "AbsDiffParamNode")
BOOST_CLASS_EXPORT_GUID(MultParamNode, "MultParamNode")
BOOST_CLASS_EXPORT_GUID(DivParamNode, "DivParamNode")
BOOST_CLASS_EXPORT_GUID(SqParamNode, "SqParamNode")
BOOST_CLASS_EXPORT_GUID(SqrtParamNode, "SqrtParamNode")
BOOST_CLASS_EXPORT_GUID(CbParamNode, "CbParamNode")
BOOST_CLASS_EXPORT_GUID(CbrtParamNode, "CbrtParamNode")
BOOST_CLASS_EXPORT_GUID(SixPowParamNode, "SixPowParamNode")
BOOST_CLASS_EXPORT_GUID(ExpParamNode, "ExpParamNode")
BOOST_CLASS_EXPORT_GUID(NegExpParamNode, "NegExpParamNode")
BOOST_CLASS_EXPORT_GUID(LogParamNode, "LogParamNode")
BOOST_CLASS_EXPORT_GUID(AbsParamNode, "AbsParamNode")
BOOST_CLASS_EXPORT_GUID(InvParamNode, "InvParamNode")
BOOST_CLASS_EXPORT_GUID(SinParamNode, "SinParamNode")
BOOST_CLASS_EXPORT_GUID(CosParamNode, "CosParamNode")

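// Construct a FeatureSpace from the primary features phi_0 (parameterized
// build). A minimal, illustrative usage sketch follows; how mpi_comm, phi_0,
// prop, and task_sizes are obtained depends on the surrounding setup code,
// and all values shown are placeholders only:
//
//     FeatureSpace feat_space(
//         mpi_comm,                      // std::shared_ptr<MPI_Interface>
//         phi_0,                         // primary features
//         {"add", "sub", "mult", "div"}, // allowed_ops
//         {"exp", "log"},                // allowed_param_ops
//         prop,                          // property vector to fit
//         task_sizes,                    // number of samples per task
//         "regression",                  // project_type
//         2,                             // max_phi (maximum rung)
//         100,                           // n_sis_select
//         1,                             // max_store_rung
//         0,                             // n_rung_generate
//         0.95,                          // cross_corr_max
//         1e-50,                         // min_abs_feat_val
//         1e50,                          // max_abs_feat_val
//         -1,                            // max_param_depth (-1 -> max_phi)
//         false                          // reparam_residual
//     );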
FeatureSpace::FeatureSpace(
    std::shared_ptr<MPI_Interface> mpi_comm,
    std::vector<node_ptr> phi_0,
    std::vector<std::string> allowed_ops,
    std::vector<std::string> allowed_param_ops,
    std::vector<double> prop,
    std::vector<int> task_sizes,
    std::string project_type,
    int max_phi,
    int n_sis_select,
    int max_store_rung,
    int n_rung_generate,
    double cross_corr_max,
    double min_abs_feat_val,
    double max_abs_feat_val,
    int max_param_depth,
    bool reparam_residual
):
    _phi(phi_0),
    _phi_0(phi_0),
    _end_no_params(1, phi_0.size()),
    _start_gen_reparam(1, 0),
    _allowed_param_ops(allowed_param_ops),
    _allowed_ops(allowed_ops),
    _prop(prop),
    _scores(phi_0.size(), 0.0),
    _task_sizes(task_sizes),
    _start_gen(1, 0),
    _feature_space_file("feature_space/selected_features.txt"),
    _feature_space_summary_file("feature_space/SIS_summary.txt"),
    _project_type(project_type),
    _mpi_comm(mpi_comm),
    _cross_cor_max(cross_corr_max),
    _l_bound(min_abs_feat_val),
    _u_bound(max_abs_feat_val),
    _max_phi(max_phi),
    _n_sis_select(n_sis_select),
    _n_samp(phi_0[0]->n_samp()),
    _n_feat(phi_0.size()),
    _n_rung_store(max_store_rung),
    _n_rung_generate(n_rung_generate),
    _max_param_depth(max_param_depth),
    _reparam_residual(reparam_residual)
{
    initialize_fs();
}
#else
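// Non-parameterized build: identical construction, but without the
// parameterized-operator options (allowed_param_ops, max_param_depth,
// reparam_residual).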
FeatureSpace::FeatureSpace(
    std::shared_ptr<MPI_Interface> mpi_comm,
    std::vector<node_ptr> phi_0,
    std::vector<std::string> allowed_ops,
    std::vector<double> prop,
    std::vector<int> task_sizes,
    std::string project_type,
    int max_phi,
    int n_sis_select,
    int max_store_rung,
    int n_rung_generate,
    double cross_corr_max,
    double min_abs_feat_val,
    double max_abs_feat_val
):
    _phi(phi_0),
    _phi_0(phi_0),
    _allowed_ops(allowed_ops),
    _prop(prop),
    _scores(phi_0.size(), 0.0),
    _task_sizes(task_sizes),
    _start_gen(1, 0),
    _feature_space_file("feature_space/selected_features.txt"),
    _feature_space_summary_file("feature_space/SIS_summary.txt"),
    _project_type(project_type),
    _mpi_comm(mpi_comm),
    _cross_cor_max(cross_corr_max),
    _l_bound(min_abs_feat_val),
    _u_bound(max_abs_feat_val),
    _max_phi(max_phi),
    _n_sis_select(n_sis_select),
    _n_samp(phi_0[0]->n_samp()),
    _n_feat(phi_0.size()),
    _n_rung_store(max_store_rung),
    _n_rung_generate(n_rung_generate),
    _max_param_depth(0),
    _reparam_residual(false)
{
    initialize_fs();
}
#endif

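// Shared initialization for both constructors: validate the parameterization
// depth, drop constant primary features, size the centralized value arrays,
// check the rung storage/generation settings, and generate the feature space.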
void FeatureSpace::initialize_fs()
{
    #ifdef PARAMETERIZE
    if(_max_param_depth == -1)
    {
        _max_param_depth = _max_phi;
    }
    if((_max_param_depth < 0) || (_max_param_depth > _max_phi))
    {
        throw std::logic_error("Invalid parameter depth.");
    }
    nlopt_wrapper::MAX_PARAM_DEPTH = _max_param_depth;
    #endif

    for(int ff = _phi.size() - 1; ff >= 0; --ff)
    {
        if(_phi[ff]->is_const())
        {
            std::cerr << "WARNING: Primary Feature " << _phi[ff]->expr() << " is constant. Removing it from the feature space" << std::endl;
            std::fill_n(_phi[ff]->value_ptr(), _n_samp, -1.0);
            _phi.erase(_phi.begin() + ff);
        }
        else
        {
            _phi[ff]->set_value();
        }
    }
    _n_feat = _phi.size();
    node_value_arrs::resize_values_arr(0, _n_feat);
    for(int ff = 0; ff < _phi.size(); ++ff)
    {
        _phi[ff]->reindex(_phi[ff]->feat_ind(), ff);
        _phi[ff]->set_value();
    }

    if(_n_rung_store == -1)
    {
        _n_rung_store = _max_phi - 1;
    }
    if(_n_rung_generate > 1)
    {
        throw std::logic_error("A maximum of one rung can be generated on the fly.");
    }
    else if(_max_phi - _n_rung_generate < _n_rung_store)
    {
        throw std::logic_error("Requesting to store more rungs than what can be pre-generated.");
    }

    node_value_arrs::set_task_sz_train(_task_sizes);
    int n_max_ops = 0;
    for(int rr = 0; rr < _max_phi - _n_rung_store; ++rr)
    {
        n_max_ops += std::pow(2, rr);
    }

    initialize_fs_output_files();
    comp_feats::set_is_valid_fxn(_project_type, _cross_cor_max, _n_samp, _is_valid, _is_valid_feat_list);
    set_op_lists();

    double start = omp_get_wtime();
    generate_feature_space();
    _mpi_comm->barrier();
    double duration = omp_get_wtime() - start;
    if(_mpi_comm->rank() == 0)
    {
        std::cout << "time to generate feat sapce: " << duration << " s" << std::endl;
    }
    mpi_reduce_op::set_op(_project_type, _cross_cor_max, _n_sis_select);

    _scores.reserve(_phi.size());
    _scores.resize(_phi.size());
}

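// Sort the allowed operator strings into the operator lists: commutative
// binary operators, non-commutative binary operators (for which both argument
// orders must be generated), and unary operators. Note that mult is
// commutative in the standard list but placed in the non-commutative
// parameterized list, since the fitted parameters make the argument order
// significant.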
void FeatureSpace::set_op_lists()
{
    for(auto & op : _allowed_ops)
    {
        if((op.compare("add") == 0) || (op.compare("mult") == 0) || (op.compare("abs_diff") == 0) || (op.compare("sub") == 0))
        {
            _com_bin_operators.push_back(allowed_op_maps::binary_operator_map[op]);
        }
        else if((op.compare("div") == 0))
        {
            _bin_operators.push_back(allowed_op_maps::binary_operator_map[op]);
        }
        else
        {
            _un_operators.push_back(allowed_op_maps::unary_operator_map[op]);
        }
    }

    #ifdef PARAMETERIZE
    for(auto& op : _allowed_param_ops)
    {
        if((op.compare("add") == 0) || (op.compare("sub") == 0) || (op.compare("abs_diff") == 0))
        {
            _com_bin_param_operators.push_back(allowed_op_maps::binary_param_operator_map[op]);
        }
        else if((op.compare("div") == 0) || (op.compare("mult") == 0))
        {
            _bin_param_operators.push_back(allowed_op_maps::binary_param_operator_map[op]);
        }
        else
        {
            _un_param_operators.push_back(allowed_op_maps::unary_param_operator_map[op]);
        }
    }
    #endif
}

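// Write the header lines of the selected-features file and the SIS summary
// file (root rank only).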
void FeatureSpace::initialize_fs_output_files() const
{
    if(_mpi_comm->rank() == 0)
    {
        std::ofstream out_file_stream = std::ofstream();
        std::ofstream sum_file_stream = std::ofstream();
        out_file_stream.open(_feature_space_file);
        sum_file_stream.open(_feature_space_summary_file);
        out_file_stream << std::setw(14) <<std::left << "# FEAT_ID" << "Feature Postfix Expression (RPN)" << std::endl;
        sum_file_stream << std::setw(14) <<std::left << "# FEAT_ID" << std::setw(24) << std::left << "Score" << "Feature Expression" << std::endl;
        out_file_stream.close();
        sum_file_stream.close();
    }
}

#ifdef PARAMETERIZE
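// Generate all parameterized features that can be built from *feat: unary
// parameterized operators applied to *feat, and binary parameterized
// operators pairing *feat with every feature in [start, feat). Both argument
// orders are generated for the non-commutative operators. The new features
// are appended to feat_set.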
void FeatureSpace::generate_param_feats(
    std::vector<node_ptr>::iterator& feat,
    std::vector<node_ptr>& feat_set,
    const std::vector<node_ptr>::iterator& start,
    unsigned long int& feat_ind,
    std::shared_ptr<NLOptimizer> optimizer,
    const double l_bound,
    const double u_bound
)
{
    unsigned long int phi_ind = feat - _phi.begin();
    feat_set.reserve(feat_set.size() + _un_param_operators.size() + phi_ind * (_com_bin_param_operators.size() + 2 * _bin_param_operators.size()));

    for(auto& op : _un_param_operators)
    {
        op(feat_set, *feat, feat_ind, l_bound, u_bound, optimizer);
    }

    for(auto& op : _com_bin_param_operators)
    {
        for(auto feat_2 = start; feat_2 != feat; ++feat_2)
        {
            op(feat_set, *feat, *feat_2, feat_ind, l_bound, u_bound, optimizer);
        }
    }
    for(auto& op : _bin_param_operators)
    {
        for(auto feat_2 = start; feat_2 != feat; ++feat_2)
        {
            op(feat_set, *feat, *feat_2, feat_ind, l_bound, u_bound, optimizer);
            op(feat_set, *feat_2, *feat, feat_ind, l_bound, u_bound, optimizer);
        }
    }
}

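// Generate parameterized features that combine *feat with the features in
// _phi_reparam and with the parameter-free features of every rung up to
// *feat's rung, appending the results to feat_set.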
void FeatureSpace::generate_reparam_feats(
    std::vector<node_ptr>::iterator& feat,
    std::vector<node_ptr>& feat_set,
    unsigned long int& feat_ind,
    std::shared_ptr<NLOptimizer> optimizer,
    const double l_bound,
    const double u_bound
)
{
    int cur_rung = (*feat)->rung();
    unsigned long int max_n_feat = _end_no_params[cur_rung] + _phi_reparam.size();
    feat_set.reserve(feat_set.size() + _un_param_operators.size() + max_n_feat * (_com_bin_param_operators.size() + 2 * _bin_param_operators.size()));

    for(auto& op : _un_param_operators)
    {
        op(feat_set, *feat, feat_ind, l_bound, u_bound, optimizer);
    }

    for(auto& op : _com_bin_param_operators)
    {
        for(int rr = 0; rr <= cur_rung; ++rr)
        {
            for(auto feat_2 = _phi.begin() + _start_gen[rr]; (feat_2 != feat) && (feat_2 != _phi.begin() + _end_no_params[rr]); ++feat_2)
            {
                op(feat_set, *feat, *feat_2, feat_ind, l_bound, u_bound, optimizer);
            }
        }
        for(auto feat_2 = _phi_reparam.begin(); (feat_2 != feat) && (feat_2 != _phi_reparam.end()); ++feat_2)
        {
            op(feat_set, *feat, *feat_2, feat_ind, l_bound, u_bound, optimizer);
        }
    }
    for(auto& op : _bin_param_operators)
    {
        for(int rr = 0; rr <= cur_rung; ++rr)
        {
            for(auto feat_2 = _phi.begin() + _start_gen[rr]; (feat_2 != feat) && (feat_2 != _phi.begin() + _end_no_params[rr]); ++feat_2)
            {
                op(feat_set, *feat, *feat_2, feat_ind, l_bound, u_bound, optimizer);
                op(feat_set, *feat_2, *feat, feat_ind, l_bound, u_bound, optimizer);
            }
        }

        for(auto feat_2 = _phi_reparam.begin(); (feat_2 != feat) && (feat_2 != _phi_reparam.end()); ++feat_2)
        {
            op(feat_set, *feat, *feat_2, feat_ind, l_bound, u_bound, optimizer);
            op(feat_set, *feat_2, *feat, feat_ind, l_bound, u_bound, optimizer);
        }
    }
}

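// Rebuild the parameterized portion of the feature space against a new
// property vector (typically the model residuals). This mirrors
// generate_feature_space: features are generated rung by rung, gathered,
// sorted, and reindexed for reproducibility, and duplicates are removed via
// pairwise correlation checks.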
void FeatureSpace::generate_reparam_feature_set(const std::vector<double>& prop)
{
    double u_bound = 1e50;
    double l_bound = 1e-50;
    std::vector<int> inds;

    for(int nn = 1; nn <= _max_phi - _n_rung_generate; ++nn)
    {
        node_value_arrs::clear_temp_reg();
        if(nn == _max_phi)
        {
            u_bound = _u_bound;
            l_bound = _l_bound;
        }
        std::vector<node_ptr> next_phi;
        _n_feat = _phi.size();

        unsigned long int feat_ind = _phi.size() + _phi_reparam.size();

        node_value_arrs::clear_temp_reg();
        double start = omp_get_wtime();
        #pragma omp parallel firstprivate(feat_ind, l_bound, u_bound)
        {
            std::vector<node_ptr> next_phi_private;
            std::shared_ptr<NLOptimizer> optimizer_param = nlopt_wrapper::get_optimizer(_project_type, _task_sizes, _prop, _max_phi, _max_param_depth);
            std::shared_ptr<NLOptimizer> optimizer_reparam = nlopt_wrapper::get_optimizer(_project_type, _task_sizes, prop, _max_phi, _max_param_depth);

#ifdef OMP45
            #pragma omp for schedule(monotonic: dynamic)
#else
            #pragma omp for schedule(dynamic)
#endif
            for(auto feat_1 = _phi_reparam.begin() + _start_gen_reparam.back() + _mpi_comm->rank(); feat_1 < _phi_reparam.end(); feat_1 += _mpi_comm->size())
            {
                generate_non_param_feats(feat_1, next_phi_private, _phi_reparam.begin(), feat_ind, l_bound, u_bound);
                generate_param_feats(feat_1, next_phi_private, _phi_reparam.begin(), feat_ind, optimizer_param, l_bound, u_bound);
            }

#ifdef OMP45
            #pragma omp for schedule(monotonic: dynamic)
#else
            #pragma omp for schedule(dynamic)
#endif
            for(auto feat_1 = _phi.begin() + _start_gen[nn-1] + _mpi_comm->rank(); feat_1 < _phi.begin() + _end_no_params[nn-1]; feat_1 += _mpi_comm->size())
            {
                generate_reparam_feats(feat_1, next_phi_private, feat_ind, optimizer_reparam, l_bound, u_bound);
            }

            #pragma omp critical
            next_phi.insert(next_phi.end(), next_phi_private.begin(), next_phi_private.end());
        }
        _start_gen_reparam.push_back(_phi_reparam.size());
        node_value_arrs::clear_temp_reg();
        if(nn < _max_phi)
        {
            int new_phi_size;
            int phi_size_start = _phi_reparam.size();
            if(_mpi_comm->rank() == 0)
            {
                std::vector<std::vector<node_ptr>> next_phi_gathered;
                mpi::gather(*_mpi_comm, next_phi, next_phi_gathered, 0);
                feat_ind = _phi_reparam.size();
                for(auto& next_phi_vec : next_phi_gathered)
                {
                    _phi_reparam.insert(_phi_reparam.end(), next_phi_vec.begin(), next_phi_vec.end());
                }
                new_phi_size = _phi_reparam.size();

                // Sort the features to ensure consistent feature spaces for all MPI/OpenMP configurations
                std::sort(
                    _phi_reparam.begin() + _start_gen_reparam.back(),
                    _phi_reparam.end(),
                    [feat_ind](node_ptr n1, node_ptr n2){return n1->sort_score(feat_ind) < n2->sort_score(feat_ind);}
                );

                // Reindex sorted features
                std::for_each(
                    _phi_reparam.begin() + _start_gen_reparam.back(),
                    _phi_reparam.end(),
                    [&feat_ind](node_ptr n){n->reindex(feat_ind); ++feat_ind;}
                );

                mpi::broadcast(*_mpi_comm, new_phi_size, 0);

                for(int bb = 0; bb <= (new_phi_size - phi_size_start) / 10000; ++bb)
                {
                    mpi::broadcast(*_mpi_comm, &_phi_reparam[phi_size_start + bb * 10000], std::min(10000, new_phi_size - phi_size_start - bb * 10000), 0);
                }
            }
            else
            {
                mpi::gather(*_mpi_comm, next_phi, 0);
                mpi::broadcast(*_mpi_comm, new_phi_size, 0);
                _phi_reparam.resize(new_phi_size);

                for(int bb = 0; bb <= (new_phi_size - phi_size_start) / 10000; ++bb)
                {
                    mpi::broadcast(*_mpi_comm, &_phi_reparam[phi_size_start + bb * 10000], std::min(10000, new_phi_size - phi_size_start - bb * 10000), 0);
                }
            }

            if(phi_size_start == new_phi_size)
            {
                throw std::logic_error("No features created during this rung (" + std::to_string(nn) + ")");
            }

            node_value_arrs::clear_temp_reg();
            if(nn < _max_phi)
            {
                // Remove identical features
                std::vector<double> scores(_phi_reparam.size());
                _mpi_comm->barrier();
                project_funcs::project_r(_prop.data(), scores.data(), _phi_reparam, _task_sizes, 1);
                scores.erase(scores.begin(), scores.begin() + _start_gen_reparam.back());
                inds = util_funcs::argsort<double>(scores);

                std::vector<int> del_inds;

                _mpi_comm->barrier();
                node_value_arrs::clear_temp_reg();
                for(int sc = 0; sc < scores.size() - 1; ++sc)
                {
                    #ifdef PARAMETERIZE
                    if(_phi_reparam[inds[sc] + _start_gen_reparam.back()]->n_params() > 0)
                    {
                        continue;
                    }
                    #endif

                    if(scores[inds[sc]] > -1e-10)
                    {
                        double base_val = std::abs(
                            util_funcs::r(
                                _phi_reparam[_start_gen_reparam.back() + inds[sc]]->value_ptr(),
                                _phi_reparam[_start_gen_reparam.back() + inds[sc]]->value_ptr(),
                                _n_samp
                            )
                        );
                        for(int sc2 = sc + 1; sc2 < scores.size(); ++sc2)
                        {
                            double comp = std::abs(
                                base_val - std::abs(
                                    util_funcs::r(
                                        _phi_reparam[_start_gen_reparam.back() + inds[sc]]->value_ptr(),
                                        _phi_reparam[_start_gen_reparam.back() + inds[sc2]]->value_ptr(0, true),
                                        _n_samp
                                    )
                                )
                            );
                            if(comp < 1e-10)
                            {
                                del_inds.push_back(-1 * (inds[sc] + _start_gen_reparam.back()));
                                break;
                            }
                        }
                    }
                    else if(scores[inds[sc + 1]] - scores[inds[sc]] < 1e-10)
                    {
                        double base_val = std::abs(
                            util_funcs::r(
                                _phi_reparam[_start_gen_reparam.back() + inds[sc]]->value_ptr(),
                                _phi_reparam[_start_gen_reparam.back() + inds[sc]]->value_ptr(),
                                _n_samp
                            )
                        );
                        double comp = std::abs(
                            base_val - std::abs(
                                util_funcs::r(
                                    _phi_reparam[_start_gen_reparam.back() + inds[sc]]->value_ptr(),
                                    _phi_reparam[_start_gen_reparam.back() + inds[sc + 1]]->value_ptr(0, true),
                                    _n_samp
                                )
                            )
                        );
                        if(comp < 1e-10)
                        {
                            del_inds.push_back(-1 * (inds[sc] + _start_gen_reparam.back()));
                        }
                    }
                }

                inds = util_funcs::argsort<int>(del_inds);
                for(int ii = 0; ii < inds.size(); ++ii)
                {
                    _phi_reparam.erase(_phi_reparam.begin() - del_inds[inds[ii]]);
                }

                // Reindex
                for(int ff = _start_gen_reparam.back(); ff < _phi_reparam.size(); ++ff)
                {
                    _phi_reparam[ff]->reindex(ff + _n_feat);
                }
            }
        }
    }
}
#endif

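// Generate all non-parameterized features that can be built from *feat:
// unary operators applied to *feat, and binary operators pairing *feat with
// every feature in [start, feat). Both argument orders are generated for the
// non-commutative operators. The new features are appended to feat_set.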
void FeatureSpace::generate_non_param_feats(
    std::vector<node_ptr>::iterator& feat,
    std::vector<node_ptr>& feat_set,
    const std::vector<node_ptr>::iterator& start,
    unsigned long int& feat_ind,
    const double l_bound,
    const double u_bound
)
{
    unsigned long int phi_ind = feat - start;
    feat_set.reserve(feat_set.size() + _un_operators.size() + phi_ind * (_com_bin_operators.size() + 2 * _bin_operators.size()));

    for(auto& op : _un_operators)
    {
        op(feat_set, *feat, feat_ind, l_bound, u_bound);
    }

    for(auto& op : _com_bin_operators)
    {
        for(auto feat_2 = start; feat_2 < feat; ++feat_2)
        {
            op(feat_set, *feat, *feat_2, feat_ind, l_bound, u_bound);
        }
    }

    for(auto& op : _bin_operators)
    {
        for(auto feat_2 = start; feat_2 < feat; ++feat_2)
        {
            op(feat_set, *feat, *feat_2, feat_ind, l_bound, u_bound);
            op(feat_set, *feat_2, *feat, feat_ind, l_bound, u_bound);
        }
    }
}

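// Generate the feature space rung by rung. Each MPI rank expands its share
// of the previous rung in parallel; the new rung is then either gathered,
// sorted, deduplicated, and broadcast so all ranks share an identical _phi,
// or, for the final rung on multiple ranks, redistributed evenly across the
// ranks without duplicate removal.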
void FeatureSpace::generate_feature_space()
{
    double u_bound = 1e50;
    double l_bound = 1e-50;
    std::vector<int> inds;

    for(int nn = 1; nn <= _max_phi - _n_rung_generate; ++nn)
    {
        node_value_arrs::clear_temp_reg();
        if(nn == _max_phi)
        {
            u_bound = _u_bound;
            l_bound = _l_bound;
        }
        std::vector<node_ptr> next_phi;
        _n_feat = _phi.size();

        unsigned long int feat_ind = _phi.back()->feat_ind();

        node_value_arrs::clear_temp_reg();
        double start = omp_get_wtime();
        #ifdef PARAMETERIZE
        #pragma omp parallel firstprivate(feat_ind, l_bound, u_bound)
        {
            std::vector<node_ptr> next_phi_private;
            std::shared_ptr<NLOptimizer> optimizer = nlopt_wrapper::get_optimizer(_project_type, _task_sizes, _prop, _max_phi, _max_param_depth);

#ifdef OMP45
            #pragma omp for schedule(monotonic: dynamic)
#else
            #pragma omp for schedule(dynamic)
#endif
            for(auto feat_1 = _phi.begin() + _start_gen.back() + _mpi_comm->rank(); feat_1 < _phi.end(); feat_1 += _mpi_comm->size())
            {
                generate_non_param_feats(feat_1, next_phi_private, _phi.begin(), feat_ind, l_bound, u_bound);
                generate_param_feats(feat_1, next_phi_private, _phi.begin(), feat_ind, optimizer, l_bound, u_bound);
            }

            #pragma omp critical
            next_phi.insert(next_phi.end(), next_phi_private.begin(), next_phi_private.end());
        }
        #else
        #pragma omp parallel firstprivate(feat_ind, l_bound, u_bound)
        {
            std::vector<node_ptr> next_phi_private;
#ifdef OMP45
            #pragma omp for schedule(monotonic: dynamic)
#else
            #pragma omp for schedule(dynamic)
#endif
            for(auto feat_1 = _phi.begin() + _start_gen.back() + _mpi_comm->rank(); feat_1 < _phi.end(); feat_1 += _mpi_comm->size())
            {
                generate_non_param_feats(feat_1, next_phi_private, _phi.begin(), feat_ind, l_bound, u_bound);
            }

            #pragma omp critical
            next_phi.insert(next_phi.end(), next_phi_private.begin(), next_phi_private.end());
        }
        #endif
        _start_gen.push_back(_phi.size());
        node_value_arrs::clear_temp_reg();
        if((nn < _max_phi) || (nn <= _n_rung_store) || (_mpi_comm->size() == 1))
        {
            int new_phi_size;
            int phi_size_start = _phi.size();
            if(_mpi_comm->rank() == 0)
            {
                std::vector<std::vector<node_ptr>> next_phi_gathered;
                mpi::gather(*_mpi_comm, next_phi, next_phi_gathered, 0);
                feat_ind = _phi.size();
                for(auto& next_phi_vec : next_phi_gathered)
                {
                    _phi.insert(_phi.end(), next_phi_vec.begin(), next_phi_vec.end());
                }
                new_phi_size = _phi.size();

                // Sort the features to ensure consistent feature spaces for all MPI/OpenMP configurations
                std::sort(
                    _phi.begin() + _start_gen.back(),
                    _phi.end(),
                    [feat_ind](node_ptr n1, node_ptr n2){return n1->sort_score(feat_ind) < n2->sort_score(feat_ind);}
                );

                // Reindex sorted features
                std::for_each(
                    _phi.begin() + _start_gen.back(),
                    _phi.end(),
                    [&feat_ind](node_ptr n){n->reindex(feat_ind); ++feat_ind;}
                );

                mpi::broadcast(*_mpi_comm, new_phi_size, 0);

                for(int bb = 0; bb <= (new_phi_size - phi_size_start) / 10000; ++bb)
                {
                    mpi::broadcast(*_mpi_comm, &_phi[phi_size_start + bb * 10000], std::min(10000, new_phi_size - phi_size_start - bb * 10000), 0);
                }
            }
            else
            {
                mpi::gather(*_mpi_comm, next_phi, 0);
                mpi::broadcast(*_mpi_comm, new_phi_size, 0);
                _phi.resize(new_phi_size);

                for(int bb = 0; bb <= (new_phi_size - phi_size_start) / 10000; ++bb)
                {
                    mpi::broadcast(*_mpi_comm, &_phi[phi_size_start + bb * 10000], std::min(10000, new_phi_size - phi_size_start - bb * 10000), 0);
                }
            }

            if(phi_size_start == new_phi_size)
            {
                throw std::logic_error("No features created during this rung (" + std::to_string(nn) + ")");
            }

            node_value_arrs::clear_temp_reg();
            if(nn < _max_phi)
            {
                // Remove identical features
                _scores.resize(_phi.size());
                _mpi_comm->barrier();
                project_funcs::project_r(_prop.data(), _scores.data(), _phi, _task_sizes, 1);
                _scores.erase(_scores.begin(), _scores.begin() + _start_gen[_start_gen.size() - 1]);
                inds = util_funcs::argsort<double>(_scores);

                std::vector<int> del_inds;

                _mpi_comm->barrier();
                node_value_arrs::clear_temp_reg();
                for(int sc = 0; sc < _scores.size() - 1; ++sc)
                {
                    #ifdef PARAMETERIZE
                    if(_phi[inds[sc] + _start_gen.back()]->n_params() > 0)
                    {
                        continue;
                    }
                    #endif

                    if(_scores[inds[sc]] > -1e-10)
                    {
                        double base_val = std::abs(
                            util_funcs::r(
                                _phi[_start_gen.back() + inds[sc]]->value_ptr(),
                                _phi[_start_gen.back() + inds[sc]]->value_ptr(),
                                _n_samp
                            )
                        );
                        for(int sc2 = sc + 1; sc2 < _scores.size(); ++sc2)
                        {
                            double comp = std::abs(
                                base_val - std::abs(
                                    util_funcs::r(
                                        _phi[_start_gen.back() + inds[sc]]->value_ptr(),
                                        _phi[_start_gen.back() + inds[sc2]]->value_ptr(0, true),
                                        _n_samp
                                    )
                                )
                            );
                            if(comp < 1e-10)
                            {
                                del_inds.push_back(-1 * (inds[sc] + _start_gen.back()));
                                break;
                            }
                        }
                    }
                    else if(_scores[inds[sc + 1]] - _scores[inds[sc]] < 1e-10)
                    {
                        double base_val = std::abs(
                            util_funcs::r(
                                _phi[_start_gen.back() + inds[sc]]->value_ptr(),
                                _phi[_start_gen.back() + inds[sc]]->value_ptr(),
                                _n_samp
                            )
                        );
                        double comp = std::abs(
                            base_val - std::abs(
                                util_funcs::r(
                                    _phi[_start_gen.back() + inds[sc]]->value_ptr(),
                                    _phi[_start_gen.back() + inds[sc + 1]]->value_ptr(0, true),
                                    _n_samp
                                )
                            )
                        );
                        if(comp < 1e-10)
                        {
                            del_inds.push_back(-1 * (inds[sc] + _start_gen.back()));
                        }
                    }
                }

                inds = util_funcs::argsort<int>(del_inds);
                for(int ii = 0; ii < inds.size(); ++ii)
                {
                    _phi.erase(_phi.begin() - del_inds[inds[ii]]);
                }

                // Reindex
                for(int ff = _start_gen.back(); ff < _phi.size(); ++ff)
                {
                    _phi[ff]->reindex(ff);
                }
            }
            node_value_arrs::clear_temp_reg();
            for(int ff = _start_gen.back(); ff < _phi.size(); ++ff)
            {
                _phi[ff]->reset_feats(_phi);
            }

            if(nn <= _n_rung_store)
            {
                node_value_arrs::resize_values_arr(nn, _phi.size());

                for(int ff = _start_gen.back(); ff < _phi.size(); ++ff)
                {
                    _phi[ff]->set_value();
                    _phi[ff]->set_test_value();
                }
            }
        }
        else
        {
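            // Final rung with more than one rank: skip the full gather and
            // instead redistribute next_phi so every rank holds an (almost)
            // even share of the newly generated features.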
            std::vector<size_t> next_phi_sizes(_mpi_comm->size());
            if(_mpi_comm->rank() == 0)
            {
                mpi::gather(*_mpi_comm, next_phi.size(), next_phi_sizes.data(), 0);
                mpi::broadcast(*_mpi_comm, next_phi_sizes.data(), next_phi_sizes.size(), 0);
            }
            else
            {
                mpi::gather(*_mpi_comm, next_phi.size(), 0);
                mpi::broadcast(*_mpi_comm, next_phi_sizes.data(), next_phi_sizes.size(), 0);
            }

            size_t n_feat = std::accumulate(next_phi_sizes.begin(), next_phi_sizes.end(), static_cast<size_t>(0));
            if(n_feat == 0)
            {
                throw std::logic_error("No features created during this rung (" + std::to_string(nn) + ")");
            }

            size_t n_feat_rank = n_feat / _mpi_comm->size();
            size_t n_feat_below_rank = _mpi_comm->rank() * n_feat_rank;
            size_t n_feat_added = 0;
            if(_mpi_comm->rank() < n_feat % _mpi_comm->size())
            {
                ++n_feat_rank;
                n_feat_below_rank += _mpi_comm->rank();
            }
            else
            {
                n_feat_below_rank += n_feat % _mpi_comm->size();
            }

            while((n_feat_added < n_feat_rank) && (next_phi.size() > 0))
            {
                _phi.push_back(next_phi.back());
                next_phi.pop_back();
                ++n_feat_added;
            }

            // This could be calculated without an all_gather; the gather/broadcast is used here to keep the communication pattern simple
            std::vector<size_t> next_phi_needed(_mpi_comm->size());
            std::vector<size_t> next_phi_excess(_mpi_comm->size());
            if(_mpi_comm->rank() == 0)
            {
                mpi::gather(*_mpi_comm, next_phi.size(), next_phi_excess.data(), 0);
                mpi::gather(*_mpi_comm, n_feat_rank - n_feat_added, next_phi_needed.data(), 0);

                mpi::broadcast(*_mpi_comm, next_phi_excess.data(), next_phi_excess.size(), 0);
                mpi::broadcast(*_mpi_comm, next_phi_needed.data(), next_phi_needed.size(), 0);
            }
            else
            {
                mpi::gather(*_mpi_comm, next_phi.size(), 0);
                mpi::gather(*_mpi_comm, n_feat_rank - n_feat_added, 0);

                mpi::broadcast(*_mpi_comm, next_phi_excess.data(), next_phi_excess.size(), 0);
                mpi::broadcast(*_mpi_comm, next_phi_needed.data(), next_phi_needed.size(), 0);
            }

            std::vector<size_t> send_sizes(next_phi_sizes.size(), 0);
            std::vector<size_t> recv_sizes(next_phi_sizes.size(), 0);
            // Is this rank sending or receiving?
            if(next_phi_excess[_mpi_comm->rank()] > 0)
            {
                size_t total_sent = std::accumulate(next_phi_excess.begin(), next_phi_excess.begin() + _mpi_comm->rank(), static_cast<size_t>(0));
                size_t prev_sent_recv = 0;
                size_t send_size = 0;
                int ind = 0;
                while((prev_sent_recv <= total_sent) && (ind < _mpi_comm->size()))
                {
                    prev_sent_recv += next_phi_needed[ind];
                    ++ind;
                }
                send_size = std::min(next_phi.size(), prev_sent_recv - total_sent);
                send_sizes[ind-1] = send_size;
                total_sent = send_size;
                while((total_sent < next_phi.size()) && (ind < _mpi_comm->size()))
                {
                    send_size = std::min(next_phi.size() - total_sent, next_phi_needed[ind]);
                    send_sizes[ind] = send_size;
                    total_sent += send_size;
                    ++ind;
                }

                total_sent = 0;
                for(int pp = 0; pp < send_sizes.size(); ++pp)
                {
                    if(send_sizes[pp] == 0)
                        continue;

                    std::vector<node_ptr> to_send(send_sizes[pp]);
                    std::copy_n(next_phi.begin() + total_sent, send_sizes[pp], to_send.begin());
                    _mpi_comm->send(pp, _mpi_comm->cantorTagGen(_mpi_comm->rank(), pp, 1, 0), to_send);
                    total_sent += send_sizes[pp];
                }
            }
            else
            {
                size_t total_recv = std::accumulate(next_phi_needed.begin(), next_phi_needed.begin() + _mpi_comm->rank(), static_cast<size_t>(0));
                size_t prev_recv_sent = 0;
                size_t recv_size = 0;
                int ind = 0;
                while((prev_recv_sent <= total_recv) && (ind < _mpi_comm->size()))
                {
                    prev_recv_sent += next_phi_excess[ind];
                    ++ind;
                }
                recv_size = std::min(n_feat_rank - n_feat_added, prev_recv_sent - total_recv);
                recv_sizes[ind-1] = recv_size;
                total_recv = recv_size;
                while((total_recv < n_feat_rank - n_feat_added) && (ind < _mpi_comm->size()))
                {
                    recv_size = std::min(n_feat_rank - n_feat_added - total_recv, next_phi_excess[ind]);
                    recv_sizes[ind] = recv_size;
                    total_recv += recv_size;
                    ++ind;
                }

                total_recv = 0;
                for(int pp = 0; pp < recv_sizes.size(); ++pp)
                {
                    if(recv_sizes[pp] == 0)
                    {
                        continue;
                    }

                    std::vector<node_ptr> to_recv;
                    _mpi_comm->recv(pp, _mpi_comm->cantorTagGen(pp, _mpi_comm->rank(), 1, 0), to_recv);
                    for(auto& feat : to_recv)
                    {
                        _phi.push_back(feat);
                    }
                }
            }

            #pragma omp parallel for
            for(int ff = _start_gen.back(); ff < _phi.size(); ++ff)
            {
                _phi[ff]->reindex(ff + n_feat_below_rank, ff);
                _phi[ff]->set_value();
                _phi[ff]->set_test_value();
            }
        }

#ifdef PARAMETERIZE
        // Reorder features based on the number of parameters they have (none goes first)
        std::vector<int> feat_n_params(_phi.size() - _start_gen.back());
        std::transform(
            _phi.begin() + _start_gen.back(),
            _phi.end(),
            feat_n_params.begin(),
            [](node_ptr feat){return feat->n_params();}
        );
        inds = util_funcs::argsort<int>(feat_n_params);
        next_phi.resize(feat_n_params.size());
        std::copy_n(_phi.begin() + _start_gen.back(), feat_n_params.size(), next_phi.begin());
        std::transform(
            inds.begin(),
            inds.end(),
            _phi.begin() + _start_gen.back(),
            [&next_phi](int ind){return next_phi[ind];}
        );
        for(int ff = _start_gen.back(); ff < _phi.size(); ++ff)
        {
            _phi[ff]->reindex(ff);
            _phi[ff]->set_value();
        }

        // Set how many features have no parameters
        _end_no_params.push_back(
            std::count_if(feat_n_params.begin(), feat_n_params.end(), [](int n_param){return n_param == 0;})
        );
#endif
    }
    _n_feat = _phi.size();
}

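// Generate the final rung of features on the fly, project them onto the
// loss, and update phi_sel/scores_sel with any generated feature that beats
// the current worst selected score while passing the cross-correlation
// checks against both the previously selected features and the running
// selection.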
void FeatureSpace::project_generated(std::shared_ptr<LossFunction> loss, std::vector<node_ptr>& phi_sel, std::vector<double>& scores_sel)
{
    std::vector<double> scores_sel_all(node_value_arrs::N_SELECTED);
    if(node_value_arrs::N_SELECTED > _n_sis_select)
    {
        scores_sel_all.resize(_phi_selected.size());
        project_funcs::project_loss(loss, _phi_selected, scores_sel_all.data());
    }
    std::copy_n(scores_sel.data(), scores_sel.size(), &scores_sel_all[node_value_arrs::N_SELECTED - _n_sis_select]);

    int worst_score_ind = std::max_element(scores_sel.begin(), scores_sel.end()) - scores_sel.begin();
    double worst_score = scores_sel[worst_score_ind];

    #pragma omp parallel firstprivate(worst_score, worst_score_ind, scores_sel_all)
    {
        std::shared_ptr<LossFunction> loss_copy = loss_function_util::copy(loss);
        std::vector<node_ptr> phi_sel_private(phi_sel);
        std::vector<double> scores_sel_private(scores_sel);
        int index_base = _phi.size() + _n_sis_select * (omp_get_thread_num() + _mpi_comm->size());

        #ifdef PARAMETERIZE
        std::shared_ptr<NLOptimizer> optimizer = nlopt_wrapper::get_optimizer(_project_type, _task_sizes, _prop, _max_phi, _max_param_depth);
        std::shared_ptr<NLOptimizer> reparam_optimizer;
        if(_reparam_residual)
        {
            std::vector<double> prop_vec(loss_copy->prop_project());
            reparam_optimizer = nlopt_wrapper::get_optimizer(_project_type, _task_sizes, prop_vec, _max_phi, _max_param_depth);
        }
        else
        {
            reparam_optimizer = nullptr;
        }
        #endif

#ifdef OMP45
        #pragma omp for schedule(monotonic: dynamic)
#else
        #pragma omp for schedule(dynamic)
#endif
        for(auto feat = _phi.begin() + _start_gen.back() + _mpi_comm->rank(); feat < _phi.end(); feat += _mpi_comm->size())
        {
            unsigned long int feat_ind = _phi.size() + _n_sis_select * (omp_get_num_threads() + _mpi_comm->size());

            node_value_arrs::clear_temp_reg_thread();
            std::vector<node_ptr> generated_phi;

            bool is_sel = (*feat)->selected();
            (*feat)->set_selected(false);
#ifdef PARAMETERIZE
            generate_non_param_feats(feat, generated_phi, _phi.begin(), feat_ind, _l_bound, _u_bound);
            generate_param_feats(feat, generated_phi, _phi.begin(), feat_ind, optimizer, _l_bound, _u_bound);
            if(reparam_optimizer && (feat < _phi.begin() + _end_no_params.back()))
            {
                generate_reparam_feats(feat, generated_phi, feat_ind, reparam_optimizer, _l_bound, _u_bound);
            }
#else
            generate_non_param_feats(feat, generated_phi, _phi.begin(), feat_ind, _l_bound, _u_bound);
#endif
            (*feat)->set_selected(is_sel);

            if(generated_phi.size() == 0)
            {
                continue;
            }
            node_value_arrs::clear_temp_reg_thread();
            std::vector<double> scores(generated_phi.size());
            project_funcs::project_loss_no_omp(loss_copy, generated_phi, scores.data());

            std::vector<int> inds = util_funcs::argsort<double>(scores);

            int ii = 0;
            while((ii < inds.size()) && (scores[inds[ii]] < -1.0))
            {
                ++ii;
            }

            while((ii < inds.size()) && ((scores[inds[ii]] < worst_score) || (phi_sel_private.size() < _n_sis_select)))
            {
                double cur_score = scores[inds[ii]];
                bool valid_feat = _is_valid(
                    generated_phi[inds[ii]]->value_ptr(0),
                    _n_samp,
                    _cross_cor_max,
                    scores_sel_all,
                    cur_score,
                    node_value_arrs::N_SELECTED - _n_sis_select,
                    0
                );
                valid_feat = valid_feat && _is_valid_feat_list(
                    generated_phi[inds[ii]]->value_ptr(0),
                    _n_samp,
                    _cross_cor_max,
                    phi_sel_private,
                    scores_sel_private,
                    cur_score
                );
                if(valid_feat)
                {
                    if(scores_sel_private.size() == _n_sis_select)
                    {
                        generated_phi[inds[ii]]->reindex(index_base + worst_score_ind);
                        generated_phi[inds[ii]]->set_value();
                        phi_sel_private[worst_score_ind] = generated_phi[inds[ii]];
                        scores_sel_private[worst_score_ind] = cur_score;
                    }
                    else
                    {
                        generated_phi[inds[ii]]->reindex(index_base + scores_sel_private.size());
                        generated_phi[inds[ii]]->set_value();
                        phi_sel_private.push_back(generated_phi[inds[ii]]);
                        scores_sel_private.push_back(cur_score);
                    }
                    worst_score_ind = std::max_element(scores_sel_private.begin(), scores_sel_private.end()) - scores_sel_private.begin();
                    worst_score = scores_sel_private[worst_score_ind];
                }
                ++ii;
            }
        }

        #pragma omp critical
        {
            index_base = _phi.size() + _n_sis_select * _mpi_comm->rank();
            worst_score_ind = std::max_element(scores_sel.begin(), scores_sel.end()) - scores_sel.begin();
            for(int sc = 0; sc < scores_sel_private.size(); ++sc)
            {
                if(
                    ((phi_sel.size() < _n_sis_select) || (scores_sel_private[sc] < scores_sel[worst_score_ind])) &&
                    _is_valid_feat_list(phi_sel_private[sc]->value_ptr(), _n_samp, _cross_cor_max, phi_sel, scores_sel, scores_sel_private[sc])
                )
                {
                    if(phi_sel.size() == _n_sis_select)
                    {
                        scores_sel[worst_score_ind] = scores_sel_private[sc];
                        phi_sel[worst_score_ind] = phi_sel_private[sc];
                        if(phi_sel[worst_score_ind]->rung() == _max_phi)
                        {
                            phi_sel[worst_score_ind]->reindex(index_base + worst_score_ind);
                        }
                    }
                    else
                    {
                        scores_sel.push_back(scores_sel_private[sc]);
                        phi_sel.push_back(phi_sel_private[sc]);
                        if(phi_sel.back()->rung() == _max_phi)
                        {
                            phi_sel.back()->reindex(index_base + phi_sel.size());
                        }
                    }
                    worst_score_ind = std::max_element(scores_sel.begin(), scores_sel.end()) - scores_sel.begin();
                }
            }
        }
    }
}

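// Convenience overload: construct the default loss function for this
// property vector and run SIS with it.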
void FeatureSpace::sis(const std::vector<double>& prop)
{
    sis(
        loss_function_util::get_loss_function(
            _project_type,
            prop,
            {},
            _task_sizes,
            std::vector<int>(_task_sizes.size(), 0),
            false
        )
    );
}

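// Perform one sure independence screening (SIS) step: project all features
// onto the loss, pick the _n_sis_select best features that pass the
// cross-correlation check against previously selected ones (generating the
// final rung on the fly if requested), merge the per-rank selections, and
// append the winners to _phi_selected and the output files.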
void FeatureSpace::sis(std::shared_ptr<LossFunction> loss)
{
#ifdef PARAMETERIZE
    // Reparameterize for the residuals
    if(_reparam_residual && (_phi_selected.size() > 0))
    {
        double start_time = omp_get_wtime();
        _phi.resize(_n_feat);
        _phi_reparam.resize(0);
        _start_gen_reparam.resize(0);
        generate_reparam_feature_set(loss->prop_project());
        _phi.insert(_phi.end(), _phi_reparam.begin(), _phi_reparam.end());
        _scores.resize(_phi.size());
    }
#endif
    // Create output directories if needed
    boost::filesystem::path p(_feature_space_file.c_str());
    boost::filesystem::create_directories(p.remove_filename());

    // Set up output file generation
    std::ofstream out_file_stream = std::ofstream();
    out_file_stream.open(_feature_space_file, std::ios::app);

    std::ofstream sum_file_stream = std::ofstream();
    sum_file_stream.open(_feature_space_summary_file, std::ios::app);

    // Set up selection data structures
    std::vector<node_ptr> phi_sel;
    std::vector<double> scores_sel(_n_sis_select, std::numeric_limits<double>::infinity());
    phi_sel.reserve(_n_sis_select);

    int cur_feat = node_value_arrs::N_SELECTED;
    node_value_arrs::resize_d_matrix_arr(_n_sis_select);
    _phi_selected.reserve(_phi_selected.size() + _n_sis_select);
    // Get projection scores
    double start_time = omp_get_wtime();
    project_funcs::project_loss(loss, _phi, _scores.data());

    _mpi_comm->barrier();
    if(_mpi_comm->rank() == 0)
    {
        std::cout << "Projection time: " << omp_get_wtime() - start_time << " s" << std::endl;
    }

    // Sort the scores to get inds
    std::vector<int> inds = util_funcs::argsort<double>(_scores);
    int ii = 0;
    int cur_feat_local = 0;
    double cur_score = 0.0;

    // Advance to the first valid score
    while(_scores[inds[ii]] < -1.0000000001)
    {
        ++ii;
    }

    // Get the scores of the previously selected features to facilitate the comparison
    std::vector<double> scores_sel_all(node_value_arrs::N_SELECTED, 0.0);
    if(node_value_arrs::N_SELECTED > _n_sis_select)
    {
        project_funcs::project_loss(loss, _phi_selected, scores_sel_all.data());
    }

    // Get the best features currently generated on this rank
    start_time = omp_get_wtime();
    while((cur_feat_local != _n_sis_select) && (ii < _scores.size()))
    {
        if(_is_valid(_phi[inds[ii]]->value_ptr(), _n_samp, _cross_cor_max, scores_sel_all, _scores[inds[ii]], cur_feat + cur_feat_local, 0))
        {
            scores_sel[cur_feat_local] = _scores[inds[ii]];
            scores_sel_all[cur_feat + cur_feat_local] = _scores[inds[ii]];
            phi_sel.push_back(_phi[inds[ii]]);
            phi_sel.back()->set_selected(true);
            phi_sel.back()->set_d_mat_ind(cur_feat + cur_feat_local);
            phi_sel.back()->set_value();

            ++cur_feat_local;
        }
        ++ii;
    }

    _mpi_comm->barrier();
    if(_mpi_comm->rank() == 0)
    {
        std::cout << "Time to get best features on rank : " << omp_get_wtime() - start_time << " s" << std::endl;
    }


    // Remove selection criteria
    for(auto& feat : phi_sel)
    {
        feat->set_selected(false);
        feat->set_d_mat_ind(-1);
    }

    // Generate features on the fly and get the best features for this rank
    if(_n_rung_generate > 0)
    {
#ifdef PARAMETERIZE
        _phi.resize(_n_feat);
        _phi.insert(_phi.end(), _phi_reparam.begin() + _start_gen_reparam.back(), _phi_reparam.end());
        _scores.resize(_phi.size());
#endif
        start_time = omp_get_wtime();
        phi_sel.resize(cur_feat_local);
        scores_sel.resize(cur_feat_local);

        project_generated(loss, phi_sel, scores_sel);

        _mpi_comm->barrier();
        if(_mpi_comm->rank() == 0)
        {
            std::cout << "Projection time for features generated on the fly: " << omp_get_wtime() - start_time << " s" << std::endl;
        }
    }
    // Reset scores
    std::fill_n(&scores_sel_all[cur_feat], _n_sis_select, 0.0);

    // If only one process is used, phi_sel already contains the selected features
    start_time = omp_get_wtime();
    if(_mpi_comm->size() > 1)
    {
        // Collect the best features from all ranks
        std::vector<node_sc_pair> local_sel(_n_sis_select, std::make_tuple<node_ptr, double>(nullptr, std::numeric_limits<double>::max()));
        for(int ff = 0; ff < phi_sel.size(); ++ff)
        {
            local_sel[ff] = mpi_reduce_op::make_node_sc_pair(phi_sel[ff], scores_sel[ff]);
        }
        std::vector<node_sc_pair> selected(_n_sis_select);
        mpi::all_reduce(
            *_mpi_comm,
            local_sel,
            selected,
            mpi_reduce_op::select_top_feats
        );

        // Move the selected features to _phi_selected and set the value properly
        _mpi_comm->barrier();
        cur_feat_local = 0;
        for(auto& sel : selected)
        {
            _phi_selected.push_back(std::get<0>(sel));
            _phi_selected.back()->set_selected(true);
            _phi_selected.back()->set_d_mat_ind(cur_feat);
            _phi_selected.back()->set_value();
            ++cur_feat_local;
            ++cur_feat;
        }

        // Output the information about the selected features
        if(_mpi_comm->rank() == 0)
        {
            cur_feat -= cur_feat_local;
            for(auto& sel : selected)
            {
                out_file_stream << std::setw(14) <<std::left << cur_feat << _phi_selected[cur_feat]->postfix_expr() << std::endl;
                sum_file_stream << std::setw(14) <<std::left << cur_feat << std::setw(24) << std::setprecision(18) << std::left << -1 * std::get<1>(sel);
                sum_file_stream << _phi_selected[cur_feat]->expr() << std::endl;

                ++cur_feat;
            }
        }
        // Ensure that enough features were selected
        if(selected.size() < _n_sis_select)
        {
            throw std::logic_error(
                "SIS went through all features and did not select enough (" +
                std::to_string(selected.size()) +
                " not " +
                std::to_string(_n_sis_select) +
                ")."
            );
        }
    }
    else
    {
        // Ensure that enough features were selected
        if(phi_sel.size() < _n_sis_select)
        {
            throw std::logic_error(
                "SIS went through all features and did not select enough (" +
                std::to_string(phi_sel.size()) +
                " not " +
                std::to_string(_n_sis_select) +
                ")."
            );
        }

        cur_feat_local = 0;
        // Move selected features into _phi_selected and add the features to the output files
        inds = util_funcs::argsort<double>(scores_sel);
        for(auto& ind : inds)
        {
            node_value_arrs::clear_temp_reg();
            out_file_stream << std::setw(14) <<std::left << cur_feat << phi_sel[ind]->postfix_expr() << std::endl;
            sum_file_stream << std::setw(14) <<std::left << cur_feat << std::setw(24) << std::setprecision(18) << std::left << -1 * scores_sel[ind];
            sum_file_stream << phi_sel[ind]->expr() << std::endl;

            _phi_selected.push_back(phi_sel[ind]);
            _phi_selected.back()->set_selected(true);
            _phi_selected.back()->set_d_mat_ind(cur_feat);
            _phi_selected.back()->set_value();

            ++cur_feat;
            ++cur_feat_local;
        }
    }

    if(cur_feat != node_value_arrs::N_SELECTED)
    {
        throw std::logic_error("SIS went through all features and did not select enough.");
    }

    if(_mpi_comm->rank() == 0)
    {
        std::cout << "Complete final combination/selection from all ranks: " << omp_get_wtime() - start_time << " s" << std::endl;
        out_file_stream << "#";
        for(int dd = 0; dd < 71; ++dd)
        {
            out_file_stream << "-";
        }
        out_file_stream << std::endl;
        out_file_stream.close();

        sum_file_stream << "#";
        for(int dd = 0; dd < 71; ++dd)
        {
            sum_file_stream << "-";
        }
        sum_file_stream << std::endl;
        sum_file_stream.close();
    }
}

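// Remove the feature at index ind from _phi. Selected features cannot be
// removed, since their values are still referenced by the descriptor matrix
// (see set_d_mat_ind).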
void FeatureSpace::remove_feature(const int ind)
{
    if(_phi[ind]->selected())
    {
        throw std::logic_error("Can't remove a selected feature");
    }

    _phi.erase(_phi.begin() + ind);
}