// FeatureSpace.cpp
#include <feature_creation/feature_space/FeatureSpace.hpp>
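
// Register every concrete Node subclass with boost::serialization so that
// node_ptr objects can be serialized (for example when feature lists are
// exchanged between MPI ranks) and restored with their correct dynamic type.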
BOOST_CLASS_EXPORT_GUID(FeatureNode, "FeatureNode")
BOOST_CLASS_EXPORT_GUID(AddNode, "AddNode")
BOOST_CLASS_EXPORT_GUID(SubNode, "SubNode")
BOOST_CLASS_EXPORT_GUID(AbsDiffNode, "AbsDiffNode")
BOOST_CLASS_EXPORT_GUID(MultNode, "MultNode")
BOOST_CLASS_EXPORT_GUID(DivNode, "DivNode")
BOOST_CLASS_EXPORT_GUID(SqNode, "SqNode")
BOOST_CLASS_EXPORT_GUID(SqrtNode, "SqrtNode")
BOOST_CLASS_EXPORT_GUID(CbNode, "CbNode")
BOOST_CLASS_EXPORT_GUID(CbrtNode, "CbrtNode")
BOOST_CLASS_EXPORT_GUID(SixPowNode, "SixPowNode")
BOOST_CLASS_EXPORT_GUID(ExpNode, "ExpNode")
BOOST_CLASS_EXPORT_GUID(NegExpNode, "NegExpNode")
BOOST_CLASS_EXPORT_GUID(LogNode, "LogNode")
BOOST_CLASS_EXPORT_GUID(AbsNode, "AbsNode")
BOOST_CLASS_EXPORT_GUID(InvNode, "InvNode")
BOOST_CLASS_EXPORT_GUID(SinNode, "SinNode")
BOOST_CLASS_EXPORT_GUID(CosNode, "CosNode")
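
// Construct the feature space from the primary (rung 0) features phi_0.
// prop is the property vector being fit and task_sizes gives the number of
// samples in each task. max_phi is the highest rung to generate, n_sis_select
// the number of features kept per SIS iteration, and max_store_rung the
// highest rung whose feature values are kept in memory (-1 means max_phi - 1).
// n_rung_generate is the number of rungs generated on the fly during SIS
// (at most one), and min/max_abs_feat_val reject features whose absolute
// values fall outside the allowed range in the final rung.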
FeatureSpace::FeatureSpace(
std::shared_ptr<MPI_Interface> mpi_comm,
std::vector<node_ptr> phi_0,
std::vector<std::string> allowed_ops,
std::vector<double> prop,
std::vector<int> task_sizes,
int max_phi,
int n_sis_select,
int max_store_rung,
int n_rung_generate,
double min_abs_feat_val,
double max_abs_feat_val
):
_phi(phi_0),
_phi_0(phi_0),
_allowed_ops(allowed_ops),
_scores(phi_0.size(), 0.0),
_task_sizes(task_sizes),
_start_gen(1, 0),
_feature_space_file("feature_space/selected_features.txt"),
_mpi_comm(mpi_comm),
_l_bound(min_abs_feat_val),
_u_bound(max_abs_feat_val),
_max_phi(max_phi),
_n_sis_select(n_sis_select),
_n_samp(phi_0[0]->n_samp()),
_n_feat(phi_0.size()),
_n_rung_store(max_store_rung),
_n_rung_generate(n_rung_generate)
{
initialize_fs(prop);
}
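
// Illustrative usage sketch (assuming mpi_comm, phi_0, prop, and task_sizes
// have been set up elsewhere in the library; the operator list and numeric
// settings below are arbitrary examples, not defaults):
//
//     FeatureSpace feat_space(
//         mpi_comm,                                  // std::shared_ptr<MPI_Interface>
//         phi_0,                                     // primary features (rung 0)
//         {"add", "sub", "abs_diff", "mult", "div"}, // allowed operators
//         prop,                                      // property vector
//         task_sizes,                                // samples per task
//         2,                                         // max_phi
//         100,                                       // n_sis_select
//         -1,                                        // max_store_rung (-1 -> max_phi - 1)
//         0,                                         // n_rung_generate
//         1e-50,                                     // min_abs_feat_val
//         1e50                                       // max_abs_feat_val
//     );
//     feat_space.sis(prop);

// Shared setup used by the constructor: validate the rung storage/generation
// settings, create the selected-features output file on rank 0, select the
// projection function (project_r), sort the allowed operators into
// commutative-binary, binary, and unary lists, and generate the feature space.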
void FeatureSpace::initialize_fs(std::vector<double> prop)
{
if(_n_rung_store == -1)
_n_rung_store = _max_phi - 1;
if(_n_rung_generate > 1)
throw std::logic_error("A maximum of one rung can be generated on the fly.");
else if(_max_phi - _n_rung_generate < _n_rung_store)
throw std::logic_error("Requesting to store more rungs than what can be pre-generated.");
if(_mpi_comm->rank() == 0)
{
std::ofstream out_file_stream = std::ofstream();
out_file_stream.open(_feature_space_file);
out_file_stream << std::setw(14) <<std::left << "# FEAT_ID" << std::setw(24) << std::left << "Score" << "Feature Expression" << std::endl;
out_file_stream.close();
}
_project = project_funcs::project_r;
for(auto & op : _allowed_ops)
{
if((op.compare("add") == 0) || (op.compare("sub") == 0) || (op.compare("mult") == 0) || (op.compare("abs_diff") == 0))
_com_bin_operators.push_back(allowed_op_maps::binary_operator_map[op]);
else if((op.compare("div") == 0))
_bin_operators.push_back(allowed_op_maps::binary_operator_map[op]);
else
_un_operators.push_back(allowed_op_maps::unary_operator_map[op]);
}
generate_feature_space(prop);
_scores.reserve(_phi.size());
_scores.resize(_phi.size());
}
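
// Apply every allowed operator to the feature pointed to by feat: all unary
// operators, plus all binary operators paired with each feature that precedes
// feat in _phi (both argument orders for the non-commutative ones). Valid new
// features are appended to feat_set with consecutive indices starting from
// feat_ind; operators that would create an invalid feature (for example one
// whose values fall outside [l_bound, u_bound]) throw InvalidFeatureException
// and are skipped.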
void FeatureSpace::generate_new_feats(std::vector<node_ptr>::iterator& feat, std::vector<node_ptr>& feat_set, int& feat_ind, double l_bound, double u_bound)
{
int phi_ind = feat - _phi.begin();
feat_set.reserve(feat_set.size() + _un_operators.size() + phi_ind * (_com_bin_operators.size() + 2 * _bin_operators.size()));
for(auto& op : _un_operators)
{
try
{
feat_set.push_back(op(*feat, feat_ind, l_bound, u_bound));
++feat_ind;
}
catch(const InvalidFeatureException& e)
{
// Do Nothing
}
}
for(auto& op : _com_bin_operators)
{
for(auto feat_2 = _phi.begin(); feat_2 != feat; ++feat_2)
{
try
{
feat_set.push_back(op(*feat, *feat_2, feat_ind, l_bound, u_bound));
++feat_ind;
}
catch(const InvalidFeatureException& e)
{
// Do Nothing
}
}
}
for(auto& op : _bin_operators)
{
for(auto feat_2 = _phi.begin(); feat_2 != feat; ++feat_2)
{
try
{
feat_set.push_back(op(*feat, *feat_2, feat_ind, l_bound, u_bound));
++feat_ind;
}
catch(const InvalidFeatureException& e)
{
// Do Nothing
}
try
{
feat_set.push_back(op(*feat_2, *feat, feat_ind, l_bound, u_bound));
++feat_ind;
}
catch(const InvalidFeatureException& e)
{
// Do Nothing
}
}
}
}
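
// Build the feature space rung by rung up to _max_phi - _n_rung_generate.
// Each MPI rank generates new features from a strided subset of the previous
// rung. The new rung is then either gathered on rank 0 and broadcast so every
// rank holds an identical _phi (with near-duplicate features removed for the
// intermediate rungs), or, for the final rung on more than one rank, left
// distributed across the ranks.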
void FeatureSpace::generate_feature_space(std::vector<double>& prop)
{
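// Intermediate rungs are generated with effectively unbounded value limits;
// the user-supplied bounds (_l_bound, _u_bound) are only applied once the
// final rung (nn == _max_phi) is reached.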
double u_bound = 1e50;
double l_bound = 1e-50;
std::vector<int> inds;
for(int nn = 1; nn <= _max_phi - _n_rung_generate; ++nn)
{
if(nn == _max_phi)
{
u_bound = _u_bound;
l_bound = _l_bound;
}
std::vector<node_ptr> next_phi;
_n_feat = _phi.size();
int feat_ind = _phi.size();
for(auto feat_1 = _phi.begin() + _mpi_comm->rank() + _start_gen.back(); feat_1 < _phi.end(); feat_1 += _mpi_comm->size())
generate_new_feats(feat_1, next_phi, feat_ind, l_bound, u_bound);
_start_gen.push_back(_phi.size());
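// If this rung must be available on every rank (any rung below the final one,
// any stored rung, or a serial run), gather the newly generated features on
// rank 0, reindex them, and broadcast the combined list in blocks of 10000 so
// that all ranks end up with an identical _phi.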
if((nn < _max_phi) || (nn <= _n_rung_store) || (_mpi_comm->size() == 1))
{
int new_phi_size;
int phi_size_start = _phi.size();
if(_mpi_comm->rank() == 0)
{
std::vector<std::vector<node_ptr>> next_phi_gathered;
mpi::gather(*_mpi_comm, next_phi, next_phi_gathered, 0);
feat_ind = _phi.size();
for(auto& next_phi_vec : next_phi_gathered)
{
_phi.reserve(_phi.size() + next_phi_vec.size());
for(auto& feat : next_phi_vec)
{
feat->reindex(feat_ind);
_phi.push_back(feat);
++feat_ind;
}
}
new_phi_size = _phi.size();
mpi::broadcast(*_mpi_comm, new_phi_size, 0);
for(int bb = 0; bb <= (new_phi_size - phi_size_start) / 10000; ++bb)
mpi::broadcast(*_mpi_comm, &_phi[phi_size_start + bb * 10000], std::min(10000, new_phi_size - phi_size_start - bb * 10000), 0);
}
else
{
mpi::gather(*_mpi_comm, next_phi, 0);
mpi::broadcast(*_mpi_comm, new_phi_size, 0);
_phi.resize(new_phi_size);
for(int bb = 0; bb <= (new_phi_size - phi_size_start) / 10000; ++bb)
mpi::broadcast(*_mpi_comm, &_phi[phi_size_start + bb * 10000], std::min(10000, new_phi_size - phi_size_start - bb * 10000), 0);
}
// feat_ind = _phi.size();
node_value_arrs::clear_temp_reg();
if(nn < _max_phi)
{
// std::fill_n(node_value_arrs::TEMP_STORAGE_REG.data(), node_value_arrs::TEMP_STORAGE_REG.size(), -1);
// Remove identical features
_scores.resize(_phi.size());
_mpi_comm->barrier();
_project(prop.data(), _scores.data(), _phi, _task_sizes, 1);
_scores.erase(_scores.begin(), _scores.begin() + _start_gen[_start_gen.size() - 1]);
inds = util_funcs::argsort(_scores);
std::vector<int> del_inds;
_mpi_comm->barrier();
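// Two newly generated features are treated as duplicates when their projection
// scores differ by less than 1e-10 and their values are essentially perfectly
// correlated; one feature of each such pair is removed below.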
for(int sc = 0; sc < _scores.size() - 1; ++sc)
if(_scores[inds[sc + 1]] - _scores[inds[sc]] < 1e-10)
if(std::abs(util_funcs::r(_phi[_start_gen.back() + inds[sc]]->value_ptr(), _phi[_start_gen.back() + inds[sc]]->value_ptr(), _n_samp) - std::abs(util_funcs::r(_phi[_start_gen.back() + inds[sc]]->value_ptr(), _phi[_start_gen.back() + inds[sc + 1]]->value_ptr(), _n_samp))) < 1e-13)
del_inds.push_back(-1 * (inds[sc] + _start_gen.back()));
inds = util_funcs::argsort(del_inds);
for(int ii = 0; ii < inds.size(); ++ii)
_phi.erase(_phi.begin() - del_inds[inds[ii]]);
// Reindex
for(int ff = _start_gen.back(); ff < _phi.size(); ++ff)
_phi[ff]->reindex(ff);
}
if(nn <= _n_rung_store)
{
bool use_temp = (nn != _max_phi) || (_max_phi > _n_rung_store);
node_value_arrs::resize_values_arr(nn, _phi.size(), use_temp);
for(int ff = _start_gen[0]; ff < _phi.size(); ++ff)
{
_phi[ff]->set_value();
_phi[ff]->set_test_value();
}
}
}
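// Final rung on more than one rank: instead of replicating the new features on
// every rank, spread them as evenly as possible across the ranks, shipping the
// excess from over-full ranks to the ranks that still need features.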
else
{
std::vector<size_t> next_phi_sizes(_mpi_comm->size());
if(_mpi_comm->rank() == 0)
{
mpi::gather(*_mpi_comm, next_phi.size(), next_phi_sizes.data(), 0);
mpi::broadcast(*_mpi_comm, next_phi_sizes.data(), next_phi_sizes.size(), 0);
}
else
{
mpi::gather(*_mpi_comm, next_phi.size(), 0);
mpi::broadcast(*_mpi_comm, next_phi_sizes.data(), next_phi_sizes.size(), 0);
}
size_t n_feat = std::accumulate(next_phi_sizes.begin(), next_phi_sizes.end(), 0);
size_t n_feat_rank = n_feat / _mpi_comm->size();
size_t n_feat_below_rank = _mpi_comm->rank() * n_feat_rank;
size_t n_feat_added = 0;
if(_mpi_comm->rank() < n_feat % _mpi_comm->size())
{
++n_feat_rank;
n_feat_below_rank += _mpi_comm->rank();
}
else
{
n_feat_below_rank += n_feat % _mpi_comm->size();
}
while((n_feat_added < n_feat_rank) && (next_phi.size() > 0))
{
_phi.push_back(next_phi.back());
next_phi.pop_back();
++n_feat_added;
}
// This can be calculated without an all_gather, using it to not introduce too many things at one time
std::vector<size_t> next_phi_needed(_mpi_comm->size());
std::vector<size_t> next_phi_excess(_mpi_comm->size());
if(_mpi_comm->rank() == 0)
{
mpi::gather(*_mpi_comm, next_phi.size(), next_phi_excess.data(), 0);
mpi::gather(*_mpi_comm, n_feat_rank - n_feat_added, next_phi_needed.data(), 0);
mpi::broadcast(*_mpi_comm, next_phi_excess.data(), next_phi_excess.size(), 0);
mpi::broadcast(*_mpi_comm, next_phi_needed.data(), next_phi_needed.size(), 0);
}
else
{
mpi::gather(*_mpi_comm, next_phi.size(), 0);
mpi::gather(*_mpi_comm, n_feat_rank - n_feat_added, 0);
mpi::broadcast(*_mpi_comm, next_phi_excess.data(), next_phi_excess.size(), 0);
mpi::broadcast(*_mpi_comm, next_phi_needed.data(), next_phi_needed.size(), 0);
}
std::vector<size_t> send_sizes(next_phi_sizes.size(), 0);
std::vector<size_t> recv_sizes(next_phi_sizes.size(), 0);
// Is this rank sending or receiving?
if(next_phi_excess[_mpi_comm->rank()] > 0)
{
size_t total_sent = std::accumulate(next_phi_excess.begin(), next_phi_excess.begin() + _mpi_comm->rank(), 0);
size_t prev_sent_recv = 0;
size_t send_size = 0;
int ind = 0;
while((prev_sent_recv <= total_sent) && (ind < _mpi_comm->size()))
{
prev_sent_recv += next_phi_needed[ind];
++ind;
}
send_size = std::min(next_phi.size(), prev_sent_recv - total_sent);
send_sizes[ind-1] = send_size;
total_sent = send_size;
while((total_sent < next_phi.size()) && (ind < _mpi_comm->size()))
{
send_size = std::min(next_phi.size() - total_sent, next_phi_needed[ind]);
send_sizes[ind] = send_size;
total_sent += send_size;
++ind;
}
total_sent = 0;
for(int pp = 0; pp < send_sizes.size(); ++pp)
{
if(send_sizes[pp] == 0)
continue;
std::vector<node_ptr> to_send(send_sizes[pp]);
std::copy_n(next_phi.begin() + total_sent, send_sizes[pp], to_send.begin());
_mpi_comm->send(pp, _mpi_comm->cantorTagGen(_mpi_comm->rank(), pp, 1, 0), to_send);
total_sent += send_sizes[pp];
}
}
else
{
size_t total_recv = std::accumulate(next_phi_needed.begin(), next_phi_needed.begin() + _mpi_comm->rank(), 0);
size_t prev_recv_sent = 0;
size_t recv_size = 0;
int ind = 0;
while((prev_recv_sent <= total_recv) && (ind < _mpi_comm->size()))
{
prev_recv_sent += next_phi_excess[ind];
++ind;
}
recv_size = std::min(n_feat_rank - n_feat_added, prev_recv_sent - total_recv);
recv_sizes[ind-1] = recv_size;
total_recv = recv_size;
while((total_recv < n_feat_rank - n_feat_added) && (ind < _mpi_comm->size()))
{
recv_size = std::min(n_feat_rank - n_feat_added - total_recv, next_phi_excess[ind]);
recv_sizes[ind] = recv_size;
total_recv += recv_size;
++ind;
}
total_recv = 0;
for(int pp = 0; pp < recv_sizes.size(); ++pp)
{
if(recv_sizes[pp] == 0)
continue;
std::vector<node_ptr> to_recv;
_mpi_comm->recv(pp, _mpi_comm->cantorTagGen(pp, _mpi_comm->rank(), 1, 0), to_recv);
for(auto& feat : to_recv)
_phi.push_back(feat);
}
}
if(_max_phi <= _n_rung_store)
{
bool use_temp = (_max_phi > _n_rung_store);
node_value_arrs::resize_values_arr(nn, _phi.size(), use_temp);
}
for(int ff = _start_gen.back(); ff < _phi.size(); ++ff)
{
_phi[ff]->reindex(ff + n_feat_below_rank, ff);
_phi[ff]->set_value();
_phi[ff]->set_test_value();
}
}
}
_n_feat = _phi.size();
}
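
// SIS projection for features generated on the fly (_n_rung_generate > 0):
// each rank regenerates the final rung from its slice of _phi, projects the
// property onto the generated features, and keeps the best candidates in
// phi_sel/scores_sel, discarding any candidate that duplicates a feature
// selected in this or a previous SIS iteration.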
void FeatureSpace::project_generated(double* prop, int size, std::vector<node_ptr>& phi_sel, std::vector<double>& scores_sel, std::vector<double>& scores_comp)
{
std::vector<double> scores_prev_sel;
if(node_value_arrs::N_SELECTED > _n_sis_select)
{
scores_prev_sel.resize(_phi_selected.size());
_project(prop, scores_prev_sel.data(), _phi_selected, _task_sizes, size / _n_samp);
}
int worst_score_ind = std::max_element(scores_sel.begin(), scores_sel.end()) - scores_sel.begin();
double worst_score = scores_sel[worst_score_ind];
for(auto feat = _phi.begin() + _start_gen.back() + _mpi_comm->rank(); feat < _phi.end(); feat += _mpi_comm->size())
{
node_value_arrs::clear_temp_reg();
node_value_arrs::clear_temp_test_reg();
int feat_ind = _phi.size();
std::vector<node_ptr> generated_phi;
generate_new_feats(feat, generated_phi, feat_ind, _l_bound, _u_bound);
std::vector<double> scores(generated_phi.size(), 0.0);
_project(prop, scores.data(), generated_phi, _task_sizes, size / _n_samp);
std::vector<int> inds = util_funcs::argsort(scores);
int ii = 0;
while((ii < inds.size()) && (scores[inds[ii]] < worst_score))
{
double cur_score = scores[inds[ii]];
bool is_valid = valid_score_against_current(scores_sel.size(), generated_phi[inds[ii]]->value_ptr(), cur_score, scores_sel, scores_comp);
// Check the feature against those selected from previous SIS iterations
if((node_value_arrs::N_SELECTED > _n_sis_select) && is_valid)
is_valid = valid_score_against_past(generated_phi[inds[ii]]->value_ptr(), scores[inds[ii]], scores_prev_sel, scores_comp);
if(is_valid)
{
if(scores_sel.size() == _n_sis_select)
{
phi_sel[worst_score_ind]->set_selected(false);
phi_sel[worst_score_ind]->set_d_mat_ind(-1);
phi_sel[worst_score_ind] = generated_phi[inds[ii]];
scores_sel[worst_score_ind] = cur_score;
phi_sel[worst_score_ind]->set_selected(true);
phi_sel[worst_score_ind]->set_d_mat_ind(node_value_arrs::N_SELECTED - _n_sis_select + worst_score_ind);
phi_sel[worst_score_ind]->set_value();
}
else
{
phi_sel.push_back(generated_phi[inds[ii]]);
scores_sel.push_back(cur_score);
phi_sel.back()->set_selected(true);
phi_sel.back()->set_d_mat_ind(node_value_arrs::N_SELECTED - _n_sis_select + scores_sel.size());
phi_sel.back()->set_value();
}
worst_score_ind = std::max_element(scores_sel.begin(), scores_sel.end()) - scores_sel.begin();
worst_score = scores_sel[worst_score_ind];
}
++ii;
}
}
}
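
// Return false if the candidate feature described by val_ptr and cur_score is,
// within tolerance, identical to a feature selected in a previous SIS
// iteration; matching scores are only treated as duplicates when the feature
// values are also perfectly correlated.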
bool FeatureSpace::valid_score_against_past(double* val_ptr, double cur_score, std::vector<double> scores_past, std::vector<double>& scores_comp)
{
std::transform(scores_past.begin(), scores_past.end(), scores_comp.begin(), [&cur_score](double score){return cur_score - score;});
// If two scores are the same then they are possibly the same feature, if not then they can't be
if(*std::min_element(scores_comp.begin(), scores_comp.end()) < 1e-10)
{
int dd = std::min_element(scores_comp.begin(), scores_comp.end()) - scores_comp.begin();
if(std::abs(util_funcs::r(val_ptr, val_ptr, _n_samp) - std::abs(util_funcs::r(node_value_arrs::get_d_matrix_ptr(dd), val_ptr, _n_samp))) < 1e-10)
return false;
}
return true;
}
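
// Same duplicate check as valid_score_against_past, but against the first
// end_check features selected during the current SIS iteration (scores_sel).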
bool FeatureSpace::valid_score_against_current(int end_check, double* val_ptr, double cur_score, std::vector<double>& scores_sel, std::vector<double>& scores_comp)
{
std::transform(scores_sel.begin(), scores_sel.begin() + end_check, scores_comp.begin(), [&cur_score](double score){return std::abs(cur_score - score);});
// If two scores are the same then they are possibly the same feature, if not then they can't be
if(*std::min_element(scores_comp.begin(), scores_comp.begin() + end_check) < 1e-10)
{
int dd = std::min_element(scores_comp.begin(), scores_comp.begin() + end_check) - scores_comp.begin();
if(std::abs(util_funcs::r(val_ptr, val_ptr, _n_samp) - std::abs(util_funcs::r(node_value_arrs::get_d_matrix_ptr(node_value_arrs::N_SELECTED - _n_sis_select + dd), val_ptr, _n_samp))) < 1e-10)
return false;
}
return true;
}
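
// Perform one SIS (sure independence screening) iteration: project the
// property onto every feature, walk through the features in order of
// increasing score, and keep the n_sis_select best ones that do not duplicate
// anything already selected. With more than one MPI rank the local selections
// are merged on rank 0 and the final list is broadcast back; the selected
// features are appended to _phi_selected and written to the selected-features
// output file.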
void FeatureSpace::sis(std::vector<double>& prop)
{
boost::filesystem::path p(_feature_space_file.c_str());
boost::filesystem::create_directories(p.remove_filename());
std::ofstream out_file_stream = std::ofstream();
out_file_stream.open(_feature_space_file, std::ios::app);
std::vector<double> scores_comp(std::max(node_value_arrs::N_SELECTED, _n_sis_select), 1.0);
std::vector<double> scores_sel(_n_sis_select, 0.0);
std::vector<node_ptr> phi_sel;
phi_sel.reserve(_n_sis_select);
int cur_feat = node_value_arrs::N_SELECTED;
node_value_arrs::resize_d_matrix_arr(_n_sis_select);
_phi_selected.reserve(_phi_selected.size() + _n_sis_select);
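// Project the property onto every feature in _phi; scores are stored so that
// lower values are better (the sign is flipped when a score is written to the
// output file below).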
_project(prop.data(), _scores.data(), _phi, _task_sizes, prop.size() / _n_samp);
std::vector<int> inds = util_funcs::argsort(_scores);
int ii = 0;
int cur_feat_local = 0;
double cur_score = 0.0;
std::vector<double> scores_prev_sel;
if(node_value_arrs::N_SELECTED > _n_sis_select)
{
scores_prev_sel.resize(_phi_selected.size());
_project(prop.data(), scores_prev_sel.data(), _phi_selected, _task_sizes, prop.size() / _n_samp);
}
while((cur_feat_local != _n_sis_select) && (ii < _scores.size()))
{
bool is_valid = valid_score_against_current(cur_feat_local, _phi[inds[ii]]->value_ptr(), _scores[inds[ii]], scores_sel, scores_comp);
// Check the feature against those selected from previous SIS iterations
if(cur_feat > 0 && is_valid)
is_valid = valid_score_against_past(_phi[inds[ii]]->value_ptr(), _scores[inds[ii]], scores_prev_sel, scores_comp);
if(is_valid)
{
scores_sel[cur_feat_local] = _scores[inds[ii]];
// phi_sel.push_back(std::make_shared<FeatureNode>(cur_feat + cur_feat_local, _phi[inds[ii]]->expr(), _phi[inds[ii]]->value(), _phi[inds[ii]]->test_value(), _phi[inds[ii]]->unit(), true));
phi_sel.push_back(_phi[inds[ii]]);
phi_sel.back()->set_selected(true);
phi_sel.back()->set_d_mat_ind(cur_feat + cur_feat_local);
phi_sel.back()->set_value();
++cur_feat_local;
}
++ii;
}
if(_n_rung_generate > 0)
{
phi_sel.resize(cur_feat_local);
scores_sel.resize(cur_feat_local);
project_generated(prop.data(), prop.size(), phi_sel, scores_sel, scores_comp);
node_value_arrs::clear_temp_reg();
node_value_arrs::clear_temp_test_reg();
}
// If we are only on one process then phi_sel are the selected features
if(_mpi_comm->size() > 1)
{
phi_sel.resize(_n_sis_select);
scores_sel.resize(_n_sis_select);
// Prepare to get all scores, features, and indicies
std::vector<double> sent_scores(_n_sis_select * _mpi_comm->size(), 0.0);
// Get the selected scores and indicies from all processes
int unique_scores = 1;
if(_mpi_comm->rank() == 0)
{
std::copy_n(scores_sel.data(), _n_sis_select, sent_scores.data());
for(int rr = 1; rr < _mpi_comm->size(); ++rr)
_mpi_comm->recv(rr, _mpi_comm->cantorTagGen(rr, 0, 1, 0), &sent_scores[rr * _n_sis_select], _n_sis_select);
// Sort the scores and see how many features we need to guarantee we get n_sis unique features
inds = util_funcs::argsort(sent_scores);
int ii = 1;
while((unique_scores < _n_sis_select) && (ii < sent_scores.size()))
{
if(sent_scores[inds[ii]] - sent_scores[inds[ii-1]] > 1e-10)
++unique_scores;
++ii;
}
if(ii == sent_scores.size())
throw std::logic_error("SIS went through all sent feature scores and did not select enough (" + std::to_string(unique_scores) + " not " + std::to_string(_n_sis_select) + ").");
inds.resize(ii);
}
else
{
_mpi_comm->send(0, _mpi_comm->cantorTagGen(_mpi_comm->rank(), 0, 1, 0), scores_sel.data(), _n_sis_select);
inds = {};
}
mpi::broadcast(*_mpi_comm, inds, 0);
// Erase all scores, features, and indicies from the selected arrays that are not needed from those selected locally
int n_check = _n_sis_select - 1;
if(phi_sel.size() < _n_sis_select)
n_check = phi_sel.size() - 1;
for(int ii = n_check; ii >= 0; --ii)
{
int compare_ind = ii + _mpi_comm->rank() * _n_sis_select;
if(std::none_of(inds.begin(), inds.end(), [&compare_ind](int i1){return i1 == compare_ind;}))
{
scores_sel.erase(scores_sel.begin() + ii);
phi_sel[ii]->set_selected(false);
phi_sel[ii]->set_d_mat_ind(-1);
phi_sel.erase(phi_sel.begin() + ii);
}
else
{
phi_sel[ii]->set_selected(false);
phi_sel[ii]->set_d_mat_ind(-1);
}
}
for(auto& feat : phi_sel)
feat->set_selected(false);
scores_sel.resize(_n_sis_select, 0.0);
phi_sel.resize(_n_sis_select, nullptr);
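// Rank 0 collects every rank's remaining local selections, re-sorts them by
// score, and keeps the overall best n_sis_select, re-checking for duplicates
// when the locally selected scores were not all unique.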
if(_mpi_comm->rank() == 0)
{
std::vector<double> sent_scores(_n_sis_select * _mpi_comm->size(), 0.0);
std::vector<node_ptr> sent_phi(_n_sis_select * _mpi_comm->size());
std::copy_n(scores_sel.begin(), _n_sis_select, sent_scores.begin());
std::copy_n(phi_sel.begin(), _n_sis_select, sent_phi.begin());
for(int rr = 1; rr < _mpi_comm->size(); ++rr)
{
_mpi_comm->recv(rr, _mpi_comm->cantorTagGen(rr, 0, 2, 0), &sent_scores[rr * _n_sis_select], _n_sis_select);
_mpi_comm->recv(rr, _mpi_comm->cantorTagGen(rr, 0, 2, 1), &sent_phi[rr * _n_sis_select], _n_sis_select);
}
// std::fill_n(node_value_arrs::TEMP_STORAGE_REG.data(), node_value_arrs::TEMP_STORAGE_REG.size(), -1);
if(inds.size() == unique_scores)
{
inds = util_funcs::argsort(sent_scores);
for(int ii = 0; ii < _n_sis_select; ++ii)
{
out_file_stream << std::setw(14) <<std::left << cur_feat << std::setw(24) << std::setprecision(18) << std::left << -1 * sent_scores[inds[ii]] << sent_phi[inds[ii]]->expr() << std::endl;
_phi_selected.push_back(sent_phi[inds[ii]]);
_phi_selected.back()->set_selected(true);
_phi_selected.back()->set_d_mat_ind(cur_feat);
++cur_feat;
}
}
else
{
inds = util_funcs::argsort(sent_scores);
// Clear out the previous D matrix values (from creation of each process' phi_sel)
std::fill_n(node_value_arrs::get_d_matrix_ptr(cur_feat), _n_sis_select * node_value_arrs::N_SAMPLES, 0.0);
cur_feat_local = 0;
int ii = 0;
std::fill_n(scores_comp.begin(), _n_sis_select, 1.0);
scores_sel = std::vector<double>(_n_sis_select, 0.0);
// Get the n_sis_select best features (compare against features sent from other processes)
while((cur_feat != node_value_arrs::N_SELECTED) && (ii < sent_scores.size()))
{
if(valid_score_against_current(cur_feat_local, sent_phi[inds[ii]]->value().data(), sent_scores[inds[ii]], scores_sel, scores_comp))
{
out_file_stream << std::setw(14) <<std::left << cur_feat << std::setw(24) << std::setprecision(18) << std::left << -1 * sent_scores[inds[ii]] << sent_phi[inds[ii]]->expr() << std::endl;
_phi_selected.push_back(sent_phi[inds[ii]]);
_phi_selected.back()->set_selected(true);
_phi_selected.back()->set_d_mat_ind(cur_feat);
_phi_selected.back()->set_value();
scores_sel[cur_feat_local] = sent_scores[inds[ii]];
++cur_feat_local;
++cur_feat;
}
++ii;
}
}
if(_phi_selected.size() != node_value_arrs::N_SELECTED)
throw std::logic_error("SIS went through all sent features and did not select enough (" + std::to_string(_phi_selected.size() - node_value_arrs::N_SELECTED + _n_sis_select) + " not " + std::to_string(_n_sis_select) + ").");
cur_feat -= cur_feat_local;
}
else
{
_mpi_comm->send(0, _mpi_comm->cantorTagGen(_mpi_comm->rank(), 0, 2, 0), scores_sel.data(), _n_sis_select);
_mpi_comm->send(0, _mpi_comm->cantorTagGen(_mpi_comm->rank(), 0, 2, 1), phi_sel.data(), _n_sis_select);
_phi_selected.resize(node_value_arrs::N_SELECTED);
}
for(int bb = 0; bb <= _n_sis_select / 10000; ++bb)
mpi::broadcast(*_mpi_comm, &_phi_selected[_phi_selected.size() - _n_sis_select + bb * 10000], std::min(_n_sis_select - bb * 10000, 10000), 0);
std::vector<int> rungs(_n_sis_select, 0);
std::transform(_phi_selected.end() - _n_sis_select, _phi_selected.end(), rungs.begin(), [](node_ptr feat){return feat->rung();});
inds = util_funcs::argsort(rungs);
for(int ii = 0; ii < inds.size(); ++ii)
{
_phi_selected[inds[ii] + node_value_arrs::N_SELECTED - _n_sis_select]->set_value();
++cur_feat;
}
}
else
{
if(phi_sel.size() < _n_sis_select)
throw std::logic_error("SIS went through all features and did not select enough (" + std::to_string(phi_sel.size()) + " not " + std::to_string(_n_sis_select) + ").");
cur_feat_local = 0;
inds = util_funcs::argsort(scores_sel);
for(auto& ind : inds)
{
out_file_stream << std::setw(14) <<std::left << cur_feat << std::setw(24) << std::setprecision(18) << std::left << -1 * scores_sel[ind] << phi_sel[ind]->expr() << std::endl;
_phi_selected.push_back(phi_sel[ind]);
_phi_selected.back()->set_d_mat_ind(cur_feat);
_phi_selected.back()->set_value();
++cur_feat;
++cur_feat_local;
}
}
if(cur_feat != node_value_arrs::N_SELECTED)
throw std::logic_error("SIS went through all features and did not select enough.");
if(_mpi_comm->rank() == 0)
{
out_file_stream << "#";
for(int dd = 0; dd < 71; ++dd)
out_file_stream << "-";
out_file_stream << std::endl;
}
if(_mpi_comm->rank() == 0)
out_file_stream.close();
}