diff --git a/src/feature_creation/feature_space/FeatureSpace.cpp b/src/feature_creation/feature_space/FeatureSpace.cpp index 23b16ee917e9b0c854c45aaa6dd3c679d60750a8..54fccb2989c44b7ae461f55d308165063014ea33 100644 --- a/src/feature_creation/feature_space/FeatureSpace.cpp +++ b/src/feature_creation/feature_space/FeatureSpace.cpp @@ -62,13 +62,13 @@ void FeatureSpace::generate_feature_space() std::vector<node_ptr> next_phi; _n_feat = _phi.size(); - std::array<int, 2> start_end = _mpi_comm->get_start_end_for_iterator(_phi.size() - _start_gen[_start_gen.size()-1], _start_gen[_start_gen.size()-1]); + // std::array<int, 2> start_end = _mpi_comm->get_start_end_for_iterator(_phi.size() - _start_gen[_start_gen.size()-1], _start_gen[_start_gen.size()-1]); // next_phi.reserve(node_value_arrs::get_max_number_features(_allowed_ops, 1, start_end[1] - start_end[0])); int feat_ind = _phi.size(); - for(auto feat_1 = _phi.begin() + start_end[0]; feat_1 != _phi.begin() + start_end[1]; ++feat_1) + for(auto feat_1 = _phi.begin() + _start_gen.back() + _mpi_comm->rank(); feat_1 != _phi.end(); feat_1 += _mpi_comm->size()) { for(auto& op : _un_operators) { @@ -126,27 +126,150 @@ void FeatureSpace::generate_feature_space() } _mpi_comm->barrier(); _start_gen.push_back(_phi.size()); - - std::vector<std::vector<node_ptr>> next_phi_gathered; - mpi::all_gather(*_mpi_comm, next_phi, next_phi_gathered); - - feat_ind = _phi.size(); - for(auto& next_phi_vec : next_phi_gathered) + if(nn < _max_phi) { - _phi.reserve(_phi.size() + next_phi_vec.size()); - for(auto& feat : next_phi_vec) + std::vector<std::vector<node_ptr>> next_phi_gathered; + mpi::all_gather(*_mpi_comm, next_phi, next_phi_gathered); + + feat_ind = _phi.size(); + for(auto& next_phi_vec : next_phi_gathered) { - feat->reindex(feat_ind); - _phi.push_back(feat); - ++feat_ind; + _phi.reserve(_phi.size() + next_phi_vec.size()); + for(auto& feat : next_phi_vec) + { + feat->reindex(feat_ind); + _phi.push_back(feat); + ++feat_ind; + } + } + if(nn <= _n_rung_store) + { + // bool use_temp = (nn != _max_phi) || (_max_phi > _n_rung_store); + bool use_temp = true; + node_value_arrs::resize_values_arr(_n_rung_store, _phi.size(), use_temp); + for(int ff = _start_gen[0]; ff < _phi.size(); ++ff) + _phi[ff]->set_value(); } } - if(nn <= _n_rung_store) + else { - bool use_temp = (nn != _max_phi) || (_max_phi > _n_rung_store); - node_value_arrs::resize_values_arr(_n_rung_store, _phi.size(), use_temp); - for(int ff = _start_gen[0]; ff < _phi.size(); ++ff) - _phi[ff]->set_value(); + std::cout << "HERE" << std::endl; + std::vector<size_t> next_phi_sizes; + mpi::all_gather(*_mpi_comm, next_phi.size(), next_phi_sizes); + size_t n_feat = std::accumulate(next_phi_sizes.begin(), next_phi_sizes.end(), _phi.size()); + size_t n_feat_rank = n_feat / _mpi_comm->size(); + size_t n_feat_below_rank = _mpi_comm->rank() * n_feat_rank; + if(_mpi_comm->rank() < n_feat % _mpi_comm->size()) + { + ++n_feat_rank; + n_feat_below_rank += _mpi_comm->rank(); + } + else + { + n_feat_below_rank += n_feat % _mpi_comm->size(); + } + if(n_feat_below_rank + n_feat_rank <= _phi.size()) + { + _phi.erase(_phi.begin(), _phi.begin() + n_feat_below_rank); + _phi.erase(_phi.begin() + n_feat_rank, _phi.end()); + } + else if(n_feat_below_rank <= _phi.size()) + { + _phi.erase(_phi.begin(), _phi.begin() + n_feat_below_rank); + } + else + { + _phi = {}; + } + while((_phi.size() < n_feat_rank) && (next_phi.size() > 0)) + { + next_phi.back()->reindex(_phi.size() + n_feat_below_rank); + _phi.push_back(next_phi.back()); + next_phi.pop_back(); + } + // This can be calculated without an all_gather, using it to not introduce too many things at one time + std::vector<size_t> next_phi_needed; + std::vector<size_t> next_phi_excess; + mpi::all_gather(*_mpi_comm, next_phi.size(), next_phi_excess); + mpi::all_gather(*_mpi_comm, n_feat_rank - _phi.size(), next_phi_needed); + + std::vector<size_t> send_sizes(next_phi_sizes.size(), 0); + std::vector<size_t> recv_sizes(next_phi_sizes.size(), 0); + + // Is this rank sending or receiving? + std::cout << "HERE 2" << std::endl; + std::cout << std::accumulate(next_phi_excess.begin(), next_phi_excess.end(), 0); + std::cout << std::accumulate(next_phi_needed.begin(), next_phi_needed.end(), 0); + if(next_phi_excess[_mpi_comm->rank()] > 0) + { + size_t total_sent = std::accumulate(next_phi_excess.begin(), next_phi_excess.begin() + _mpi_comm->rank(), 0); + size_t prev_sent_recv = 0; + size_t send_size = 0; + int ind = 0; + while((prev_sent_recv < total_sent) && (ind < _mpi_comm->size())) + { + prev_sent_recv += next_phi_needed[ind]; + ++ind; + } + send_size = std::min(next_phi.size(), next_phi_needed[ind-1]); + send_sizes[ind-1] = send_size; + total_sent = send_size; + while((total_sent < next_phi.size()) && (ind < _mpi_comm->size())) + { + send_size = std::min(next_phi.size() - total_sent, next_phi_needed[ind]); + send_sizes[ind] = send_size; + total_sent += send_size; + } + + total_sent = 0; + for(int pp = 0; pp < send_sizes.size(); ++pp) + { + if(send_sizes[pp] == 0) + continue; + + std::vector<node_ptr> to_send(send_sizes[pp]); + std::copy_n(next_phi.begin() + total_sent, send_sizes[pp], to_send.begin()); + _mpi_comm->send(pp, _mpi_comm->cantorTagGen(_mpi_comm->rank(), pp, 1, 0), to_send); + total_sent += send_sizes[pp]; + } + } + else + { + size_t total_recv = std::accumulate(next_phi_excess.begin(), next_phi_excess.begin() + _mpi_comm->rank(), 0); + size_t prev_recv_sent = 0; + size_t recv_size = 0; + int ind = 0; + while((prev_recv_sent < total_recv) && (ind < _mpi_comm->size())) + { + prev_recv_sent += next_phi_excess[ind]; + ++ind; + } + recv_size = std::min(next_phi.size(), next_phi_excess[ind-1]); + recv_sizes[ind-1] = recv_size; + total_recv = recv_size; + while((_phi.size() < n_feat_rank) && (ind < _mpi_comm->size())) + { + recv_size = std::min(_phi.size() + total_recv, next_phi_excess[ind]); + recv_sizes[ind] = recv_size; + total_recv += recv_size; + } + + total_recv = 0; + for(int pp = 0; pp < recv_sizes.size(); ++pp) + { + if(recv_sizes[pp] == 0) + continue; + + std::vector<node_ptr> to_recv; + _mpi_comm->recv(pp, _mpi_comm->cantorTagGen(_mpi_comm->rank(), pp, 1, 0), to_recv); + for(auto& feat : to_recv) + { + feat->reindex(_phi.size() + n_feat_below_rank); + _phi.push_back(feat); + } + } + } + } } _n_feat = _phi.size(); @@ -154,39 +277,31 @@ void FeatureSpace::generate_feature_space() void FeatureSpace::project_r(double* prop) { - std::array<int, 2> start_end = _mpi_comm->get_start_end_for_iterator(_phi.size(), 0); - - std::vector<double> scores(start_end[1] - start_end[0], 0.0); - for(int ff = start_end[0]; ff < start_end[1]; ++ff) - scores[ff - start_end[0]] = -1.0 * std::abs(util_funcs::r(prop, _phi[ff]->value_ptr(), _n_samp)); - - std::vector<std::vector<double>> all_scores; - mpi::all_gather(*_mpi_comm, scores, all_scores); - - int iter_start = 0; - for(auto& score_vec : all_scores) - { - for(int ii = 0; ii < score_vec.size(); ++ii) - { - _scores[ii + iter_start] = score_vec[ii]; - } - iter_start += score_vec.size(); - } + std::vector<double> scores(_phi.size(), 0.0); + for(int ff = 0; ff < _phi.size(); ++ff) + _scores[ff] = -1.0 * std::abs(util_funcs::r(prop, _phi[ff]->value_ptr(), _n_samp)); } void FeatureSpace::sis(std::vector<double>& prop) { int cur_feat = _D.size() / prop.size(); + int previous_size = _D.size(); + _D.resize(_D.size() + _n_sis_select * prop.size()); _D.reserve(_D.size()); - _phi_selected.reserve(_phi_selected.size()); + _phi_selected.reserve(_phi_selected.size() + _n_sis_select); project_r(prop.data()); + std::vector<int> inds = util_funcs::argsort(_scores); + std::vector<double> D_temp(_n_sis_select * prop.size()); + std::vector<double> scores_selected(_n_sis_select, 0.0); + std::vector<node_ptr> phi_slected(_n_sis_select, nullptr); int ii = 0; - while((cur_feat != _D.size() / prop.size()) && (ii < _scores.size())) + int cur_feat_local = 0; + while((cur_feat_local != D_temp.size() / prop.size()) && (ii < _scores.size())) { bool is_valid = true; for(int dd = 0; dd < cur_feat; ++dd) @@ -197,16 +312,74 @@ void FeatureSpace::sis(std::vector<double>& prop) break; } } + for(int dd = 0; dd < cur_feat_local; ++dd) + { + if(1.0 - std::abs(util_funcs::r(&D_temp[dd*prop.size()], _phi[inds[ii]]->value_ptr(), prop.size())) < 1e-13) + { + is_valid = false; + break; + } + } + if(is_valid) + { + std::copy_n(_phi[inds[ii]]->value_ptr(), prop.size(), &D_temp[cur_feat_local * prop.size()]); + scores_selected[cur_feat_local] = _scores[inds[ii]]; + phi_slected[cur_feat_local] = _phi[inds[ii]]; + ++cur_feat_local; + } + ++ii; + } + phi_slected.resize(cur_feat_local); + scores_selected.resize(cur_feat_local); + D_temp.resize(cur_feat_local * prop.size()); + + std::vector<std::vector<double>> all_scores; + std::vector<std::vector<double>> all_D_temp; + std::vector<std::vector<node_ptr>> all_phi_selected; + + mpi::all_gather(*_mpi_comm, scores_selected, all_scores); + mpi::all_gather(*_mpi_comm, D_temp, all_scores); + mpi::all_gather(*_mpi_comm, phi_slected, all_phi_selected); + + int iter_start = 0; + + scores_selected = {}; + D_temp = {}; + phi_slected = {}; + + for(int sv = 0; sv < _mpi_comm->size(); ++sv) + { + scores_selected.resize(all_scores[sv].size() + scores_selected.size()); + D_temp.resize(all_D_temp[sv].size() + D_temp.size()); + std::copy_n(all_scores[sv].begin(), all_scores[sv].size(), scores_selected.end() - all_scores[sv].size()); + std::copy_n(all_D_temp[sv].begin(), all_D_temp[sv].size(), D_temp.end() - all_D_temp[sv].size()); + std::copy_n(all_phi_selected[sv].begin(), all_phi_selected[sv].size(), phi_slected.end() - all_phi_selected[sv].size()); + } + + inds = util_funcs::argsort(scores_selected); + cur_feat_local = 0; + while((cur_feat != _D.size() / prop.size()) && (ii < scores_selected.size())) + { + bool is_valid = true; + for(int dd = 0; dd < cur_feat_local; ++dd) + { + if(1.0 - std::abs(util_funcs::r(&_D[previous_size + dd * prop.size()], &D_temp[inds[ii] * prop.size()], prop.size())) < 1e-13) + { + is_valid = false; + break; + } + } if(is_valid) { - std::copy_n(_phi[inds[ii]]->value_ptr(), prop.size(), &_D[cur_feat * prop.size()]); + std::copy_n(&D_temp[inds[ii] * prop.size()], prop.size(), &_D[cur_feat * prop.size()]); + _phi_selected.push_back(phi_slected[inds[ii]]); + _phi_selected.back()->set_value(); + ++cur_feat_local; ++cur_feat; - - _phi_selected.push_back(_phi[inds[ii]]); } ++ii; } - if(ii >= _scores.size()) + if(cur_feat != _D.size() / prop.size()) throw std::logic_error("SIS went through all features and did not select enough."); } diff --git a/src/feature_creation/feature_space/FeatureSpace.hpp b/src/feature_creation/feature_space/FeatureSpace.hpp index ed77db523d168445b1aa6814a5592d023d4dcd11..68d831328f02dba6526062d6ab1659426089c691 100644 --- a/src/feature_creation/feature_space/FeatureSpace.hpp +++ b/src/feature_creation/feature_space/FeatureSpace.hpp @@ -41,7 +41,6 @@ class FeatureSpace std::vector<node_ptr> _phi_selected; //!< selected features std::vector<node_ptr> _phi; //!< all features std::vector<node_ptr> _phi_0; //!< initial feature space - public: /** * @brief Constructor for the feature space @@ -117,6 +116,15 @@ public: * @param prop The property to calculate SIS from */ void sis(std::vector<double>& prop); + + /** + * @brief Is a feature in this process' _phi? + * + * @param ind index + * @return True if feature is in this _phi + */ + inline bool feat_in_phi(int ind){return (ind >= _phi[0]->feat_ind()) && (ind <= _phi.back()->feat_ind());} + }; #endif \ No newline at end of file diff --git a/src/utils/math_funcs.hpp b/src/utils/math_funcs.hpp index a17f5699caea89915cf2074b81071d0623ce4e4b..04301c59b5bd567b88bf09385011dd104cf58bc6 100644 --- a/src/utils/math_funcs.hpp +++ b/src/utils/math_funcs.hpp @@ -15,6 +15,9 @@ namespace util_funcs inline double mean(double* start, int size){return std::accumulate(start, start + size, 0.0) / size;}; + inline double mean(std::vector<int>& vec){return static_cast<double>(std::accumulate(vec.begin(), vec.end(), 0)) / vec.size();}; + + inline double mean(int* start, int size){return static_cast<double>(std::accumulate(start, start + size, 0)) / size;}; inline double stand_dev(std::vector<double>& vec) {