diff --git a/src/feature_creation/feature_space/FeatureSpace.cpp b/src/feature_creation/feature_space/FeatureSpace.cpp index a2f6f63105afdc5f66683acc118d44af79c451d2..4eac0be61ac182b3edab00a5decef8556da28836 100644 --- a/src/feature_creation/feature_space/FeatureSpace.cpp +++ b/src/feature_creation/feature_space/FeatureSpace.cpp @@ -295,7 +295,7 @@ void FeatureSpace::sis(std::vector<double>& prop) std::vector<int> inds = util_funcs::argsort(_scores); std::vector<double> scores_selected(_n_sis_select, 0.0); - std::vector<int> inds_selected(_n_sis_select, 0.0); + std::vector<int> inds_selected(_n_sis_select, -1); std::vector<node_ptr> phi_selected; phi_selected.reserve(_n_sis_select); @@ -340,36 +340,21 @@ void FeatureSpace::sis(std::vector<double>& prop) } ++ii; } - phi_selected.resize(cur_feat_local); - inds_selected.resize(cur_feat_local); - scores_selected.resize(cur_feat_local); + phi_selected.resize(_n_sis_select); + // inds_selected.resize(cur_feat_local); + // scores_selected.resize(cur_feat_local); // If we are only on one process then phi_selected are the selected features if(_mpi_comm->size() > 1) { // Prepare to get all scores, features, and indicies - std::vector<std::vector<double>> all_scores; - std::vector<std::vector<node_ptr>> all_phi; - std::vector<std::vector<int>> all_inds; - - // Prepare to flatten the score and index arrays - std::vector<double> sent_scores; - std::vector<int> sent_inds; + std::vector<double> sent_scores(_n_sis_select * _mpi_comm->size(), 0.0); + std::vector<int> sent_inds(_n_sis_select * _mpi_comm->size(), -1); + std::vector<node_ptr> sent_phi(_n_sis_select * _mpi_comm->size(), nullptr); // Get the selected scores and indicies from all processes - mpi::all_gather(*_mpi_comm, scores_selected, all_scores); - mpi::all_gather(*_mpi_comm, inds_selected, all_inds); - - // Flatten arrays - for(int sv = 0; sv < _mpi_comm->size(); ++sv) - { - int cur_ind = sent_scores.size(); - sent_scores.resize(all_scores[sv].size() + sent_scores.size()); - sent_inds.resize(sent_scores.size()); - - std::copy_n(all_scores[sv].begin(), all_scores[sv].size(), &sent_scores[cur_ind]); - std::copy_n(all_inds[sv].begin(), all_inds[sv].size(), &sent_inds[cur_ind]); - } + mpi::all_gather(*_mpi_comm, scores_selected.data(), _n_sis_select, sent_scores.data()); + mpi::all_gather(*_mpi_comm, inds_selected.data(), _n_sis_select, sent_inds.data()); // Sort the scores and see how many features we need to guarantee we get n_sis unique features inds = util_funcs::argsort(sent_scores); @@ -396,32 +381,21 @@ void FeatureSpace::sis(std::vector<double>& prop) inds_selected.erase(inds_selected.begin() + del_ind); } } - + scores_selected.resize(_n_sis_select, 0.0); + phi_selected.resize(_n_sis_select, nullptr); // Get all selected features and scores - mpi::all_gather(*_mpi_comm, phi_selected, all_phi); - mpi::all_gather(*_mpi_comm, scores_selected, all_scores); + std::fill_n(sent_scores.begin(), sent_scores.size(), 0.0); + mpi::all_gather(*_mpi_comm, scores_selected.data(), _n_sis_select, sent_scores.data()); + mpi::all_gather(*_mpi_comm, phi_selected.data(), _n_sis_select, sent_phi.data()); + + inds = util_funcs::argsort(sent_scores); // If ii == unique_score then all features gathered are valid if(ii == unique_scores) { - std::vector<node_ptr> sent_phi; - sent_scores = {}; - - // Flatten score and feature arrays - int cur_ind = 0; - for(int sv = 0; sv < _mpi_comm->size(); ++sv) + for(int ii = 0; ii < _n_sis_select; ++ii) { - cur_ind = sent_scores.size(); - - sent_scores.resize(all_scores[sv].size() + sent_scores.size()); - sent_phi.resize(sent_scores.size()); - std::copy_n(all_scores[sv].begin(), all_scores[sv].size(), &sent_scores[cur_ind]); - std::copy_n(all_phi[sv].begin(), all_phi[sv].size(), &sent_phi[cur_ind]); - } - inds = util_funcs::argsort(sent_scores); - for(int ii = 0; ii < inds.size(); ++ii) - { _phi_selected.push_back(sent_phi[inds[ii]]); _phi_selected.back()->reindex(cur_feat); _phi_selected.back()->set_value(); @@ -434,30 +408,11 @@ void FeatureSpace::sis(std::vector<double>& prop) // Clear out the previous D matrix values (from creation of each process' phi_selected) std::fill_n(node_value_arrs::get_d_matrix_ptr(cur_feat), _n_sis_select * node_value_arrs::N_SAMPLES, 0.0); - // Gather all scores - - std::vector<node_ptr> sent_phi; - sent_scores = {}; - - // Flatten score and feature arrays - int cur_ind = 0; - for(int sv = 0; sv < _mpi_comm->size(); ++sv) - { - cur_ind = sent_scores.size(); - - sent_scores.resize(all_scores[sv].size() + sent_scores.size()); - sent_phi.resize(sent_scores.size()); - - std::copy_n(all_scores[sv].begin(), all_scores[sv].size(), &sent_scores[cur_ind]); - std::copy_n(all_phi[sv].begin(), all_phi[sv].size(), &sent_phi[cur_ind]); - } - cur_feat_local = 0; ii = 0; std::fill_n(scores_comp.begin(), _n_sis_select, 1.0); scores_selected = std::vector<double>(_n_sis_select, 0.0); - inds = util_funcs::argsort(sent_scores); // Get the n_sis_select best features (compare against features sent from other processes) while((cur_feat != node_value_arrs::N_SELECTED) && (ii < sent_scores.size())) { @@ -490,6 +445,7 @@ void FeatureSpace::sis(std::vector<double>& prop) for(auto& feat : phi_selected) _phi_selected.push_back(feat); } + std::cout << cur_feat << std::endl; if(cur_feat != node_value_arrs::N_SELECTED) throw std::logic_error("SIS went through all features and did not select enough."); }