From f26b6fd651ad9c14566a012975915f531c862d64 Mon Sep 17 00:00:00 2001 From: Thomas <purcell@fhi-berlin.mpg.de> Date: Tue, 8 Jun 2021 08:15:53 +0200 Subject: [PATCH] Reparameterization with addtion scheme now working _reparam_phi now seems to work correctly --- .../feature_space/FeatureSpace.cpp | 93 ++++++++++--------- .../feature_space/FeatureSpace.hpp | 6 +- 2 files changed, 55 insertions(+), 44 deletions(-) diff --git a/src/feature_creation/feature_space/FeatureSpace.cpp b/src/feature_creation/feature_space/FeatureSpace.cpp index c376fd56..1c72bdef 100644 --- a/src/feature_creation/feature_space/FeatureSpace.cpp +++ b/src/feature_creation/feature_space/FeatureSpace.cpp @@ -237,6 +237,7 @@ void FeatureSpace::initialize_fs_output_files() const void FeatureSpace::generate_param_feats( std::vector<node_ptr>::iterator& feat, std::vector<node_ptr>& feat_set, + const std::vector<node_ptr>::iterator& start, unsigned long int& feat_ind, std::shared_ptr<NLOptimizer> optimizer, const double l_bound, @@ -253,14 +254,14 @@ void FeatureSpace::generate_param_feats( for(auto& op : _com_bin_param_operators) { - for(auto feat_2 = _phi.begin(); feat_2 != feat; ++feat_2) + for(auto feat_2 = start; feat_2 != feat; ++feat_2) { op(feat_set, *feat, *feat_2, feat_ind, l_bound, u_bound, optimizer); } } for(auto& op : _bin_param_operators) { - for(auto feat_2 = _phi.begin(); feat_2 != feat; ++feat_2) + for(auto feat_2 = start; feat_2 != feat; ++feat_2) { op(feat_set, *feat, *feat_2, feat_ind, l_bound, u_bound, optimizer); op(feat_set, *feat_2, *feat, feat_ind, l_bound, u_bound, optimizer); @@ -349,12 +350,12 @@ void FeatureSpace::generate_reparam_feature_set(const std::vector<double>& prop) #pragma omp for schedule(dynamic) for(auto feat_1 = _phi_reparam.begin() + _start_gen_reparam.back() + _mpi_comm->rank(); feat_1 < _phi_reparam.end(); feat_1 += _mpi_comm->size()) { - generate_non_param_feats(feat_1, next_phi_private, feat_ind, l_bound, u_bound); - generate_param_feats(feat_1, next_phi_private, feat_ind, optimizer_param, l_bound, u_bound); + generate_non_param_feats(feat_1, next_phi_private, _phi_reparam.begin(), feat_ind, l_bound, u_bound); + generate_param_feats(feat_1, next_phi_private, _phi_reparam.begin(), feat_ind, optimizer_param, l_bound, u_bound); } #pragma omp for schedule(dynamic) - for(auto feat_1 = _phi.begin() + _start_gen.back() + _mpi_comm->rank(); feat_1 < _phi.end(); feat_1 += _mpi_comm->size()) + for(auto feat_1 = _phi.begin() + _start_gen[nn-1] + _mpi_comm->rank(); feat_1 < _phi.begin() + _end_no_params[nn-1]; feat_1 += _mpi_comm->size()) { generate_reparam_feats(feat_1, next_phi_private, feat_ind, optimizer_reparam, l_bound, u_bound); } @@ -367,29 +368,29 @@ void FeatureSpace::generate_reparam_feature_set(const std::vector<double>& prop) if(nn < _max_phi) { int new_phi_size; - int phi_size_start = _phi.size(); + int phi_size_start = _phi_reparam.size(); if(_mpi_comm->rank() == 0) { std::vector<std::vector<node_ptr>> next_phi_gathered; mpi::gather(*_mpi_comm, next_phi, next_phi_gathered, 0); - feat_ind = _phi.size(); + feat_ind = _phi_reparam.size(); for(auto& next_phi_vec : next_phi_gathered) { - _phi.insert(_phi.end(), next_phi_vec.begin(), next_phi_vec.end()); + _phi_reparam.insert(_phi_reparam.end(), next_phi_vec.begin(), next_phi_vec.end()); } - new_phi_size = _phi.size(); + new_phi_size = _phi_reparam.size(); // Sort the features to ensure consistent feature spaces for all MPI/OpenMP configurations std::sort( - _phi.begin() + _start_gen.back(), - _phi.end(), + _phi_reparam.begin() + _start_gen_reparam.back(), + _phi_reparam.end(), [feat_ind](node_ptr n1, node_ptr n2){return n1->sort_score(feat_ind) < n2->sort_score(feat_ind);} ); // Reindex sorted features std::for_each( - _phi.begin() + _start_gen.back(), - _phi.end(), + _phi_reparam.begin() + _start_gen_reparam.back(), + _phi_reparam.end(), [&feat_ind](node_ptr n){n->reindex(feat_ind); ++feat_ind;} ); @@ -397,18 +398,18 @@ void FeatureSpace::generate_reparam_feature_set(const std::vector<double>& prop) for(int bb = 0; bb <= (new_phi_size - phi_size_start) / 10000; ++bb) { - mpi::broadcast(*_mpi_comm, &_phi[phi_size_start + bb * 10000], std::min(10000, new_phi_size - phi_size_start - bb * 10000), 0); + mpi::broadcast(*_mpi_comm, &_phi_reparam[phi_size_start + bb * 10000], std::min(10000, new_phi_size - phi_size_start - bb * 10000), 0); } } else { mpi::gather(*_mpi_comm, next_phi, 0); mpi::broadcast(*_mpi_comm, new_phi_size, 0); - _phi.resize(new_phi_size); + _phi_reparam.resize(new_phi_size); for(int bb = 0; bb <= (new_phi_size - phi_size_start) / 10000; ++bb) { - mpi::broadcast(*_mpi_comm, &_phi[phi_size_start + bb * 10000], std::min(10000, new_phi_size - phi_size_start - bb * 10000), 0); + mpi::broadcast(*_mpi_comm, &_phi_reparam[phi_size_start + bb * 10000], std::min(10000, new_phi_size - phi_size_start - bb * 10000), 0); } } @@ -431,7 +432,7 @@ void FeatureSpace::generate_reparam_feature_set(const std::vector<double>& prop) _mpi_comm->barrier(); node_value_arrs::clear_temp_reg(); - for(int sc = 0; sc < _scores.size() - 1; ++sc) + for(int sc = 0; sc < scores.size() - 1; ++sc) { #ifdef PARAMETERIZE if(_phi_reparam[inds[sc] + _start_gen_reparam.back()]->n_params() > 0) @@ -440,7 +441,7 @@ void FeatureSpace::generate_reparam_feature_set(const std::vector<double>& prop) } #endif - if(_scores[inds[sc]] > -1e-10) + if(scores[inds[sc]] > -1e-10) { double base_val = std::abs( util_funcs::r( @@ -449,7 +450,7 @@ void FeatureSpace::generate_reparam_feature_set(const std::vector<double>& prop) _n_samp ) ); - for(int sc2 = sc + 1; sc2 < _scores.size(); ++sc2) + for(int sc2 = sc + 1; sc2 < scores.size(); ++sc2) { double comp = std::abs( base_val - std::abs( @@ -467,7 +468,7 @@ void FeatureSpace::generate_reparam_feature_set(const std::vector<double>& prop) } } } - else if(_scores[inds[sc + 1]] - _scores[inds[sc]] < 1e-10) + else if(scores[inds[sc + 1]] - scores[inds[sc]] < 1e-10) { double base_val = std::abs( util_funcs::r( @@ -499,26 +500,26 @@ void FeatureSpace::generate_reparam_feature_set(const std::vector<double>& prop) } // Reindex - for(int ff = _start_gen.back(); ff < _phi_reparam.size(); ++ff) + for(int ff = _start_gen_reparam.back(); ff < _phi_reparam.size(); ++ff) { - _phi_reparam[ff]->reindex(ff); + _phi_reparam[ff]->reindex(ff + _n_feat); } } } } - _n_feat = _phi.size(); } #endif void FeatureSpace::generate_non_param_feats( std::vector<node_ptr>::iterator& feat, std::vector<node_ptr>& feat_set, + const std::vector<node_ptr>::iterator& start, unsigned long int& feat_ind, const double l_bound, const double u_bound ) { - unsigned long int phi_ind = feat - _phi.begin(); + unsigned long int phi_ind = feat - start; feat_set.reserve(feat_set.size() + _un_operators.size() + phi_ind * (_com_bin_operators.size() + 2 * _bin_operators.size())); for(auto& op : _un_operators) @@ -528,7 +529,7 @@ void FeatureSpace::generate_non_param_feats( for(auto& op : _com_bin_operators) { - for(auto feat_2 = _phi.begin(); feat_2 < feat; ++feat_2) + for(auto feat_2 = start; feat_2 < feat; ++feat_2) { op(feat_set, *feat, *feat_2, feat_ind, l_bound, u_bound); } @@ -536,7 +537,7 @@ void FeatureSpace::generate_non_param_feats( for(auto& op : _bin_operators) { - for(auto feat_2 = _phi.begin(); feat_2 < feat; ++feat_2) + for(auto feat_2 = start; feat_2 < feat; ++feat_2) { op(feat_set, *feat, *feat_2, feat_ind, l_bound, u_bound); op(feat_set, *feat_2, *feat, feat_ind, l_bound, u_bound); @@ -574,8 +575,8 @@ void FeatureSpace::generate_feature_space() #pragma omp for schedule(dynamic) for(auto feat_1 = _phi.begin() + _start_gen.back() + _mpi_comm->rank(); feat_1 < _phi.end(); feat_1 += _mpi_comm->size()) { - generate_non_param_feats(feat_1, next_phi_private, feat_ind, l_bound, u_bound); - generate_param_feats(feat_1, next_phi_private, feat_ind, optimizer, l_bound, u_bound); + generate_non_param_feats(feat_1, next_phi_private, _phi.begin(), feat_ind, l_bound, u_bound); + generate_param_feats(feat_1, next_phi_private, _phi.begin(), feat_ind, optimizer, l_bound, u_bound); } #pragma omp critical @@ -588,7 +589,7 @@ void FeatureSpace::generate_feature_space() #pragma omp for schedule(dynamic) for(auto feat_1 = _phi.begin() + _start_gen.back() + _mpi_comm->rank(); feat_1 < _phi.end(); feat_1 += _mpi_comm->size()) { - generate_non_param_feats(feat_1, next_phi_private, feat_ind, l_bound, u_bound); + generate_non_param_feats(feat_1, next_phi_private, _phi.begin(), feat_ind, l_bound, u_bound); } #pragma omp critical @@ -917,12 +918,11 @@ void FeatureSpace::generate_feature_space() _phi.begin() + _start_gen.back(), [&next_phi](int ind){return next_phi[ind];} ); - int cur_ind = _start_gen.back(); - std::for_each( - _phi.begin() + _start_gen.back(), - _phi.end(), - [&cur_ind](node_ptr feat){feat->reindex(cur_ind); ++cur_ind;} - ); + for(int ff = _start_gen.back(); ff < _phi.size(); ++ff) + { + _phi[ff]->reindex(ff); + _phi[ff]->set_value(); + } // Set how many features have no parameters _end_no_params.push_back( @@ -977,16 +977,16 @@ void FeatureSpace::project_generated(const double* prop, const int size, std::ve bool is_sel = (*feat)->selected(); (*feat)->set_selected(false); - #ifdef PARAMETERIZE - generate_non_param_feats(feat, generated_phi, feat_ind, _l_bound, _u_bound); - generate_param_feats(feat, generated_phi, feat_ind, optimizer, _l_bound, _u_bound); - if(reparam_optimizer) +#ifdef PARAMETERIZE + generate_non_param_feats(feat, generated_phi, _phi.begin(), feat_ind, _l_bound, _u_bound); + generate_param_feats(feat, generated_phi, _phi.begin(), feat_ind, optimizer, _l_bound, _u_bound); + if(reparam_optimizer && (feat < _phi.begin() + _end_no_params.back())) { generate_reparam_feats(feat, generated_phi, feat_ind, reparam_optimizer, _l_bound, _u_bound); } - #else - generate_non_param_feats(feat, generated_phi, feat_ind, _l_bound, _u_bound); - #endif +#else + generate_non_param_feats(feat, generated_phi, _phi.begin(), feat_ind, _l_bound, _u_bound); +#endif (*feat)->set_selected(is_sel); if(generated_phi.size() == 0) @@ -1092,8 +1092,12 @@ void FeatureSpace::sis(const std::vector<double>& prop) if(_reparam_residual && (_phi_selected.size() > 0)) { double start_time = omp_get_wtime(); + _phi.resize(_n_feat); _phi_reparam.resize(0); + _start_gen_reparam.resize(0); generate_reparam_feature_set(prop); + _phi.insert(_phi.end(), _phi_reparam.begin(), _phi_reparam.end()); + _scores.resize(_phi.size()); } #endif // Create output directories if needed @@ -1180,6 +1184,11 @@ void FeatureSpace::sis(const std::vector<double>& prop) // Generate features on the fly and get the best features for this rank if(_n_rung_generate > 0) { +#ifdef PARAMETERIZE + _phi.resize(_n_feat); + _phi.insert(_phi.end(), _phi_reparam.begin() + _start_gen_reparam.back(), _phi_reparam.end()); + _scores.resize(_phi.size()); +#endif start_time = omp_get_wtime(); phi_sel.resize(cur_feat_local); scores_sel.resize(cur_feat_local); diff --git a/src/feature_creation/feature_space/FeatureSpace.hpp b/src/feature_creation/feature_space/FeatureSpace.hpp index 36677682..fdf36523 100644 --- a/src/feature_creation/feature_space/FeatureSpace.hpp +++ b/src/feature_creation/feature_space/FeatureSpace.hpp @@ -286,12 +286,13 @@ public: void generate_non_param_feats( std::vector<node_ptr>::iterator& feat, std::vector<node_ptr>& feat_set, + const std::vector<node_ptr>::iterator& start, unsigned long int& feat_ind, const double l_bound=1e-50, const double u_bound=1e50 ); - #ifdef PARAMETERIZE +#ifdef PARAMETERIZE /** * @brief Generate a new set of parameterized features from a single feature * @details Take in the feature and perform all valid algebraic operations on it. @@ -306,6 +307,7 @@ public: void generate_param_feats( std::vector<node_ptr>::iterator& feat, std::vector<node_ptr>& feat_set, + const std::vector<node_ptr>::iterator& start, unsigned long int& feat_ind, std::shared_ptr<NLOptimizer> optimizer, const double l_bound=1e-50, @@ -337,7 +339,7 @@ public: * @param prop The property to optimize against */ void generate_reparam_feature_set(const std::vector<double>& prop); - #endif +#endif /** * @brief Calculate the SIS Scores for feature generated on the fly -- GitLab