diff --git a/src/feature_creation/feature_space/FeatureSpace.cpp b/src/feature_creation/feature_space/FeatureSpace.cpp index e5e64bb749291cae8ed97900e82e9698069ae435..f6c7e141530df5c5fe3afd5658c53a8433a0d776 100644 --- a/src/feature_creation/feature_space/FeatureSpace.cpp +++ b/src/feature_creation/feature_space/FeatureSpace.cpp @@ -22,6 +22,7 @@ FeatureSpace::FeatureSpace( std::shared_ptr<MPI_Interface> mpi_comm, std::vector<node_ptr> phi_0, std::vector<std::string> allowed_ops, + std::vector<double> prop, std::vector<int> task_sizes, int max_phi, int n_sis_select, @@ -47,6 +48,7 @@ FeatureSpace::FeatureSpace( _n_rung_generate(n_rung_generate) { _project = project_funcs::project_r; + if(_n_rung_generate > 1) throw std::logic_error("A maximum of one rung can be generated on the fly."); else if(_max_phi - _n_rung_generate < _n_rung_store) @@ -61,7 +63,7 @@ FeatureSpace::FeatureSpace( else _un_operators.push_back(allowed_op_maps::unary_operator_map[op]); } - generate_feature_space(); + generate_feature_space(prop); _scores.reserve(_phi.size()); _scores.resize(_phi.size()); } @@ -126,10 +128,12 @@ void FeatureSpace::generate_new_feats(std::vector<node_ptr>::iterator& feat, std } } -void FeatureSpace::generate_feature_space() +void FeatureSpace::generate_feature_space(std::vector<double>& prop) { double u_bound = 1e50; double l_bound = 1e-50; + std::vector<int> inds; + for(int nn = 1; nn <= _max_phi - _n_rung_generate; ++nn) { if(nn == _max_phi) @@ -142,9 +146,7 @@ void FeatureSpace::generate_feature_space() int feat_ind = _phi.size(); for(auto feat_1 = _phi.begin() + _mpi_comm->rank(); feat_1 < _phi.end(); feat_1 += _mpi_comm->size()) - { generate_new_feats(feat_1, next_phi, feat_ind, l_bound, u_bound); - } _mpi_comm->barrier(); _start_gen.push_back(_phi.size()); @@ -164,6 +166,35 @@ void FeatureSpace::generate_feature_space() ++feat_ind; } } + + if(nn < _max_phi) + { + // Remove identical features + _scores.resize(_phi.size()); + _project(prop.data(), _scores.data(), _phi, _task_sizes, 1); + _scores.erase(_scores.begin(), _scores.begin() + _start_gen[_start_gen.size() - 1]); + inds = util_funcs::argsort(_scores); + + std::vector<int> del_inds; + + for(int sc = _scores.size() - 1; sc > 0; --sc) + { + if(_scores[inds[sc]] - _scores[inds[sc] - 1] < 1e-10) + { + if(1.0 - std::abs(util_funcs::r(_phi[_start_gen.back() + inds[sc]]->value_ptr(), _phi[_start_gen.back() + inds[sc - 1]]->value_ptr(), _n_samp)) < 1e-13) + del_inds.push_back(-1 * (inds[sc] + _start_gen.back())); + } + } + inds = util_funcs::argsort(del_inds); + + for(int ii = 0; ii < inds.size(); ++ii) + _phi.erase(_phi.begin() - del_inds[inds[ii]]); + + // Reindex + for(int ff = _start_gen.back(); ff < _phi.size(); ++ff) + _phi[ff]->reindex(ff); + } + if(nn <= _n_rung_store) { bool use_temp = (nn != _max_phi) || (_max_phi > _n_rung_store); @@ -500,7 +531,7 @@ void FeatureSpace::sis(std::vector<double>& prop) inds = util_funcs::argsort(sent_scores); for(int ii = 0; ii < _n_sis_select; ++ii) { - std::cout << sent_scores[inds[ii]] << '\t' << sent_phi[inds[ii]]->expr() << std::endl; + std::cout << std::setw(22) << std::setprecision(18) << std::left << sent_scores[inds[ii]] << sent_phi[inds[ii]]->expr() << std::endl; _phi_selected.push_back(sent_phi[inds[ii]]); _phi_selected.back()->reindex(cur_feat); ++cur_feat; @@ -522,7 +553,7 @@ void FeatureSpace::sis(std::vector<double>& prop) { if(valid_score_against_current(cur_feat_local, sent_phi[inds[ii]]->value().data(), sent_scores[inds[ii]], scores_sel, scores_comp)) { - std::cout << sent_scores[inds[ii]] << '\t' << sent_phi[inds[ii]]->expr() << std::endl; + std::cout << std::setw(22) << std::setprecision(18) << std::left << sent_scores[inds[ii]] << sent_phi[inds[ii]]->expr() << std::endl; _phi_selected.push_back(sent_phi[inds[ii]]); _phi_selected.back()->reindex(cur_feat); _phi_selected.back()->set_value(); @@ -555,7 +586,7 @@ void FeatureSpace::sis(std::vector<double>& prop) cur_feat_local = 0; for(auto& feat : phi_sel) { - std::cout << scores_sel[cur_feat_local] << '\t' << phi_sel[cur_feat_local]->expr() << std::endl; + std::cout << std::setw(22) << std::setprecision(18) << std::left << scores_sel[cur_feat_local] << phi_sel[cur_feat_local]->expr() << std::endl; _phi_selected.push_back(feat); _phi_selected.back()->reindex(cur_feat); _phi_selected.back()->set_value(); diff --git a/src/feature_creation/feature_space/FeatureSpace.hpp b/src/feature_creation/feature_space/FeatureSpace.hpp index 1299afcf22cc23d69af69cc5bdb38f4c9c176139..76c06f4dcb991bb66eca55e331aab0cff8c74053 100644 --- a/src/feature_creation/feature_space/FeatureSpace.hpp +++ b/src/feature_creation/feature_space/FeatureSpace.hpp @@ -64,6 +64,7 @@ public: std::shared_ptr<MPI_Interface> mpi_comm, std::vector<node_ptr> phi_0, std::vector<std::string> allowed_ops, + std::vector<double> prop, std::vector<int> task_sizes, int max_phi=1, int n_sis_select=1, @@ -77,7 +78,7 @@ public: * @brief Generate the full feature set from the allowed operators and initial feature set * @details populates phi with all features from an initial set and the allowed operators */ - void generate_feature_space(); + void generate_feature_space(std::vector<double>& prop); /** * @brief Accessor function for _phi_selected diff --git a/src/inputs/InputParser.cpp b/src/inputs/InputParser.cpp index 7308002ae004f43b3f62e695eadc3fe20be8c1e0..c444ac2eb5cbd50b440f67bc34362beac71037d6 100644 --- a/src/inputs/InputParser.cpp +++ b/src/inputs/InputParser.cpp @@ -268,7 +268,7 @@ void InputParser::generate_feature_space(std::shared_ptr<MPI_Interface> comm, st for(int ff = 0; ff < headers.size(); ++ff) phi_0.push_back(std::make_shared<FeatureNode>(ff, headers[ff], data[ff], test_data[ff], units[ff])); - _feat_space = std::make_shared<FeatureSpace>(comm, phi_0, _opset, _task_sizes_train, _max_rung, _n_sis_select, _max_store_rung, _n_rung_generate, _l_bound, _u_bound); + _feat_space = std::make_shared<FeatureSpace>(comm, phi_0, _opset, _prop_train, _task_sizes_train, _max_rung, _n_sis_select, _max_store_rung, _n_rung_generate, _l_bound, _u_bound); } void stripComments(std::string& filename) diff --git a/src/utils/math_funcs.hpp b/src/utils/math_funcs.hpp index 789275d5f2c7e18d289a6c9b739375919d000925..54f6671e23f324a91c22990b140221743f96350b 100644 --- a/src/utils/math_funcs.hpp +++ b/src/utils/math_funcs.hpp @@ -81,6 +81,20 @@ namespace util_funcs std::sort(begin, end, [&vec](int i1, int i2){return vec[i1] < vec[i2];}); } + inline std::vector<int> argsort(std::vector<int>& vec) + { + std::vector<int> index(vec.size()); + std::iota(index.begin(), index.end(), 0); + std::sort(index.begin(), index.end(), [&vec](int i1, int i2){return vec[i1] < vec[i2];}); + + return index; + } + + inline void argsort(int* begin, int* end, std::vector<int>& vec) + { + std::sort(begin, end, [&vec](int i1, int i2){return vec[i1] < vec[i2];}); + } + inline int factorial(int n) { return (n == 1 || n == 0) ? 1 : factorial(n - 1) * n;