From 28c1da4226bee99f88b2a0e9a0c16a05568d5eab Mon Sep 17 00:00:00 2001 From: Thomas Purcell <purcell@fhi-berlin.mpg.de> Date: Fri, 12 Jun 2020 20:36:30 +0200 Subject: [PATCH] Update past feature checks to use scores to limit the number of operations done --- src/descriptor_identifier/Model/Model.cpp | 2 +- src/descriptor_identifier/Model/Model.hpp | 4 +- src/descriptor_identifier/SISSORegressor.cpp | 2 +- .../feature_space/FeatureSpace.cpp | 65 +++++++++---------- .../feature_space/FeatureSpace.hpp | 8 +-- 5 files changed, 38 insertions(+), 43 deletions(-) diff --git a/src/descriptor_identifier/Model/Model.cpp b/src/descriptor_identifier/Model/Model.cpp index 10eb8175..b8968f25 100644 --- a/src/descriptor_identifier/Model/Model.cpp +++ b/src/descriptor_identifier/Model/Model.cpp @@ -1,6 +1,6 @@ #include <descriptor_identifier/Model/Model.hpp> -Model::Model(std::vector<double> prop_train, std::vector<double> prop_test, std::vector<std::shared_ptr<FeatureNode>> feats, std::vector<int> task_sizes_train, std::vector<int> task_sizes_test) : +Model::Model(std::vector<double> prop_train, std::vector<double> prop_test, std::vector<node_ptr> feats, std::vector<int> task_sizes_train, std::vector<int> task_sizes_test) : _n_samp_train(feats[0]->n_samp()), _n_samp_test(feats[0]->n_test_samp()), _n_dim(feats.size() + 1), diff --git a/src/descriptor_identifier/Model/Model.hpp b/src/descriptor_identifier/Model/Model.hpp index d660b6da..f52463d4 100644 --- a/src/descriptor_identifier/Model/Model.hpp +++ b/src/descriptor_identifier/Model/Model.hpp @@ -19,7 +19,7 @@ class Model int _n_samp_test; //!< The number of test samples per feature int _n_dim; //!< Dimension of the model - std::vector<std::shared_ptr<FeatureNode>> _feats; //!< List of features in the model + std::vector<node_ptr> _feats; //!< List of features in the model std::vector<std::vector<double>> _coefs; //!< Coefficients for teh features std::vector<double> _prop_train; //!< The property to be modeled @@ -41,7 +41,7 @@ 
public: * @param prop The property * @param feats The features for the model */ - Model(std::vector<double> prop_train, std::vector<double> prop_test, std::vector<std::shared_ptr<FeatureNode>> feats, std::vector<int> task_sizes_train, std::vector<int> task_sizes_test); + Model(std::vector<double> prop_train, std::vector<double> prop_test, std::vector<node_ptr> feats, std::vector<int> task_sizes_train, std::vector<int> task_sizes_test); /** diff --git a/src/descriptor_identifier/SISSORegressor.cpp b/src/descriptor_identifier/SISSORegressor.cpp index 77b35c2a..0799aaed 100644 --- a/src/descriptor_identifier/SISSORegressor.cpp +++ b/src/descriptor_identifier/SISSORegressor.cpp @@ -176,7 +176,7 @@ void SISSORegressor::l0_norm(std::vector<double>& prop, int n_dim) inds = util_funcs::argsort(all_min_error); - std::vector<std::shared_ptr<FeatureNode>> min_nodes(n_dim); + std::vector<node_ptr> min_nodes(n_dim); std::vector<Model> models; for(int rr = 0; rr < _n_residual; ++rr) diff --git a/src/feature_creation/feature_space/FeatureSpace.cpp b/src/feature_creation/feature_space/FeatureSpace.cpp index c740c4f6..e5e64bb7 100644 --- a/src/feature_creation/feature_space/FeatureSpace.cpp +++ b/src/feature_creation/feature_space/FeatureSpace.cpp @@ -295,8 +295,15 @@ void FeatureSpace::generate_feature_space() _n_feat = _phi.size(); } -void FeatureSpace::project_generated(double* prop, int size, std::vector<std::shared_ptr<FeatureNode>>& phi_sel, std::vector<double>& scores_sel, std::vector<double>& scores_comp) +void FeatureSpace::project_generated(double* prop, int size, std::vector<node_ptr>& phi_sel, std::vector<double>& scores_sel, std::vector<double>& scores_comp) { + std::vector<double> scores_prev_sel; + if(node_value_arrs::N_SELECTED > _n_sis_select) + { + scores_prev_sel.resize(_phi_selected.size()); + _project(prop, scores_prev_sel.data(), _phi_selected, _task_sizes, size / _n_samp); + } + for(auto feat = _phi.begin() + _start_gen.back() + _mpi_comm->rank(); feat < 
_phi.end(); feat += _mpi_comm->size()) { std::fill_n(node_value_arrs::TEMP_STORAGE_REG.data(), node_value_arrs::TEMP_STORAGE_REG.size(), -1); @@ -322,11 +329,11 @@ void FeatureSpace::project_generated(double* prop, int size, std::vector<std::sh bool is_valid = valid_score_against_current(end_check, generated_phi[inds[ii]]->value_ptr(), scores[inds[ii]], scores_sel, scores_comp); // Check the feature against those selected from previous SIS iterations if((node_value_arrs::N_SELECTED > _n_sis_select) && is_valid) - is_valid = valid_score_against_past(generated_phi[inds[ii]]->value_ptr(), scores_comp); + is_valid = valid_score_against_past(_phi[inds[ii]]->value_ptr(), _scores[inds[ii]], scores_prev_sel, scores_comp); if(is_valid) { - std::shared_ptr<FeatureNode> new_feat = std::make_shared<FeatureNode>(node_value_arrs::N_SELECTED - _n_sis_select + end_check, generated_phi[inds[ii]]->expr(), generated_phi[inds[ii]]->value(), generated_phi[inds[ii]]->test_value(), generated_phi[inds[ii]]->unit(), true); + node_ptr new_feat = std::make_shared<FeatureNode>(node_value_arrs::N_SELECTED - _n_sis_select + end_check, generated_phi[inds[ii]]->expr(), generated_phi[inds[ii]]->value(), generated_phi[inds[ii]]->test_value(), generated_phi[inds[ii]]->unit(), true); phi_sel.insert(phi_sel.begin() + end_check, new_feat); scores_sel.insert(scores_sel.begin() + end_check, cur_score); for(int jj = end_check + 1; jj < _n_sis_select; ++jj) @@ -345,19 +352,17 @@ void FeatureSpace::project_generated(double* prop, int size, std::vector<std::sh } } -bool FeatureSpace::valid_score_against_past(double* val_ptr, std::vector<double>& scores_comp) +bool FeatureSpace::valid_score_against_past(double* val_ptr, double cur_score, std::vector<double> scores_past, std::vector<double>& scores_comp) { - double cur_feat_mean = util_funcs::mean(val_ptr, _n_samp); - double cur_feat_std = util_funcs::stand_dev(val_ptr, _n_samp); - - std::transform(val_ptr, val_ptr + _n_samp, val_ptr, [&cur_feat_mean, 
&cur_feat_std](double val){return (val - cur_feat_mean) / cur_feat_std;}); - - dgemv_('T', _n_samp, scores_comp.size(), 1.0 / static_cast<double>(_n_samp), node_value_arrs::D_MATRIX.data(), _n_samp, val_ptr, 1, 0.0, scores_comp.data(), 1); + std::transform(scores_past.begin(), scores_past.end(), scores_comp.begin(), [&cur_score](double score){return cur_score - score;}); - if(1.0 - util_funcs::max_abs_val<double>(scores_comp.data(), scores_comp.size()) < 1e-13) - return false; - - std::transform(val_ptr, val_ptr + _n_samp, val_ptr, [&cur_feat_mean, &cur_feat_std](double val){return val * cur_feat_std + cur_feat_mean;}); + // If two scores are the same then they are possibly the same feature, if not then they can't be + if(*std::min_element(scores_comp.begin(), scores_comp.end()) < 1e-10) + { + int dd = std::min_element(scores_comp.begin(), scores_comp.end()) - scores_comp.begin(); + if(1.0 - std::abs(util_funcs::r(node_value_arrs::get_d_matrix_ptr(dd), val_ptr, _n_samp)) < 1e-13) + return false; + } return true; } @@ -377,26 +382,14 @@ bool FeatureSpace::valid_score_against_current(int end_check, double* val_ptr, d void FeatureSpace::sis(std::vector<double>& prop) { - std::vector<double> means(node_value_arrs::N_SELECTED); - std::vector<double> stand_devs(node_value_arrs::N_SELECTED); std::vector<double> scores_comp(std::max(node_value_arrs::N_SELECTED, _n_sis_select), 1.0); - std::vector<double> scores_sel(_n_sis_select, 0.0); - std::vector<std::shared_ptr<FeatureNode>> phi_sel; + + std::vector<node_ptr> phi_sel; phi_sel.reserve(_n_sis_select); int cur_feat = node_value_arrs::N_SELECTED; - // Standardize the description matrix - if(cur_feat > 0) - { - for(int dd = 0; dd < cur_feat; ++dd) - { - means[dd] = util_funcs::mean(node_value_arrs::get_d_matrix_ptr(dd), _n_samp); - stand_devs[dd] = util_funcs::stand_dev(node_value_arrs::get_d_matrix_ptr(dd), _n_samp); - std::transform(node_value_arrs::get_d_matrix_ptr(dd), node_value_arrs::get_d_matrix_ptr(dd) + _n_samp, 
node_value_arrs::get_d_matrix_ptr(dd), [&means, &stand_devs, &dd](double val){return (val - means[dd]) / stand_devs[dd];}); - } - } node_value_arrs::resize_d_matrix_arr(_n_sis_select); _phi_selected.reserve(_phi_selected.size() + _n_sis_select); @@ -408,12 +401,19 @@ void FeatureSpace::sis(std::vector<double>& prop) int cur_feat_local = 0; double cur_score = 0.0; + std::vector<double> scores_prev_sel; + if(node_value_arrs::N_SELECTED > _n_sis_select) + { + scores_prev_sel.resize(_phi_selected.size()); + _project(prop.data(), scores_prev_sel.data(), _phi_selected, _task_sizes, prop.size() / _n_samp); + } + while((cur_feat_local != _n_sis_select) && (ii < _scores.size())) { bool is_valid = valid_score_against_current(cur_feat_local, _phi[inds[ii]]->value_ptr(), _scores[inds[ii]], scores_sel, scores_comp); // Check the feature against those selected from previous SIS iterations if(cur_feat > 0 && is_valid) - is_valid = valid_score_against_past(_phi[inds[ii]]->value_ptr(), scores_comp); + is_valid = valid_score_against_past(_phi[inds[ii]]->value_ptr(), _scores[inds[ii]], scores_prev_sel, scores_comp); if(is_valid) { @@ -431,11 +431,6 @@ void FeatureSpace::sis(std::vector<double>& prop) project_generated(prop.data(), prop.size(), phi_sel, scores_sel, scores_comp); } - // Unstandardize the description matrix - if(cur_feat > 0) - for(int dd = 0; dd < cur_feat; ++dd) - std::transform(node_value_arrs::get_d_matrix_ptr(dd), node_value_arrs::get_d_matrix_ptr(dd) + _n_samp, node_value_arrs::get_d_matrix_ptr(dd), [&means, &stand_devs, &dd](double val){return val * stand_devs[dd] + means[dd];}); - phi_sel.resize(_n_sis_select); scores_sel.resize(_n_sis_select); @@ -489,7 +484,7 @@ void FeatureSpace::sis(std::vector<double>& prop) if(_mpi_comm->rank() == 0) { std::vector<double> sent_scores(_n_sis_select * _mpi_comm->size(), 0.0); - std::vector<std::shared_ptr<FeatureNode>> sent_phi(_n_sis_select * _mpi_comm->size()); + std::vector<node_ptr> sent_phi(_n_sis_select * 
_mpi_comm->size()); std::copy_n(scores_sel.begin(), _n_sis_select, sent_scores.begin()); std::copy_n(phi_sel.begin(), _n_sis_select, sent_phi.begin()); diff --git a/src/feature_creation/feature_space/FeatureSpace.hpp b/src/feature_creation/feature_space/FeatureSpace.hpp index 50a4d1e9..1299afcf 100644 --- a/src/feature_creation/feature_space/FeatureSpace.hpp +++ b/src/feature_creation/feature_space/FeatureSpace.hpp @@ -20,7 +20,7 @@ */ class FeatureSpace { - std::vector<std::shared_ptr<FeatureNode>> _phi_selected; //!< selected features + std::vector<node_ptr> _phi_selected; //!< selected features std::vector<node_ptr> _phi; //!< all features std::vector<node_ptr> _phi_0; //!< initial feature space @@ -82,7 +82,7 @@ public: /** * @brief Accessor function for _phi_selected */ - inline std::vector<std::shared_ptr<FeatureNode>> phi_selected(){return _phi_selected;}; + inline std::vector<node_ptr> phi_selected(){return _phi_selected;}; /** * @brief Accessor function for _phi @@ -108,9 +108,9 @@ public: void generate_new_feats(std::vector<node_ptr>::iterator& feat, std::vector<node_ptr>& feat_set, int& feat_ind, double l_bound=1e-50, double u_bound=1e50); - void project_generated(double* prop, int size, std::vector<std::shared_ptr<FeatureNode>>& phi_selected, std::vector<double>& scores_selected, std::vector<double>& scores_comp); + void project_generated(double* prop, int size, std::vector<node_ptr>& phi_selected, std::vector<double>& scores_selected, std::vector<double>& scores_comp); - bool valid_score_against_past(double* val_ptr, std::vector<double>& scores_comp); + bool valid_score_against_past(double* val_ptr, double cur_score, std::vector<double> scores_past, std::vector<double>& scores_comp); bool valid_score_against_current(int end_check, double* val_ptr, double cur_score, std::vector<double>& scores_selected, std::vector<double>& scores_comp); /** -- GitLab