Skip to content
Snippets Groups Projects
Commit 28c1da42 authored by Thomas Purcell
Browse files

Update past feature checks to use scores

limit the number of operations done
parent 016b7a34
No related branches found
No related tags found
No related merge requests found
#include <descriptor_identifier/Model/Model.hpp> #include <descriptor_identifier/Model/Model.hpp>
Model::Model(std::vector<double> prop_train, std::vector<double> prop_test, std::vector<std::shared_ptr<FeatureNode>> feats, std::vector<int> task_sizes_train, std::vector<int> task_sizes_test) : Model::Model(std::vector<double> prop_train, std::vector<double> prop_test, std::vector<node_ptr> feats, std::vector<int> task_sizes_train, std::vector<int> task_sizes_test) :
_n_samp_train(feats[0]->n_samp()), _n_samp_train(feats[0]->n_samp()),
_n_samp_test(feats[0]->n_test_samp()), _n_samp_test(feats[0]->n_test_samp()),
_n_dim(feats.size() + 1), _n_dim(feats.size() + 1),
... ...
......
...@@ -19,7 +19,7 @@ class Model ...@@ -19,7 +19,7 @@ class Model
int _n_samp_test; //!< The number of test samples per feature int _n_samp_test; //!< The number of test samples per feature
int _n_dim; //!< Dimension of the model int _n_dim; //!< Dimension of the model
std::vector<std::shared_ptr<FeatureNode>> _feats; //!< List of features in the model std::vector<node_ptr> _feats; //!< List of features in the model
std::vector<std::vector<double>> _coefs; //!< Coefficients for the features std::vector<std::vector<double>> _coefs; //!< Coefficients for the features
std::vector<double> _prop_train; //!< The property to be modeled std::vector<double> _prop_train; //!< The property to be modeled
...@@ -41,7 +41,7 @@ public: ...@@ -41,7 +41,7 @@ public:
* @param prop The property * @param prop The property
* @param feats The features for the model * @param feats The features for the model
*/ */
Model(std::vector<double> prop_train, std::vector<double> prop_test, std::vector<std::shared_ptr<FeatureNode>> feats, std::vector<int> task_sizes_train, std::vector<int> task_sizes_test); Model(std::vector<double> prop_train, std::vector<double> prop_test, std::vector<node_ptr> feats, std::vector<int> task_sizes_train, std::vector<int> task_sizes_test);
/** /**
... ...
......
...@@ -176,7 +176,7 @@ void SISSORegressor::l0_norm(std::vector<double>& prop, int n_dim) ...@@ -176,7 +176,7 @@ void SISSORegressor::l0_norm(std::vector<double>& prop, int n_dim)
inds = util_funcs::argsort(all_min_error); inds = util_funcs::argsort(all_min_error);
std::vector<std::shared_ptr<FeatureNode>> min_nodes(n_dim); std::vector<node_ptr> min_nodes(n_dim);
std::vector<Model> models; std::vector<Model> models;
for(int rr = 0; rr < _n_residual; ++rr) for(int rr = 0; rr < _n_residual; ++rr)
... ...
......
...@@ -295,8 +295,15 @@ void FeatureSpace::generate_feature_space() ...@@ -295,8 +295,15 @@ void FeatureSpace::generate_feature_space()
_n_feat = _phi.size(); _n_feat = _phi.size();
} }
void FeatureSpace::project_generated(double* prop, int size, std::vector<std::shared_ptr<FeatureNode>>& phi_sel, std::vector<double>& scores_sel, std::vector<double>& scores_comp) void FeatureSpace::project_generated(double* prop, int size, std::vector<node_ptr>& phi_sel, std::vector<double>& scores_sel, std::vector<double>& scores_comp)
{ {
std::vector<double> scores_prev_sel;
if(node_value_arrs::N_SELECTED > _n_sis_select)
{
scores_prev_sel.resize(_phi_selected.size());
_project(prop, scores_prev_sel.data(), _phi_selected, _task_sizes, size / _n_samp);
}
for(auto feat = _phi.begin() + _start_gen.back() + _mpi_comm->rank(); feat < _phi.end(); feat += _mpi_comm->size()) for(auto feat = _phi.begin() + _start_gen.back() + _mpi_comm->rank(); feat < _phi.end(); feat += _mpi_comm->size())
{ {
std::fill_n(node_value_arrs::TEMP_STORAGE_REG.data(), node_value_arrs::TEMP_STORAGE_REG.size(), -1); std::fill_n(node_value_arrs::TEMP_STORAGE_REG.data(), node_value_arrs::TEMP_STORAGE_REG.size(), -1);
...@@ -322,11 +329,11 @@ void FeatureSpace::project_generated(double* prop, int size, std::vector<std::sh ...@@ -322,11 +329,11 @@ void FeatureSpace::project_generated(double* prop, int size, std::vector<std::sh
bool is_valid = valid_score_against_current(end_check, generated_phi[inds[ii]]->value_ptr(), scores[inds[ii]], scores_sel, scores_comp); bool is_valid = valid_score_against_current(end_check, generated_phi[inds[ii]]->value_ptr(), scores[inds[ii]], scores_sel, scores_comp);
// Check the feature against those selected from previous SIS iterations // Check the feature against those selected from previous SIS iterations
if((node_value_arrs::N_SELECTED > _n_sis_select) && is_valid) if((node_value_arrs::N_SELECTED > _n_sis_select) && is_valid)
is_valid = valid_score_against_past(generated_phi[inds[ii]]->value_ptr(), scores_comp); is_valid = valid_score_against_past(_phi[inds[ii]]->value_ptr(), _scores[inds[ii]], scores_prev_sel, scores_comp);
if(is_valid) if(is_valid)
{ {
std::shared_ptr<FeatureNode> new_feat = std::make_shared<FeatureNode>(node_value_arrs::N_SELECTED - _n_sis_select + end_check, generated_phi[inds[ii]]->expr(), generated_phi[inds[ii]]->value(), generated_phi[inds[ii]]->test_value(), generated_phi[inds[ii]]->unit(), true); node_ptr new_feat = std::make_shared<FeatureNode>(node_value_arrs::N_SELECTED - _n_sis_select + end_check, generated_phi[inds[ii]]->expr(), generated_phi[inds[ii]]->value(), generated_phi[inds[ii]]->test_value(), generated_phi[inds[ii]]->unit(), true);
phi_sel.insert(phi_sel.begin() + end_check, new_feat); phi_sel.insert(phi_sel.begin() + end_check, new_feat);
scores_sel.insert(scores_sel.begin() + end_check, cur_score); scores_sel.insert(scores_sel.begin() + end_check, cur_score);
for(int jj = end_check + 1; jj < _n_sis_select; ++jj) for(int jj = end_check + 1; jj < _n_sis_select; ++jj)
...@@ -345,19 +352,17 @@ void FeatureSpace::project_generated(double* prop, int size, std::vector<std::sh ...@@ -345,19 +352,17 @@ void FeatureSpace::project_generated(double* prop, int size, std::vector<std::sh
} }
} }
bool FeatureSpace::valid_score_against_past(double* val_ptr, std::vector<double>& scores_comp) bool FeatureSpace::valid_score_against_past(double* val_ptr, double cur_score, std::vector<double> scores_past, std::vector<double>& scores_comp)
{ {
double cur_feat_mean = util_funcs::mean(val_ptr, _n_samp); std::transform(scores_past.begin(), scores_past.end(), scores_comp.begin(), [&cur_score](double score){return cur_score - score;});
double cur_feat_std = util_funcs::stand_dev(val_ptr, _n_samp);
std::transform(val_ptr, val_ptr + _n_samp, val_ptr, [&cur_feat_mean, &cur_feat_std](double val){return (val - cur_feat_mean) / cur_feat_std;});
dgemv_('T', _n_samp, scores_comp.size(), 1.0 / static_cast<double>(_n_samp), node_value_arrs::D_MATRIX.data(), _n_samp, val_ptr, 1, 0.0, scores_comp.data(), 1);
if(1.0 - util_funcs::max_abs_val<double>(scores_comp.data(), scores_comp.size()) < 1e-13) // If two scores are the same then they are possibly the same feature, if not then they can't be
if(*std::min_element(scores_comp.begin(), scores_comp.end()) < 1e-10)
{
int dd = std::min_element(scores_comp.begin(), scores_comp.end()) - scores_comp.begin();
if(1.0 - std::abs(util_funcs::r(node_value_arrs::get_d_matrix_ptr(dd), val_ptr, _n_samp)) < 1e-13)
return false; return false;
}
std::transform(val_ptr, val_ptr + _n_samp, val_ptr, [&cur_feat_mean, &cur_feat_std](double val){return val * cur_feat_std + cur_feat_mean;});
return true; return true;
} }
...@@ -377,26 +382,14 @@ bool FeatureSpace::valid_score_against_current(int end_check, double* val_ptr, d ...@@ -377,26 +382,14 @@ bool FeatureSpace::valid_score_against_current(int end_check, double* val_ptr, d
void FeatureSpace::sis(std::vector<double>& prop) void FeatureSpace::sis(std::vector<double>& prop)
{ {
std::vector<double> means(node_value_arrs::N_SELECTED);
std::vector<double> stand_devs(node_value_arrs::N_SELECTED);
std::vector<double> scores_comp(std::max(node_value_arrs::N_SELECTED, _n_sis_select), 1.0); std::vector<double> scores_comp(std::max(node_value_arrs::N_SELECTED, _n_sis_select), 1.0);
std::vector<double> scores_sel(_n_sis_select, 0.0); std::vector<double> scores_sel(_n_sis_select, 0.0);
std::vector<std::shared_ptr<FeatureNode>> phi_sel;
std::vector<node_ptr> phi_sel;
phi_sel.reserve(_n_sis_select); phi_sel.reserve(_n_sis_select);
int cur_feat = node_value_arrs::N_SELECTED; int cur_feat = node_value_arrs::N_SELECTED;
// Standardize the description matrix
if(cur_feat > 0)
{
for(int dd = 0; dd < cur_feat; ++dd)
{
means[dd] = util_funcs::mean(node_value_arrs::get_d_matrix_ptr(dd), _n_samp);
stand_devs[dd] = util_funcs::stand_dev(node_value_arrs::get_d_matrix_ptr(dd), _n_samp);
std::transform(node_value_arrs::get_d_matrix_ptr(dd), node_value_arrs::get_d_matrix_ptr(dd) + _n_samp, node_value_arrs::get_d_matrix_ptr(dd), [&means, &stand_devs, &dd](double val){return (val - means[dd]) / stand_devs[dd];});
}
}
node_value_arrs::resize_d_matrix_arr(_n_sis_select); node_value_arrs::resize_d_matrix_arr(_n_sis_select);
_phi_selected.reserve(_phi_selected.size() + _n_sis_select); _phi_selected.reserve(_phi_selected.size() + _n_sis_select);
...@@ -408,12 +401,19 @@ void FeatureSpace::sis(std::vector<double>& prop) ...@@ -408,12 +401,19 @@ void FeatureSpace::sis(std::vector<double>& prop)
int cur_feat_local = 0; int cur_feat_local = 0;
double cur_score = 0.0; double cur_score = 0.0;
std::vector<double> scores_prev_sel;
if(node_value_arrs::N_SELECTED > _n_sis_select)
{
scores_prev_sel.resize(_phi_selected.size());
_project(prop.data(), scores_prev_sel.data(), _phi_selected, _task_sizes, prop.size() / _n_samp);
}
while((cur_feat_local != _n_sis_select) && (ii < _scores.size())) while((cur_feat_local != _n_sis_select) && (ii < _scores.size()))
{ {
bool is_valid = valid_score_against_current(cur_feat_local, _phi[inds[ii]]->value_ptr(), _scores[inds[ii]], scores_sel, scores_comp); bool is_valid = valid_score_against_current(cur_feat_local, _phi[inds[ii]]->value_ptr(), _scores[inds[ii]], scores_sel, scores_comp);
// Check the feature against those selected from previous SIS iterations // Check the feature against those selected from previous SIS iterations
if(cur_feat > 0 && is_valid) if(cur_feat > 0 && is_valid)
is_valid = valid_score_against_past(_phi[inds[ii]]->value_ptr(), scores_comp); is_valid = valid_score_against_past(_phi[inds[ii]]->value_ptr(), _scores[inds[ii]], scores_prev_sel, scores_comp);
if(is_valid) if(is_valid)
{ {
...@@ -431,11 +431,6 @@ void FeatureSpace::sis(std::vector<double>& prop) ...@@ -431,11 +431,6 @@ void FeatureSpace::sis(std::vector<double>& prop)
project_generated(prop.data(), prop.size(), phi_sel, scores_sel, scores_comp); project_generated(prop.data(), prop.size(), phi_sel, scores_sel, scores_comp);
} }
// Unstandardize the description matrix
if(cur_feat > 0)
for(int dd = 0; dd < cur_feat; ++dd)
std::transform(node_value_arrs::get_d_matrix_ptr(dd), node_value_arrs::get_d_matrix_ptr(dd) + _n_samp, node_value_arrs::get_d_matrix_ptr(dd), [&means, &stand_devs, &dd](double val){return val * stand_devs[dd] + means[dd];});
phi_sel.resize(_n_sis_select); phi_sel.resize(_n_sis_select);
scores_sel.resize(_n_sis_select); scores_sel.resize(_n_sis_select);
...@@ -489,7 +484,7 @@ void FeatureSpace::sis(std::vector<double>& prop) ...@@ -489,7 +484,7 @@ void FeatureSpace::sis(std::vector<double>& prop)
if(_mpi_comm->rank() == 0) if(_mpi_comm->rank() == 0)
{ {
std::vector<double> sent_scores(_n_sis_select * _mpi_comm->size(), 0.0); std::vector<double> sent_scores(_n_sis_select * _mpi_comm->size(), 0.0);
std::vector<std::shared_ptr<FeatureNode>> sent_phi(_n_sis_select * _mpi_comm->size()); std::vector<node_ptr> sent_phi(_n_sis_select * _mpi_comm->size());
std::copy_n(scores_sel.begin(), _n_sis_select, sent_scores.begin()); std::copy_n(scores_sel.begin(), _n_sis_select, sent_scores.begin());
std::copy_n(phi_sel.begin(), _n_sis_select, sent_phi.begin()); std::copy_n(phi_sel.begin(), _n_sis_select, sent_phi.begin());
... ...
......
...@@ -20,7 +20,7 @@ ...@@ -20,7 +20,7 @@
*/ */
class FeatureSpace class FeatureSpace
{ {
std::vector<std::shared_ptr<FeatureNode>> _phi_selected; //!< selected features std::vector<node_ptr> _phi_selected; //!< selected features
std::vector<node_ptr> _phi; //!< all features std::vector<node_ptr> _phi; //!< all features
std::vector<node_ptr> _phi_0; //!< initial feature space std::vector<node_ptr> _phi_0; //!< initial feature space
...@@ -82,7 +82,7 @@ public: ...@@ -82,7 +82,7 @@ public:
/** /**
* @brief Accessor function for _phi_selected * @brief Accessor function for _phi_selected
*/ */
inline std::vector<std::shared_ptr<FeatureNode>> phi_selected(){return _phi_selected;}; inline std::vector<node_ptr> phi_selected(){return _phi_selected;};
/** /**
* @brief Accessor function for _phi * @brief Accessor function for _phi
...@@ -108,9 +108,9 @@ public: ...@@ -108,9 +108,9 @@ public:
void generate_new_feats(std::vector<node_ptr>::iterator& feat, std::vector<node_ptr>& feat_set, int& feat_ind, double l_bound=1e-50, double u_bound=1e50); void generate_new_feats(std::vector<node_ptr>::iterator& feat, std::vector<node_ptr>& feat_set, int& feat_ind, double l_bound=1e-50, double u_bound=1e50);
void project_generated(double* prop, int size, std::vector<std::shared_ptr<FeatureNode>>& phi_selected, std::vector<double>& scores_selected, std::vector<double>& scores_comp); void project_generated(double* prop, int size, std::vector<node_ptr>& phi_selected, std::vector<double>& scores_selected, std::vector<double>& scores_comp);
bool valid_score_against_past(double* val_ptr, std::vector<double>& scores_comp); bool valid_score_against_past(double* val_ptr, double cur_score, std::vector<double> scores_past, std::vector<double>& scores_comp);
bool valid_score_against_current(int end_check, double* val_ptr, double cur_score, std::vector<double>& scores_selected, std::vector<double>& scores_comp); bool valid_score_against_current(int end_check, double* val_ptr, double cur_score, std::vector<double>& scores_selected, std::vector<double>& scores_comp);
/** /**
... ...
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment