Commit 28c1da42 authored by Thomas Purcell
Browse files

Update past feature checks to use scores

Limit the number of operations performed.
parent 016b7a34
#include <descriptor_identifier/Model/Model.hpp>
Model::Model(std::vector<double> prop_train, std::vector<double> prop_test, std::vector<std::shared_ptr<FeatureNode>> feats, std::vector<int> task_sizes_train, std::vector<int> task_sizes_test) :
Model::Model(std::vector<double> prop_train, std::vector<double> prop_test, std::vector<node_ptr> feats, std::vector<int> task_sizes_train, std::vector<int> task_sizes_test) :
_n_samp_train(feats[0]->n_samp()),
_n_samp_test(feats[0]->n_test_samp()),
_n_dim(feats.size() + 1),
......
......@@ -19,7 +19,7 @@ class Model
int _n_samp_test; //!< The number of test samples per feature
int _n_dim; //!< Dimension of the model
std::vector<std::shared_ptr<FeatureNode>> _feats; //!< List of features in the model
std::vector<node_ptr> _feats; //!< List of features in the model
std::vector<std::vector<double>> _coefs; //!< Coefficients for the features
std::vector<double> _prop_train; //!< The property to be modeled
......@@ -41,7 +41,7 @@ public:
* @param prop The property
* @param feats The features for the model
*/
Model(std::vector<double> prop_train, std::vector<double> prop_test, std::vector<std::shared_ptr<FeatureNode>> feats, std::vector<int> task_sizes_train, std::vector<int> task_sizes_test);
Model(std::vector<double> prop_train, std::vector<double> prop_test, std::vector<node_ptr> feats, std::vector<int> task_sizes_train, std::vector<int> task_sizes_test);
/**
......
......@@ -176,7 +176,7 @@ void SISSORegressor::l0_norm(std::vector<double>& prop, int n_dim)
inds = util_funcs::argsort(all_min_error);
std::vector<std::shared_ptr<FeatureNode>> min_nodes(n_dim);
std::vector<node_ptr> min_nodes(n_dim);
std::vector<Model> models;
for(int rr = 0; rr < _n_residual; ++rr)
......
......@@ -295,8 +295,15 @@ void FeatureSpace::generate_feature_space()
_n_feat = _phi.size();
}
void FeatureSpace::project_generated(double* prop, int size, std::vector<std::shared_ptr<FeatureNode>>& phi_sel, std::vector<double>& scores_sel, std::vector<double>& scores_comp)
void FeatureSpace::project_generated(double* prop, int size, std::vector<node_ptr>& phi_sel, std::vector<double>& scores_sel, std::vector<double>& scores_comp)
{
std::vector<double> scores_prev_sel;
if(node_value_arrs::N_SELECTED > _n_sis_select)
{
scores_prev_sel.resize(_phi_selected.size());
_project(prop, scores_prev_sel.data(), _phi_selected, _task_sizes, size / _n_samp);
}
for(auto feat = _phi.begin() + _start_gen.back() + _mpi_comm->rank(); feat < _phi.end(); feat += _mpi_comm->size())
{
std::fill_n(node_value_arrs::TEMP_STORAGE_REG.data(), node_value_arrs::TEMP_STORAGE_REG.size(), -1);
......@@ -322,11 +329,11 @@ void FeatureSpace::project_generated(double* prop, int size, std::vector<std::sh
bool is_valid = valid_score_against_current(end_check, generated_phi[inds[ii]]->value_ptr(), scores[inds[ii]], scores_sel, scores_comp);
// Check the feature against those selected from previous SIS iterations
if((node_value_arrs::N_SELECTED > _n_sis_select) && is_valid)
is_valid = valid_score_against_past(generated_phi[inds[ii]]->value_ptr(), scores_comp);
is_valid = valid_score_against_past(_phi[inds[ii]]->value_ptr(), _scores[inds[ii]], scores_prev_sel, scores_comp);
if(is_valid)
{
std::shared_ptr<FeatureNode> new_feat = std::make_shared<FeatureNode>(node_value_arrs::N_SELECTED - _n_sis_select + end_check, generated_phi[inds[ii]]->expr(), generated_phi[inds[ii]]->value(), generated_phi[inds[ii]]->test_value(), generated_phi[inds[ii]]->unit(), true);
node_ptr new_feat = std::make_shared<FeatureNode>(node_value_arrs::N_SELECTED - _n_sis_select + end_check, generated_phi[inds[ii]]->expr(), generated_phi[inds[ii]]->value(), generated_phi[inds[ii]]->test_value(), generated_phi[inds[ii]]->unit(), true);
phi_sel.insert(phi_sel.begin() + end_check, new_feat);
scores_sel.insert(scores_sel.begin() + end_check, cur_score);
for(int jj = end_check + 1; jj < _n_sis_select; ++jj)
......@@ -345,19 +352,17 @@ void FeatureSpace::project_generated(double* prop, int size, std::vector<std::sh
}
}
// NOTE(review): this span appears to interleave the removed and the added lines of a diff hunk
// (two signatures and two bodies are present) -- confirm against the repository before editing.
bool FeatureSpace::valid_score_against_past(double* val_ptr, std::vector<double>& scores_comp)
// NOTE(review): in this signature scores_past is taken by value, so the vector is copied on every call.
bool FeatureSpace::valid_score_against_past(double* val_ptr, double cur_score, std::vector<double> scores_past, std::vector<double>& scores_comp)
{
// Mean and standard deviation of the _n_samp values starting at val_ptr.
double cur_feat_mean = util_funcs::mean(val_ptr, _n_samp);
double cur_feat_std = util_funcs::stand_dev(val_ptr, _n_samp);
// Rescale the values in place to (val - mean) / stand_dev.
std::transform(val_ptr, val_ptr + _n_samp, val_ptr, [&cur_feat_mean, &cur_feat_std](double val){return (val - cur_feat_mean) / cur_feat_std;});
// Writes scores_comp.size() results into scores_comp from D_MATRIX and val_ptr, scaled by 1 / _n_samp.
// NOTE(review): presumably the correlation of the feature with each stored column -- confirm.
dgemv_('T', _n_samp, scores_comp.size(), 1.0 / static_cast<double>(_n_samp), node_value_arrs::D_MATRIX.data(), _n_samp, val_ptr, 1, 0.0, scores_comp.data(), 1);
// Store cur_score - score for every entry of scores_past at the front of scores_comp.
// NOTE(review): the difference is signed, so a past score larger than cur_score gives a negative entry.
std::transform(scores_past.begin(), scores_past.end(), scores_comp.begin(), [&cur_score](double score){return cur_score - score;});
// Reject when the largest absolute entry of scores_comp is within 1e-13 of 1.
if(1.0 - util_funcs::max_abs_val<double>(scores_comp.data(), scores_comp.size()) < 1e-13)
return false;
// Undo the in-place rescaling of the values at val_ptr (not reached when the check above returns false).
std::transform(val_ptr, val_ptr + _n_samp, val_ptr, [&cur_feat_mean, &cur_feat_std](double val){return val * cur_feat_std + cur_feat_mean;});
// If two scores are the same, the features are possibly identical; if the scores differ, they cannot be.
// NOTE(review): min_element scans all of scores_comp, not only the first scores_past.size() entries,
// and any negative difference also satisfies the < 1e-10 test -- confirm whether an absolute
// difference was intended.
if(*std::min_element(scores_comp.begin(), scores_comp.end()) < 1e-10)
{
// Index of the smallest score difference; only that single stored column is compared below.
int dd = std::min_element(scores_comp.begin(), scores_comp.end()) - scores_comp.begin();
// Reject when |r| between that stored column and the feature values is within 1e-13 of 1.
if(1.0 - std::abs(util_funcs::r(node_value_arrs::get_d_matrix_ptr(dd), val_ptr, _n_samp)) < 1e-13)
return false;
}
return true;
}
......@@ -377,26 +382,14 @@ bool FeatureSpace::valid_score_against_current(int end_check, double* val_ptr, d
void FeatureSpace::sis(std::vector<double>& prop)
{
std::vector<double> means(node_value_arrs::N_SELECTED);
std::vector<double> stand_devs(node_value_arrs::N_SELECTED);
std::vector<double> scores_comp(std::max(node_value_arrs::N_SELECTED, _n_sis_select), 1.0);
std::vector<double> scores_sel(_n_sis_select, 0.0);
std::vector<std::shared_ptr<FeatureNode>> phi_sel;
std::vector<node_ptr> phi_sel;
phi_sel.reserve(_n_sis_select);
int cur_feat = node_value_arrs::N_SELECTED;
// Standardize the description matrix
if(cur_feat > 0)
{
for(int dd = 0; dd < cur_feat; ++dd)
{
means[dd] = util_funcs::mean(node_value_arrs::get_d_matrix_ptr(dd), _n_samp);
stand_devs[dd] = util_funcs::stand_dev(node_value_arrs::get_d_matrix_ptr(dd), _n_samp);
std::transform(node_value_arrs::get_d_matrix_ptr(dd), node_value_arrs::get_d_matrix_ptr(dd) + _n_samp, node_value_arrs::get_d_matrix_ptr(dd), [&means, &stand_devs, &dd](double val){return (val - means[dd]) / stand_devs[dd];});
}
}
node_value_arrs::resize_d_matrix_arr(_n_sis_select);
_phi_selected.reserve(_phi_selected.size() + _n_sis_select);
......@@ -408,12 +401,19 @@ void FeatureSpace::sis(std::vector<double>& prop)
int cur_feat_local = 0;
double cur_score = 0.0;
std::vector<double> scores_prev_sel;
if(node_value_arrs::N_SELECTED > _n_sis_select)
{
scores_prev_sel.resize(_phi_selected.size());
_project(prop.data(), scores_prev_sel.data(), _phi_selected, _task_sizes, prop.size() / _n_samp);
}
while((cur_feat_local != _n_sis_select) && (ii < _scores.size()))
{
bool is_valid = valid_score_against_current(cur_feat_local, _phi[inds[ii]]->value_ptr(), _scores[inds[ii]], scores_sel, scores_comp);
// Check the feature against those selected from previous SIS iterations
if(cur_feat > 0 && is_valid)
is_valid = valid_score_against_past(_phi[inds[ii]]->value_ptr(), scores_comp);
is_valid = valid_score_against_past(_phi[inds[ii]]->value_ptr(), _scores[inds[ii]], scores_prev_sel, scores_comp);
if(is_valid)
{
......@@ -431,11 +431,6 @@ void FeatureSpace::sis(std::vector<double>& prop)
project_generated(prop.data(), prop.size(), phi_sel, scores_sel, scores_comp);
}
// Unstandardize the description matrix
if(cur_feat > 0)
for(int dd = 0; dd < cur_feat; ++dd)
std::transform(node_value_arrs::get_d_matrix_ptr(dd), node_value_arrs::get_d_matrix_ptr(dd) + _n_samp, node_value_arrs::get_d_matrix_ptr(dd), [&means, &stand_devs, &dd](double val){return val * stand_devs[dd] + means[dd];});
phi_sel.resize(_n_sis_select);
scores_sel.resize(_n_sis_select);
......@@ -489,7 +484,7 @@ void FeatureSpace::sis(std::vector<double>& prop)
if(_mpi_comm->rank() == 0)
{
std::vector<double> sent_scores(_n_sis_select * _mpi_comm->size(), 0.0);
std::vector<std::shared_ptr<FeatureNode>> sent_phi(_n_sis_select * _mpi_comm->size());
std::vector<node_ptr> sent_phi(_n_sis_select * _mpi_comm->size());
std::copy_n(scores_sel.begin(), _n_sis_select, sent_scores.begin());
std::copy_n(phi_sel.begin(), _n_sis_select, sent_phi.begin());
......
......@@ -20,7 +20,7 @@
*/
class FeatureSpace
{
std::vector<std::shared_ptr<FeatureNode>> _phi_selected; //!< selected features
std::vector<node_ptr> _phi_selected; //!< selected features
std::vector<node_ptr> _phi; //!< all features
std::vector<node_ptr> _phi_0; //!< initial feature space
......@@ -82,7 +82,7 @@ public:
/**
* @brief Accessor function for _phi_selected
* @return A copy of the _phi_selected vector (returned by value)
*/
inline std::vector<std::shared_ptr<FeatureNode>> phi_selected(){return _phi_selected;};
inline std::vector<node_ptr> phi_selected(){return _phi_selected;};
/**
* @brief Accessor function for _phi
......@@ -108,9 +108,9 @@ public:
void generate_new_feats(std::vector<node_ptr>::iterator& feat, std::vector<node_ptr>& feat_set, int& feat_ind, double l_bound=1e-50, double u_bound=1e50);
void project_generated(double* prop, int size, std::vector<std::shared_ptr<FeatureNode>>& phi_selected, std::vector<double>& scores_selected, std::vector<double>& scores_comp);
void project_generated(double* prop, int size, std::vector<node_ptr>& phi_selected, std::vector<double>& scores_selected, std::vector<double>& scores_comp);
bool valid_score_against_past(double* val_ptr, std::vector<double>& scores_comp);
bool valid_score_against_past(double* val_ptr, double cur_score, std::vector<double> scores_past, std::vector<double>& scores_comp);
bool valid_score_against_current(int end_check, double* val_ptr, double cur_score, std::vector<double>& scores_selected, std::vector<double>& scores_comp);
/**
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment