Update past feature checks to use scores

Limit the number of operations done when checking new features against previously selected ones.

Update past feature checks to use scores
28c1da42 · Thomas Purcell · 016b7a34 · 28c1da42 · 28c1da42 · 28c1da42
Commit 28c1da42 authored Jun 12, 2020 by Thomas Purcell
--- a/src/descriptor_identifier/Model/Model.cpp
+++ b/src/descriptor_identifier/Model/Model.cpp
 #include <descriptor_identifier/Model/Model.hpp>
-Model::Model(std::vector<double> prop_train, std::vector<double> prop_test, std::vector<std::shared_ptr<FeatureNode>> feats, std::vector<int> task_sizes_train, std::vector<int> task_sizes_test) :
+Model::Model(std::vector<double> prop_train, std::vector<double> prop_test, std::vector<node_ptr> feats, std::vector<int> task_sizes_train, std::vector<int> task_sizes_test) :
    _n_samp_train(feats[0]->n_samp()),
    _n_samp_test(feats[0]->n_test_samp()),
    _n_dim(feats.size() + 1),


--- a/src/descriptor_identifier/Model/Model.hpp
+++ b/src/descriptor_identifier/Model/Model.hpp
@@ -19,7 +19,7 @@ class Model
    int _n_samp_test; //!< The number of test samples per feature
    int _n_dim; //!< Dimension of the model
-    std::vector<std::shared_ptr<FeatureNode>> _feats; //!< List of features in the model
+    std::vector<node_ptr> _feats; //!< List of features in the model
    std::vector<std::vector<double>> _coefs; //!< Coefficients for the features
    std::vector<double> _prop_train; //!< The property to be modeled
@@ -41,7 +41,7 @@ public:
     * @param prop The property
     * @param feats The features for the model
     */
-    Model(std::vector<double> prop_train, std::vector<double> prop_test, std::vector<std::shared_ptr<FeatureNode>> feats, std::vector<int> task_sizes_train, std::vector<int> task_sizes_test);
+    Model(std::vector<double> prop_train, std::vector<double> prop_test, std::vector<node_ptr> feats, std::vector<int> task_sizes_train, std::vector<int> task_sizes_test);
    /**


--- a/src/descriptor_identifier/SISSORegressor.cpp
+++ b/src/descriptor_identifier/SISSORegressor.cpp
@@ -176,7 +176,7 @@ void SISSORegressor::l0_norm(std::vector<double>& prop, int n_dim)
    inds = util_funcs::argsort(all_min_error);
-    std::vector<std::shared_ptr<FeatureNode>> min_nodes(n_dim);
+    std::vector<node_ptr> min_nodes(n_dim);
    std::vector<Model> models;
    for(int rr = 0; rr < _n_residual; ++rr)


--- a/src/feature_creation/feature_space/FeatureSpace.cpp
+++ b/src/feature_creation/feature_space/FeatureSpace.cpp
@@ -295,8 +295,15 @@ void FeatureSpace::generate_feature_space()
    _n_feat = _phi.size();
 }
-void FeatureSpace::project_generated(double* prop, int size, std::vector<std::shared_ptr<FeatureNode>>& phi_sel, std::vector<double>& scores_sel, std::vector<double>& scores_comp)
+void FeatureSpace::project_generated(double* prop, int size, std::vector<node_ptr>& phi_sel, std::vector<double>& scores_sel, std::vector<double>& scores_comp)
 {
+    std::vector<double> scores_prev_sel;
+    if(node_value_arrs::N_SELECTED > _n_sis_select)
+    {
+        scores_prev_sel.resize(_phi_selected.size());
+        _project(prop, scores_prev_sel.data(), _phi_selected, _task_sizes, size / _n_samp);
+    }
    for(auto feat = _phi.begin() + _start_gen.back() + _mpi_comm->rank(); feat < _phi.end(); feat += _mpi_comm->size())
    {
        std::fill_n(node_value_arrs::TEMP_STORAGE_REG.data(), node_value_arrs::TEMP_STORAGE_REG.size(), -1);
@@ -322,11 +329,11 @@ void FeatureSpace::project_generated(double* prop, int size, std::vector<std::sh
            bool is_valid = valid_score_against_current(end_check, generated_phi[inds[ii]]->value_ptr(), scores[inds[ii]], scores_sel, scores_comp);
            // Check the feature against those selected from previous SIS iterations
            if((node_value_arrs::N_SELECTED > _n_sis_select) && is_valid)
-                is_valid = valid_score_against_past(generated_phi[inds[ii]]->value_ptr(), scores_comp);
+                is_valid = valid_score_against_past(_phi[inds[ii]]->value_ptr(), _scores[inds[ii]], scores_prev_sel, scores_comp);
            if(is_valid)
            {
-                std::shared_ptr<FeatureNode> new_feat = std::make_shared<FeatureNode>(node_value_arrs::N_SELECTED - _n_sis_select + end_check, generated_phi[inds[ii]]->expr(), generated_phi[inds[ii]]->value(), generated_phi[inds[ii]]->test_value(), generated_phi[inds[ii]]->unit(), true);
+                node_ptr new_feat = std::make_shared<FeatureNode>(node_value_arrs::N_SELECTED - _n_sis_select + end_check, generated_phi[inds[ii]]->expr(), generated_phi[inds[ii]]->value(), generated_phi[inds[ii]]->test_value(), generated_phi[inds[ii]]->unit(), true);
                phi_sel.insert(phi_sel.begin() + end_check, new_feat);
                scores_sel.insert(scores_sel.begin() + end_check, cur_score);
                for(int jj = end_check + 1; jj < _n_sis_select; ++jj)
@@ -345,19 +352,17 @@ void FeatureSpace::project_generated(double* prop, int size, std::vector<std::sh
    }
 }
-bool FeatureSpace::valid_score_against_past(double* val_ptr, std::vector<double>& scores_comp)
+bool FeatureSpace::valid_score_against_past(double* val_ptr, double cur_score, std::vector<double> scores_past, std::vector<double>& scores_comp)
 {
-    double cur_feat_mean = util_funcs::mean(val_ptr, _n_samp);
+    std::transform(scores_past.begin(), scores_past.end(), scores_comp.begin(), [&cur_score](double score){return cur_score - score;});
-    double cur_feat_std = util_funcs::stand_dev(val_ptr, _n_samp);
-    std::transform(val_ptr, val_ptr + _n_samp, val_ptr, [&cur_feat_mean, &cur_feat_std](double val){return (val - cur_feat_mean) / cur_feat_std;});
-    dgemv_('T', _n_samp, scores_comp.size(), 1.0 / static_cast<double>(_n_samp), node_value_arrs::D_MATRIX.data(), _n_samp, val_ptr, 1, 0.0, scores_comp.data(), 1);
-    if(1.0 - util_funcs::max_abs_val<double>(scores_comp.data(), scores_comp.size()) < 1e-13)
+    // If two scores are the same, the features are possibly identical; if the scores differ, they cannot be
+    if(*std::min_element(scores_comp.begin(), scores_comp.end()) < 1e-10)
+    {
+        int dd = std::min_element(scores_comp.begin(), scores_comp.end()) - scores_comp.begin();
+        if(1.0 - std::abs(util_funcs::r(node_value_arrs::get_d_matrix_ptr(dd), val_ptr, _n_samp)) < 1e-13)
            return false;
+    }
-    std::transform(val_ptr, val_ptr + _n_samp, val_ptr, [&cur_feat_mean, &cur_feat_std](double val){return val * cur_feat_std + cur_feat_mean;});
    return true;
 }
@@ -377,26 +382,14 @@ bool FeatureSpace::valid_score_against_current(int end_check, double* val_ptr, d
 void FeatureSpace::sis(std::vector<double>& prop)
 {
-    std::vector<double> means(node_value_arrs::N_SELECTED);
-    std::vector<double> stand_devs(node_value_arrs::N_SELECTED);
    std::vector<double> scores_comp(std::max(node_value_arrs::N_SELECTED, _n_sis_select), 1.0);
    std::vector<double> scores_sel(_n_sis_select, 0.0);
-    std::vector<std::shared_ptr<FeatureNode>> phi_sel;
+    std::vector<node_ptr> phi_sel;
    phi_sel.reserve(_n_sis_select);
    int cur_feat = node_value_arrs::N_SELECTED;
-    // Standardize the description matrix
-    if(cur_feat > 0)
-    {
-        for(int dd = 0; dd < cur_feat; ++dd)
-        {
-            means[dd] = util_funcs::mean(node_value_arrs::get_d_matrix_ptr(dd), _n_samp);
-            stand_devs[dd] = util_funcs::stand_dev(node_value_arrs::get_d_matrix_ptr(dd), _n_samp);
-            std::transform(node_value_arrs::get_d_matrix_ptr(dd), node_value_arrs::get_d_matrix_ptr(dd) + _n_samp, node_value_arrs::get_d_matrix_ptr(dd), [&means, &stand_devs, &dd](double val){return (val - means[dd]) / stand_devs[dd];});
-        }
-    }
    node_value_arrs::resize_d_matrix_arr(_n_sis_select);
    _phi_selected.reserve(_phi_selected.size() + _n_sis_select);
@@ -408,12 +401,19 @@ void FeatureSpace::sis(std::vector<double>& prop)
    int cur_feat_local = 0;
    double cur_score = 0.0;
+    std::vector<double> scores_prev_sel;
+    if(node_value_arrs::N_SELECTED > _n_sis_select)
+    {
+        scores_prev_sel.resize(_phi_selected.size());
+        _project(prop.data(), scores_prev_sel.data(), _phi_selected, _task_sizes, prop.size() / _n_samp);
+    }
    while((cur_feat_local != _n_sis_select) && (ii < _scores.size()))
    {
        bool is_valid = valid_score_against_current(cur_feat_local, _phi[inds[ii]]->value_ptr(), _scores[inds[ii]], scores_sel, scores_comp);
        // Check the feature against those selected from previous SIS iterations
        if(cur_feat > 0 && is_valid)
-            is_valid = valid_score_against_past(_phi[inds[ii]]->value_ptr(), scores_comp);
+            is_valid = valid_score_against_past(_phi[inds[ii]]->value_ptr(), _scores[inds[ii]], scores_prev_sel, scores_comp);
        if(is_valid)
        {
@@ -431,11 +431,6 @@ void FeatureSpace::sis(std::vector<double>& prop)
        project_generated(prop.data(), prop.size(), phi_sel, scores_sel, scores_comp);
    }
-    // Unstandardize the description matrix
-    if(cur_feat > 0)
-        for(int dd = 0; dd < cur_feat; ++dd)
-            std::transform(node_value_arrs::get_d_matrix_ptr(dd), node_value_arrs::get_d_matrix_ptr(dd) + _n_samp, node_value_arrs::get_d_matrix_ptr(dd), [&means, &stand_devs, &dd](double val){return val * stand_devs[dd] + means[dd];});
    phi_sel.resize(_n_sis_select);
    scores_sel.resize(_n_sis_select);
@@ -489,7 +484,7 @@ void FeatureSpace::sis(std::vector<double>& prop)
        if(_mpi_comm->rank() == 0)
        {
            std::vector<double> sent_scores(_n_sis_select * _mpi_comm->size(), 0.0);
-            std::vector<std::shared_ptr<FeatureNode>> sent_phi(_n_sis_select * _mpi_comm->size());
+            std::vector<node_ptr> sent_phi(_n_sis_select * _mpi_comm->size());
            std::copy_n(scores_sel.begin(), _n_sis_select, sent_scores.begin());
            std::copy_n(phi_sel.begin(), _n_sis_select, sent_phi.begin());


--- a/src/feature_creation/feature_space/FeatureSpace.hpp
+++ b/src/feature_creation/feature_space/FeatureSpace.hpp
@@ -20,7 +20,7 @@
 */
 class FeatureSpace
 {
-    std::vector<std::shared_ptr<FeatureNode>> _phi_selected; //!< selected features
+    std::vector<node_ptr> _phi_selected; //!< selected features
    std::vector<node_ptr> _phi; //!< all features
    std::vector<node_ptr> _phi_0; //!< initial feature space
@@ -82,7 +82,7 @@ public:
    /**
     * @brief Accessor function for _phi_selected
     */
-    inline std::vector<std::shared_ptr<FeatureNode>> phi_selected(){return _phi_selected;};
+    inline std::vector<node_ptr> phi_selected(){return _phi_selected;};
    /**
     * @brief Accessor function for _phi
@@ -108,9 +108,9 @@ public:
    void generate_new_feats(std::vector<node_ptr>::iterator& feat, std::vector<node_ptr>& feat_set, int& feat_ind, double l_bound=1e-50, double u_bound=1e50);
-    void project_generated(double* prop, int size, std::vector<std::shared_ptr<FeatureNode>>& phi_selected, std::vector<double>& scores_selected, std::vector<double>& scores_comp);
+    void project_generated(double* prop, int size, std::vector<node_ptr>& phi_selected, std::vector<double>& scores_selected, std::vector<double>& scores_comp);
-    bool valid_score_against_past(double* val_ptr, std::vector<double>& scores_comp);
+    bool valid_score_against_past(double* val_ptr, double cur_score, std::vector<double> scores_past, std::vector<double>& scores_comp);
    bool valid_score_against_current(int end_check, double* val_ptr, double cur_score, std::vector<double>& scores_selected, std::vector<double>& scores_comp);
    /**