From 553827ef405041f57bde8ae2c31159853dbc7a63 Mon Sep 17 00:00:00 2001
From: Thomas Purcell <purcell@fhi-berlin.mpg.de>
Date: Mon, 8 Jun 2020 14:21:22 +0200
Subject: [PATCH] Bug Fixes

Typos introduced while adding the test set led to problems.
---
 src/descriptor_identifier/Model/Model.cpp     | 27 ++++++++++++-------
 src/descriptor_identifier/Model/Model.hpp     |  6 ++---
 .../negative_exponential.cpp                  |  6 +++--
 .../allowed_operator_nodes/sin.cpp            |  4 +--
 .../allowed_operator_nodes/subtract.hpp       |  2 +-
 src/inputs/InputParser.cpp                    |  1 +
 src/main.cpp                                  |  9 +++++--
 7 files changed, 36 insertions(+), 19 deletions(-)

diff --git a/src/descriptor_identifier/Model/Model.cpp b/src/descriptor_identifier/Model/Model.cpp
index 3a3e09ae..95cbb455 100644
--- a/src/descriptor_identifier/Model/Model.cpp
+++ b/src/descriptor_identifier/Model/Model.cpp
@@ -22,11 +22,12 @@ Model::Model(std::vector<double> prop_train, std::vector<double> prop_test, std:
     for(int ff = 0; ff < feats.size(); ++ff)
     {
         std::copy_n(feats[ff]->value_ptr(), _n_samp_train, _D_train.data() + ff * _n_samp_train);
-        std::copy_n(feats[ff]->test_value_ptr(), _n_samp_test, _D_test.data() + ff * _n_samp_test);
         std::copy_n(feats[ff]->value_ptr(), _n_samp_train, a.data() + ff * _n_samp_train);
+
+        if(_n_samp_test > 0)
+            std::copy_n(feats[ff]->test_value_ptr(), _n_samp_test, _D_test.data() + ff * _n_samp_test);
     }
     std::copy_n(a.data() + feats.size() * _n_samp_train, _n_samp_train, _D_train.data() + feats.size() * _n_samp_train);
-    std::copy_n(a.data() + feats.size() * _n_samp_train, _n_samp_test, _D_test.data() + feats.size() * _n_samp_test);
 
     std::vector<double> s(_n_dim, 0.0);
     std::vector<double> work(_n_dim * _n_samp_train, 0.0);
@@ -37,10 +38,14 @@ Model::Model(std::vector<double> prop_train, std::vector<double> prop_test, std:
     std::copy_n(prop_train.begin(), _n_dim, _coefs.data());
 
     dgemv_('N', _n_samp_train, _n_dim, 1.0, _D_train.data(), _n_samp_train, _coefs.data(), 1, 0.0, _prop_train_est.data(), 1);
-    dgemv_('N', _n_samp_test, _n_dim, 1.0, _D_test.data(), _n_samp_test, _coefs.data(), 1, 0.0, _prop_test_est.data(), 1);
-
     std::transform(_prop_train_est.begin(), _prop_train_est.end(), _prop_train.data(), _train_error.data(), std::minus<double>());
-    std::transform(_prop_test_est.begin(), _prop_test_est.end(), _prop_test.data(), _test_error.data(), std::minus<double>());
+    if(_n_samp_test > 0)
+    {
+        std::copy_n(std::vector<double>(_n_samp_test, 1.0).data(), _n_samp_test, _D_test.data() + feats.size() * _n_samp_test);
+        dgemv_('N', _n_samp_test, _n_dim, 1.0, _D_test.data(), _n_samp_test, _coefs.data(), 1, 0.0, _prop_test_est.data(), 1);
+        std::transform(_prop_test_est.begin(), _prop_test_est.end(), _prop_test.data(), _test_error.data(), std::minus<double>());
+    }
+
 }
 
 std::string Model::toString() const
@@ -67,7 +72,7 @@ void Model::train_to_file(std::string filename)
     out_file_stream.open(filename);
 
     out_file_stream << "# " << toString() << std::endl;
-    out_file_stream << "# RMSE: " << rmse() << "; Max AE( " << max_ae() << std::endl;
+    out_file_stream << "# RMSE: " << rmse() << "; Max AE: " << max_ae() << std::endl;
     out_file_stream << "# coeffs:";
     for(auto& coef: _coefs)
         out_file_stream << " " << std::setw(24) << std::setprecision(18) << coef << ";";
@@ -86,7 +91,7 @@ void Model::train_to_file(std::string filename)
     out_file_stream.close();
 }
 
-void Model::test_to_file(std::string filename)
+void Model::test_to_file(std::string filename, std::vector<int> test_inds)
 {
     boost::filesystem::path p(filename.c_str());
     boost::filesystem::create_directories(p.remove_filename());
@@ -95,6 +100,10 @@ void Model::test_to_file(std::string filename)
     out_file_stream.open(filename);
 
     out_file_stream << "# " << toString() << std::endl;
+    out_file_stream << "# Testing Indexes: [" << test_inds[0];
+    for(int ss = 1; ss < _n_samp_test; ++ss)
+        out_file_stream << ", " << test_inds[ss];
+    out_file_stream << "]" << std::endl;
     out_file_stream << "# RMSE: " << test_rmse() << "; Max AE: " << test_max_ae() << std::endl;
     out_file_stream << "# coeffs:";
     for(auto& coef: _coefs)
@@ -107,8 +116,8 @@ void Model::test_to_file(std::string filename)
     for(int ss = 0; ss < _n_samp_test; ++ss)
     {
         out_file_stream << std::setw(24) << std::setprecision(18) << _prop_test[ss] << std::setw(24) << std::setprecision(18) << _prop_test_est[ss];
-        for(int ff = 0; ff < _n_dim - 1; ++ff)
-            out_file_stream << std::setw(24) << std::setprecision(18) << _D_test[ss + ff * _n_samp_train];
+        for(int ff = 0; ff < _feats.size(); ++ff)
+            out_file_stream << std::setw(24) << std::setprecision(18) << _feats[ff]->test_value()[ss];
         out_file_stream << std::endl;
     }
     out_file_stream.close();
diff --git a/src/descriptor_identifier/Model/Model.hpp b/src/descriptor_identifier/Model/Model.hpp
index 208664ad..574a8abb 100644
--- a/src/descriptor_identifier/Model/Model.hpp
+++ b/src/descriptor_identifier/Model/Model.hpp
@@ -77,19 +77,19 @@ public:
      */
     inline double max_ae()
     {
-        return *std::max_element(_train_error.data(), _train_error.data() + _n_samp_train, [](double d1, double d2){return std::abs(d1) < std::abs(d2);});
+        return std::abs(*std::max_element(_train_error.data(), _train_error.data() + _n_samp_train, [](double d1, double d2){return std::abs(d1) < std::abs(d2);}));
     }
 
     inline double test_max_ae()
     {
-        return *std::max_element(_test_error.data(), _test_error.data() + _n_samp_test, [](double d1, double d2){return std::abs(d1) < std::abs(d2);});
+        return std::abs(*std::max_element(_test_error.data(), _test_error.data() + _n_samp_test, [](double d1, double d2){return std::abs(d1) < std::abs(d2);}));
     }
 
 
     /**
      * @brief Print model to a file
      */
-    void test_to_file(std::string filename);
+    void test_to_file(std::string filename, std::vector<int> test_inds);
 
     /**
      * @brief Print model to a file
diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/negative_exponential.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/negative_exponential.cpp
index b4761ebe..df28a9e1 100644
--- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/negative_exponential.cpp
+++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/negative_exponential.cpp
@@ -13,9 +13,10 @@ NegExpNode::NegExpNode(std::vector<node_ptr> feats, int rung, int feat_ind):
         throw InvalidFeatureException();
 
     set_value();
-
     if(is_nan() || is_const())
         throw InvalidFeatureException();
+
+    set_test_value();
  }
 
 NegExpNode::NegExpNode(node_ptr feat, int rung, int feat_ind):
@@ -28,9 +29,10 @@ NegExpNode::NegExpNode(node_ptr feat, int rung, int feat_ind):
         throw InvalidFeatureException();
 
     set_value();
-
     if(is_nan() || is_const())
         throw InvalidFeatureException();
+
+    set_test_value();
  }
 
 void NegExpNode::update_add_sub_leaves(std::map<std::string, int>& add_sub_leaves, int pl_mn, int& expected_abs_tot)
diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sin.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sin.cpp
index 3ab1f7e5..3aedd073 100644
--- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sin.cpp
+++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sin.cpp
@@ -16,7 +16,7 @@ SinNode::SinNode(std::vector<node_ptr> feats, int rung, int feat_ind):
     if(is_nan() || is_const())
         throw InvalidFeatureException();
 
-       set_test_value();
+    set_test_value();
  }
 
 SinNode::SinNode(node_ptr feat, int rung, int feat_ind):
@@ -32,7 +32,7 @@ SinNode::SinNode(node_ptr feat, int rung, int feat_ind):
     if(is_nan() || is_const())
         throw InvalidFeatureException();
 
-       set_test_value();
+    set_test_value();
  }
 
 void SinNode::update_add_sub_leaves(std::map<std::string, int>& add_sub_leaves, int pl_mn, int& expected_abs_tot)
diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/subtract.hpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/subtract.hpp
index 2c946896..8883fe3e 100644
--- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/subtract.hpp
+++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/subtract.hpp
@@ -33,7 +33,7 @@ public:
     inline void set_test_value(int offset = -1)
     {
         offset = (offset == -1) ? rung() : offset;
-        allowed_op_funcs::mult(_n_test_samp, _feats[0]->test_value_ptr(offset + 2), _feats[1]->test_value_ptr(offset + 1), node_value_arrs::get_test_value_ptr(_arr_ind, offset));
+        allowed_op_funcs::sub(_n_test_samp, _feats[0]->test_value_ptr(offset + 2), _feats[1]->test_value_ptr(offset + 1), node_value_arrs::get_test_value_ptr(_arr_ind, offset));
     }
 
     /**
diff --git a/src/inputs/InputParser.cpp b/src/inputs/InputParser.cpp
index f362c51a..d2ce52ef 100644
--- a/src/inputs/InputParser.cpp
+++ b/src/inputs/InputParser.cpp
@@ -31,6 +31,7 @@ InputParser::InputParser(boost::property_tree::ptree IP, std::string fn, std::sh
         std::shuffle (indexes.begin(), indexes.end(), std::default_random_engine(seed));
 
         std::copy_n(indexes.begin(), _n_leave_out, _leave_out_inds.begin());
+        std::sort(_leave_out_inds.begin(), _leave_out_inds.end());
     }
     else if((_n_leave_out == 0) && (_leave_out_inds.size() > 0))
         _n_leave_out = _leave_out_inds.size();
diff --git a/src/main.cpp b/src/main.cpp
index 7d51ce4f..a23cff94 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -44,12 +44,17 @@ int main(int argc, char const *argv[])
     {
         for(int ii = 0; ii < sisso.models().size(); ++ii)
         {
-            std::cout << sisso.models()[ii][0].rmse() << std::endl;
+            std::cout << "Train RMSE: " << sisso.models()[ii][0].rmse();
+            if(IP._prop_test.size() > 0)
+                std::cout << "; Test RMSE: " << sisso.models()[ii][0].test_rmse() << std::endl;
+            else
+                std::cout << std::endl;
             std::cout << sisso.models()[ii][0] << "\n" << std::endl;
             for(int jj = 0; jj < sisso.models()[ii].size(); ++jj)
             {
                 sisso.models()[ii][jj].train_to_file("models/train_dim_" + std::to_string(ii) + "_model_" + std::to_string(jj) + ".dat");
-                sisso.models()[ii][jj].test_to_file("models/test_dim_" + std::to_string(ii) + "_model_" + std::to_string(jj) + ".dat");
+                if(IP._prop_test.size() > 0)
+                    sisso.models()[ii][jj].test_to_file("models/test_dim_" + std::to_string(ii) + "_model_" + std::to_string(jj) + ".dat", IP._leave_out_inds);
             }
         }
     }
-- 
GitLab