From c16acd5fbeae843c33cdedbe1b7648ad8ca711c9 Mon Sep 17 00:00:00 2001
From: Thomas Purcell <purcell@fhi-berlin.mpg.de>
Date: Mon, 6 Jul 2020 09:52:54 +0200
Subject: [PATCH] Refactor of Model

Standardized the model output file format and added constructors to recreate a Model from the output files
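
A minimal usage sketch of the new interface introduced by this patch (the file
paths and leave-out indices below are illustrative, not taken from any actual
run; only the constructor and to_file signatures come from the code changes):

    #include <descriptor_identifier/Model/Model.hpp>

    int main()
    {
        // Recreate a model from previously written output files
        Model train_only("models/train_dim_1_model_0.dat");
        Model full("models/train_dim_1_model_0.dat", "models/test_dim_1_model_0.dat");

        // Write the model back out in the standardized format
        full.to_file("models/train_copy.dat");                    // training data (train = true by default)
        full.to_file("models/test_copy.dat", false, {0, 5, 12});  // test data with illustrative leave-out indices

        return 0;
    }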
---
 src/descriptor_identifier/Model/Model.cpp     | 295 ++++++++++++++----
 src/descriptor_identifier/Model/Model.hpp     |  40 ++-
 src/descriptor_identifier/SISSORegressor.cpp  |  30 +-
 src/descriptor_identifier/SISSORegressor.hpp  |   7 +-
 .../feature_space/FeatureSpace.cpp            |  19 +-
 .../feature_space/FeatureSpace.hpp            |   8 +-
 src/feature_creation/node/FeatureNode.cpp     |   9 +-
 src/feature_creation/node/FeatureNode.hpp     |   2 +-
 src/feature_creation/node/ModelNode.cpp       |   2 +-
 src/feature_creation/node/Node.cpp            |   4 +-
 src/main.cpp                                  |  14 +-
 src/utils/string_utils.cpp                    |  11 +
 src/utils/string_utils.hpp                    |  18 ++
 13 files changed, 357 insertions(+), 102 deletions(-)
 create mode 100644 src/utils/string_utils.cpp
 create mode 100644 src/utils/string_utils.hpp

diff --git a/src/descriptor_identifier/Model/Model.cpp b/src/descriptor_identifier/Model/Model.cpp
index bc313a73..01122e53 100644
--- a/src/descriptor_identifier/Model/Model.cpp
+++ b/src/descriptor_identifier/Model/Model.cpp
@@ -67,6 +67,175 @@ Model::Model(std::vector<double> prop_train, std::vector<double> prop_test, std:
     }
 }
 
+Model::Model(std::string train_file)
+{
+    _n_samp_test = 0;
+
+    std::vector<std::string> split_str;
+    std::vector<std::string> feature_expr_train = populate_model(train_file, true);
+
+    for(int ff = 0; ff < feature_expr_train.size(); ++ff)
+    {
+        split_str = str_utils::split_string_trim(feature_expr_train[ff]);
+
+        int rung = std::stoi(split_str[0]);
+        std::string unit_str = split_str[1];
+        std::string expr = split_str[2];
+
+        std::vector<double> feat_val(_n_samp_train);
+        std::vector<double> feat_test_val = {};
+        std::copy_n(&_D_train[ff * _n_samp_train], _n_samp_train, feat_val.data());
+
+        model_node_ptr feat = std::make_shared<ModelNode>(ff, rung, expr, feat_val, feat_test_val, Unit(unit_str));
+        _feats.push_back(feat);
+    }
+
+}
+
+Model::Model(std::string train_file, std::string test_file)
+{
+    std::vector<std::string> split_str;
+    std::vector<std::string> feature_expr_train = populate_model(train_file, true);
+    std::vector<std::string> feature_expr_test = populate_model(test_file, false);
+
+    for(int ff = 0; ff < feature_expr_train.size(); ++ff)
+    {
+        if(feature_expr_train[ff] != feature_expr_test[ff])
+            throw std::logic_error("Features for train and test file do not agree");
+
+        split_str = str_utils::split_string_trim(feature_expr_train[ff]);
+
+        int rung = std::stoi(split_str[0]);
+        std::string unit_str = split_str[1];
+        std::string expr = split_str[2];
+        std::vector<double> feat_val(_n_samp_train);
+        std::vector<double> feat_test_val(_n_samp_test);
+
+        std::copy_n(&_D_train[ff * _n_samp_train], _n_samp_train, feat_val.data());
+        std::copy_n(&_D_test[ff * _n_samp_test], _n_samp_test, feat_test_val.data());
+
+        _feats.push_back(std::make_shared<ModelNode>(ff, rung, expr, feat_val, feat_test_val, Unit(unit_str)));
+    }
+}
+
+std::vector<std::string> Model::populate_model(std::string filename, bool train)
+{
+
+    std::ifstream file_stream;
+    file_stream.open(filename, std::ios::in);
+
+    std::vector<std::string> feature_expr;
+    std::vector<std::string> split_line;
+
+    // Store model line
+    std::string model_line;
+    std::getline(file_stream, model_line);
+
+    // Get the error
+    std::string error_line;
+    std::getline(file_stream, error_line);
+    split_line = str_utils::split_string_trim(error_line);
+    double rmse = std::stod(split_line[1]);
+    double max_ae = std::stod(split_line[3]);
+
+    // Get coefficients
+    std::string line;
+    std::getline(file_stream, line);
+    std::getline(file_stream, line);
+
+    int n_task = 0;
+    int _n_dim = 0;
+    std::getline(file_stream, line);
+
+    do
+    {
+        ++n_task;
+        split_line = str_utils::split_string_trim(line);
+        _n_dim = split_line.size() - 3;
+        if(train)
+        {
+            _coefs.push_back(std::vector<double>(_n_dim + 1, 0.0));
+            std::transform(split_line.begin() + 1, split_line.end()-1, _coefs.back().data(), [](std::string s){return  std::stod(s);});
+        }
+        std::getline(file_stream, line);
+    } while(line.substr(0, 39).compare("# Feature Rung, Units, and Expressions") != 0);
+
+    std::getline(file_stream, line);
+    for(int ff = 0; ff < _n_dim; ++ff)
+    {
+        feature_expr.push_back(line.substr(6));
+        std::getline(file_stream, line);
+    }
+
+    std::getline(file_stream, line);
+
+    int n_samp = 0;
+    for(int tt = 0; tt < n_task; ++tt)
+    {
+        std::getline(file_stream, line);
+        split_line = str_utils::split_string_trim(line);
+        n_samp += std::stoi(split_line[1]);
+        if(train)
+            _task_sizes_train.push_back(std::stoi(split_line[1]));
+        else
+            _task_sizes_test.push_back(std::stoi(split_line[1]));
+    }
+    if(train)
+    {
+        _n_samp_train = n_samp;
+        _prop_train.resize(n_samp);
+        _prop_train_est.resize(n_samp);
+        _train_error.resize(n_samp);
+    }
+    else
+    {
+        _n_samp_test = n_samp;
+        _prop_test.resize(n_samp);
+        _prop_test_est.resize(n_samp);
+        _test_error.resize(n_samp);
+    }
+    std::getline(file_stream, line);
+    std::getline(file_stream, line);
+    if(!train)
+        std::getline(file_stream, line);
+    std::vector<std::vector<double>> feat_vals(_n_dim, std::vector<double>(n_samp, 0.0));
+    for(int ns = 0; ns < n_samp; ++ns)
+    {
+        std::getline(file_stream, line);
+        split_line = str_utils::split_string_trim(line);
+        if(train)
+        {
+            _prop_train[ns] = std::stod(split_line[0]);
+            _prop_train_est[ns] = std::stod(split_line[1]);
+            _train_error[ns] = _prop_train_est[ns] - _prop_train[ns];
+
+        }
+        else
+        {
+            _prop_test[ns] = std::stod(split_line[0]);
+            _prop_test_est[ns] = std::stod(split_line[1]);
+            _test_error[ns] = _prop_test_est[ns] - _prop_test[ns];
+        }
+        for(int nf = 0; nf < _n_dim; ++nf)
+        {
+            feat_vals[nf][ns] = std::stod(split_line[2 + nf]);
+        }
+    }
+    if(train)
+    {
+        _D_train.resize(_n_dim * n_samp);
+        for(int nf = 0; nf < _n_dim; ++nf)
+            std::copy_n(feat_vals[nf].data(), n_samp, &_D_train[nf * n_samp]);
+    }
+    else
+    {
+        _D_test.resize(_n_dim * n_samp);
+        for(int nf = 0; nf < _n_dim; ++nf)
+            std::copy_n(feat_vals[nf].data(), n_samp, &_D_test[nf * n_samp]);
+    }
+    return feature_expr;
+}
+
 std::string Model::toString() const
 {
     std::stringstream unit_rep;
@@ -75,14 +244,14 @@ std::string Model::toString() const
         unit_rep << " + a" << std::to_string(ff) << " * " << _feats[ff]->expr();
     return unit_rep.str();
 }
-//
+
 std::ostream& operator<< (std::ostream& outStream, const Model& model)
 {
     outStream << model.toString();
     return outStream;
 }
 
-void Model::train_to_file(std::string filename)
+void Model::to_file(std::string filename, bool train, std::vector<int> test_inds)
 {
     boost::filesystem::path p(filename.c_str());
     boost::filesystem::create_directories(p.remove_filename());
@@ -91,78 +260,74 @@ void Model::train_to_file(std::string filename)
     out_file_stream.open(filename);
 
     out_file_stream << "# " << toString() << std::endl;
-    out_file_stream << "# RMSE: " << rmse() << "; Max AE: " << max_ae() << std::endl;
+    if(train)
+        out_file_stream << "# RMSE: " << std::setprecision(15) << rmse() << "; Max AE: " << max_ae() << std::endl;
+    else
+        out_file_stream << "# RMSE: " << std::setprecision(15) << test_rmse() << "; Max AE: " << test_max_ae() << std::endl;
 
     out_file_stream << "# Coefficients" << std::endl;
-    out_file_stream << std::setw(10) << std::left << "# Task,";
+    out_file_stream << std::setw(10) << std::left << "# Task;";
+
     for(int cc = 0; cc < _coefs[0].size() - 1; ++cc)
-        out_file_stream << std::setw(24) << "a" + std::to_string(cc);
-    out_file_stream << std::setw(24) << "c0" << std::endl;
+        out_file_stream << std::setw(24) << " a" + std::to_string(cc);
+
+    out_file_stream << " c0" << std::endl;
 
     for(int cc = 0; cc < _coefs.size(); ++cc)
     {
-        out_file_stream << std::setw(10) << std::left << "# " + std::to_string(cc);
+        out_file_stream << std::setw(10) << std::left << "# " + std::to_string(cc) + ", ";
         for(auto& coeff : _coefs[cc])
-            out_file_stream << std::setw(24) << std::setprecision(18) << coeff;
+            out_file_stream << std::setprecision(15) << std::scientific << std::right << std::setw(22) << coeff << std::setw(2) << ", ";
         out_file_stream << "\n";
     }
 
-    out_file_stream << "\n" << std::setw(24) << std::left << "# Property Value" << std::setw(24) << "Property Value (EST)";
+    out_file_stream << "# Feature Rung, Units, and Expressions" << std::endl;
     for(int ff = 0; ff < _feats.size(); ++ff)
-        out_file_stream << std::setw(24) << "Feature " + std::to_string(ff) + " Value";
-    out_file_stream << std::endl;
+        out_file_stream << std::setw(6) << std::left << "# " + std::to_string(ff) + ", " << std::to_string(_feats[ff]->rung()) + ", " << std::setw(50) << _feats[ff]->unit().toString() + ", " << _feats[ff]->expr() << std::endl;
 
-    for(int ss = 0; ss < _n_samp_train; ++ss)
+    out_file_stream << "# Number of Samples Per Task" << std::endl;
+    if(train)
     {
-        out_file_stream << std::setw(24) << std::setprecision(18) << _prop_train[ss] << std::setw(24) << std::setprecision(18) << _prop_train_est[ss];
-        for(int ff = 0; ff < _n_dim - 1; ++ff)
-            out_file_stream << std::setw(24) << std::setprecision(18) << _feats[ff]->value()[ss];
-        out_file_stream << std::endl;
+        out_file_stream << std::setw(10) << std::left << "# Task;" << std::setw(24) << "n_mats_train" << std::endl;
+        for(int tt = 0; tt < _task_sizes_train.size(); ++tt)
+            out_file_stream << std::left << std::setw(10) << "# " + std::to_string(tt) + ", " << std::left << std::setw(22) << _task_sizes_train[tt] << std::endl;
     }
-    out_file_stream.close();
-}
-
-void Model::test_to_file(std::string filename, std::vector<int> test_inds)
-{
-    boost::filesystem::path p(filename.c_str());
-    boost::filesystem::create_directories(p.remove_filename());
-
-    std::ofstream out_file_stream = std::ofstream();
-    out_file_stream.open(filename);
-
-    out_file_stream << "# " << toString() << std::endl;
-    out_file_stream << "# RMSE: " << rmse() << "; Max AE: " << max_ae() << std::endl;
-
-    out_file_stream << "# Coefficients" << std::endl;
-    out_file_stream << std::setw(10) << std::left << "# Task";
-    for(int cc = 0; cc < _coefs[0].size() - 1; ++cc)
-        out_file_stream << std::setw(24) << "a" + std::to_string(cc);
-    out_file_stream << std::setw(24) << "c0" << std::endl;
-
-    for(int cc = 0; cc < _coefs.size(); ++cc)
+    else
     {
-        out_file_stream << std::setw(10) << std::left << "# " + std::to_string(cc);
-        for(auto& coeff : _coefs[cc])
-            out_file_stream << std::setw(24) << std::setprecision(18) << coeff;
-        out_file_stream << "\n";
-    }
+        out_file_stream << std::setw(10) << std::left << "# Task;" << std::setw(24) << "n_mats_test" << std::endl;
+        for(int tt = 0; tt < _task_sizes_test.size(); ++tt)
+            out_file_stream << std::left << std::setw(10) << "# " + std::to_string(tt) + ", " << std::left << std::setw(22) << _task_sizes_test[tt] << std::endl;
 
-    out_file_stream << "# Test Indexes: [ " << test_inds[0];
-    for(int ii = 1; ii < test_inds.size(); ++ii)
-        out_file_stream << ", " << test_inds[ii];
-    out_file_stream << " ]" << std::endl;
+            out_file_stream << "# Test Indexes: [ " << test_inds[0];
+            for(int ii = 1; ii < test_inds.size(); ++ii)
+                out_file_stream << ", " << test_inds[ii];
+            out_file_stream << " ]" << std::endl;
+    }
 
-    out_file_stream << "\n" << std::setw(24) << std::left << "# Property Value" << std::setw(24) << "Property Value (EST)";
+    out_file_stream << "\n" << std::setw(24) << std::left << "#Property Value" << std::setw(24) << " Property Value (EST)";
     for(int ff = 0; ff < _feats.size(); ++ff)
-        out_file_stream << std::setw(24) << "Feature " + std::to_string(ff) + " Value";
+        out_file_stream << std::setw(24) << " Feature " + std::to_string(ff) + " Value";
     out_file_stream << std::endl;
 
-    for(int ss = 0; ss < _n_samp_test; ++ss)
+    if(train)
     {
-        out_file_stream << std::setw(24) << std::setprecision(18) << _prop_test[ss] << std::setw(24) << std::setprecision(18) << _prop_test_est[ss];
-        for(int ff = 0; ff < _n_dim - 1; ++ff)
-            out_file_stream << std::setw(24) << std::setprecision(18) << _feats[ff]->test_value()[ss];
-        out_file_stream << std::endl;
+        for(int ss = 0; ss < _n_samp_train; ++ss)
+        {
+            out_file_stream << std::right << std::setw(22) << std::setprecision(15) << std::scientific << _prop_train[ss] << std::setw(2) << ", " << std::setw(22) << _prop_train_est[ss];
+            for(int ff = 0; ff < _n_dim - 1; ++ff)
+                out_file_stream << std::right << std::setw(2) << ", " << std::setw(22) << std::setprecision(15) << _feats[ff]->value()[ss];
+            out_file_stream << std::endl;
+        }
+    }
+    else
+    {
+        for(int ss = 0; ss < _n_samp_test; ++ss)
+        {
+            out_file_stream << std::right << std::setw(22) << std::setprecision(15) << std::scientific << _prop_test[ss] << std::setw(2) << ", " << std::setw(22) << _prop_test_est[ss];
+            for(int ff = 0; ff < _n_dim - 1; ++ff)
+                out_file_stream << std::right << std::setw(2) << ", " << std::setw(22) << std::setprecision(15) << _feats[ff]->test_value()[ss];
+            out_file_stream << std::endl;
+        }
     }
     out_file_stream.close();
 }
@@ -171,6 +336,8 @@ void Model::register_python()
 {
     using namespace boost::python;
     class_<Model>("Model", init<std::vector<double>, std::vector<double>, std::vector<model_node_ptr>, std::vector<int>, std::vector<int>>())
+        .def(init<std::string>())
+        .def(init<std::string, std::string>())
         .def("predict", &Model::predict)
         .def("fit", &Model::predict_train)
         .def("__str__", &Model::toString)
@@ -178,20 +345,16 @@ void Model::register_python()
         .def_readonly("_n_samp_train", &Model::_n_samp_train)
         .def_readonly("_n_samp_test", &Model::_n_samp_test)
         .def_readonly("_n_dim", &Model::_n_dim)
-        .def_readonly("_feats", &Model::_feats)
-        .def_readonly("_coefs", &Model::_coefs)
-        .def_readonly("_prop_train", &Model::_prop_train)
-        .def_readonly("_prop_test", &Model::_prop_test)
-        .def_readonly("_train_error", &Model::_train_error)
-        .def_readonly("_test_error", &Model::_test_error)
-        .def_readonly("_D_train", &Model::_D_train)
-        .def_readonly("_D_test", &Model::_D_test)
-        .def_readonly("_prop_train_est", &Model::_prop_train_est)
-        .def_readonly("_prop_test_est", &Model::_prop_test_est)
-        .def_readonly("_task_sizes_train", &Model::_task_sizes_train)
-        .def_readonly("_task_sizes_test", &Model::_task_sizes_test)
+        .add_property("prop_train_est", &Model::prop_train_est)
+        .add_property("prop_test_est", &Model::prop_test_est)
+        .add_property("prop_train", &Model::prop_train)
+        .add_property("prop_test", &Model::prop_test)
+        .add_property("train_error", &Model::train_error)
+        .add_property("test_error", &Model::test_error)
+        .add_property("feats", &Model::feats)
+        .add_property("coefs", &Model::coefs)
         .add_property("rmse", &Model::rmse)
         .add_property("test_rmse", &Model::test_rmse)
         .add_property("max_ae", &Model::max_ae)
         .add_property("test_max_ae", &Model::test_max_ae);
-}
\ No newline at end of file
+}
diff --git a/src/descriptor_identifier/Model/Model.hpp b/src/descriptor_identifier/Model/Model.hpp
index 7f13c1c3..e0a47000 100644
--- a/src/descriptor_identifier/Model/Model.hpp
+++ b/src/descriptor_identifier/Model/Model.hpp
@@ -1,6 +1,8 @@
 #ifndef MODEL
 #define MODEL
 
+#include <boost/algorithm/string.hpp>
+#include <boost/algorithm/string/trim.hpp>
 #include <boost/filesystem.hpp>
 #include <boost/python.hpp>
 
@@ -9,6 +11,10 @@
 #include<iostream>
 
 #include <feature_creation/node/ModelNode.hpp>
+#include <utils/string_utils.hpp>
+
+namespace python = boost::python;
+namespace np = boost::python::numpy;
 
 typedef std::shared_ptr<ModelNode> model_node_ptr;
 /**
@@ -23,7 +29,7 @@ class Model
 
     std::vector<model_node_ptr> _feats; //!< List of features in the model
 
-    std::vector<std::vector<double>> _coefs; //!< Coefficients for teh features
+    std::vector<std::vector<double>> _coefs; //!< Coefficients for the features
     std::vector<double> _prop_train; //!< The property to be modeled
     std::vector<double> _prop_test; //!< The property to be modeled
     std::vector<double> _train_error; //!< The error of the model
@@ -45,6 +51,10 @@ public:
      */
     Model(std::vector<double> prop_train, std::vector<double> prop_test, std::vector<model_node_ptr> feats, std::vector<int> task_sizes_train, std::vector<int> task_sizes_test);
 
+    Model(std::string train_file);
+    Model(std::string train_file, std::string test_file);
+
+    std::vector<std::string> populate_model(std::string filename, bool train);
 
     /**
      * @brief Convert the model to a string
@@ -89,16 +99,34 @@ public:
         return std::abs(*std::max_element(_test_error.data(), _test_error.data() + _n_samp_test, [](double d1, double d2){return std::abs(d1) < std::abs(d2);}));
     }
 
+    inline python::list coefs()
+    {
+        python::list coef_lst;
+        for(auto& task_coefs : _coefs)
+            coef_lst.append<python::list>(python_conv_utils::to_list<double>(task_coefs));
+        return coef_lst;
+    }
 
-    /**
-     * @brief Print model to a file
-     */
-    void test_to_file(std::string filename, std::vector<int> test_inds);
+    inline python::list feats()
+    {
+        python::list feat_lst;
+        for(auto& feat : _feats)
+            feat_lst.append<ModelNode>(*feat);
+        return feat_lst;
+    }
+
+    inline np::ndarray prop_train_est(){return python_conv_utils::to_ndarray<double>(_prop_train_est);}
+    inline np::ndarray prop_test_est(){return python_conv_utils::to_ndarray<double>(_prop_test_est);}
+    inline np::ndarray prop_train(){return python_conv_utils::to_ndarray<double>(_prop_train);}
+    inline np::ndarray prop_test(){return python_conv_utils::to_ndarray<double>(_prop_test);}
+    inline np::ndarray train_error(){return python_conv_utils::to_ndarray<double>(_train_error);}
+    inline np::ndarray test_error(){return python_conv_utils::to_ndarray<double>(_test_error);}
 
     /**
      * @brief Print model to a file
      */
-    void train_to_file(std::string filename);
+    void to_file(std::string filename, bool train = true, std::vector<int> test_inds = {});
+
 
     static void register_python();
 };
diff --git a/src/descriptor_identifier/SISSORegressor.cpp b/src/descriptor_identifier/SISSORegressor.cpp
index c7fdca74..8d124478 100644
--- a/src/descriptor_identifier/SISSORegressor.cpp
+++ b/src/descriptor_identifier/SISSORegressor.cpp
@@ -1,6 +1,7 @@
 #include <descriptor_identifier/SISSORegressor.hpp>
 
-SISSORegressor::SISSORegressor(std::shared_ptr<FeatureSpace> feat_space, std::vector<double> prop, std::vector<double> prop_test, std::vector<int> task_sizes_train, std::vector<int> task_sizes_test, int n_dim, int n_residual):
+
+SISSORegressor::SISSORegressor(std::shared_ptr<FeatureSpace> feat_space, std::vector<double> prop, std::vector<double> prop_test, std::vector<int> task_sizes_train, std::vector<int> task_sizes_test, std::vector<int> leave_out_inds, int n_dim, int n_residual):
     _prop(prop),
     _prop_test(prop_test),
     _a((n_dim + 1) * prop.size()),
@@ -9,6 +10,7 @@ SISSORegressor::SISSORegressor(std::shared_ptr<FeatureSpace> feat_space, std::ve
     _s(n_dim + 1),
     _task_sizes_train(task_sizes_train),
     _task_sizes_test(task_sizes_test),
+    _leave_out_inds(leave_out_inds),
     _feat_space(feat_space),
     _mpi_comm(feat_space->mpi_comm()),
     _n_samp(prop.size()),
@@ -28,7 +30,7 @@ SISSORegressor::SISSORegressor(std::shared_ptr<FeatureSpace> feat_space, std::ve
     _work = std::vector<double>(_lwork, 0.0);
 }
 
-SISSORegressor::SISSORegressor(std::shared_ptr<FeatureSpace> feat_space, np::ndarray prop, np::ndarray prop_test, python::list task_sizes_train, python::list task_sizes_test, int n_dim, int n_residual) :
+SISSORegressor::SISSORegressor(std::shared_ptr<FeatureSpace> feat_space, np::ndarray prop, np::ndarray prop_test, python::list task_sizes_train, python::list task_sizes_test, python::list leave_out_inds, int n_dim, int n_residual) :
     _prop(python_conv_utils::from_ndarray<double>(prop)),
     _prop_test(python_conv_utils::from_ndarray<double>(prop_test)),
     _a((n_dim + 1) * prop.shape(0)),
@@ -37,6 +39,7 @@ SISSORegressor::SISSORegressor(std::shared_ptr<FeatureSpace> feat_space, np::nda
     _s(n_dim + 1),
     _task_sizes_train(python_conv_utils::from_list<int>(task_sizes_train)),
     _task_sizes_test(python_conv_utils::from_list<int>(task_sizes_test)),
+    _leave_out_inds(python_conv_utils::from_list<int>(leave_out_inds)),
     _feat_space(feat_space),
     _mpi_comm(feat_space->mpi_comm()),
     _n_samp(prop.shape(0)),
@@ -56,7 +59,7 @@ SISSORegressor::SISSORegressor(std::shared_ptr<FeatureSpace> feat_space, np::nda
     _work = std::vector<double>(_lwork, 0.0);
 }
 
-SISSORegressor::SISSORegressor(std::shared_ptr<FeatureSpace> feat_space, python::list prop, python::list prop_test, python::list task_sizes_train, python::list task_sizes_test, int n_dim, int n_residual) :
+SISSORegressor::SISSORegressor(std::shared_ptr<FeatureSpace> feat_space, python::list prop, python::list prop_test, python::list task_sizes_train, python::list task_sizes_test, python::list leave_out_inds, int n_dim, int n_residual) :
     _prop(python_conv_utils::from_list<double>(prop)),
     _prop_test(python_conv_utils::from_list<double>(prop_test)),
     _a((n_dim + 1) * boost::python::len(prop)),
@@ -65,6 +68,7 @@ SISSORegressor::SISSORegressor(std::shared_ptr<FeatureSpace> feat_space, python:
     _s(n_dim + 1),
     _task_sizes_train(python_conv_utils::from_list<int>(task_sizes_train)),
     _task_sizes_test(python_conv_utils::from_list<int>(task_sizes_test)),
+    _leave_out_inds(python_conv_utils::from_list<int>(leave_out_inds)),
     _feat_space(feat_space),
     _mpi_comm(feat_space->mpi_comm()),
     _n_samp(boost::python::len(prop)),
@@ -158,6 +162,12 @@ void SISSORegressor::fit()
         model_node_ptr model_feat = std::make_shared<ModelNode>(_feat_space->phi_selected()[rr]->arr_ind(), _feat_space->phi_selected()[rr]->rung(), _feat_space->phi_selected()[rr]->expr(), _feat_space->phi_selected()[rr]->value(), _feat_space->phi_selected()[rr]->test_value(), _feat_space->phi_selected()[rr]->unit());
         models.push_back(Model(_prop, _prop_test, {model_feat}, _task_sizes_train, _task_sizes_test));
         models.back().copy_error(&residual[rr * _n_samp]);
+        if(_mpi_comm->rank() == 0)
+        {
+            models.back().to_file("models/train_dim_1_model_" + std::to_string(rr) + ".dat");
+            if(_leave_out_inds.size() > 0)
+                models.back().to_file("models/test_dim_1_model_" + std::to_string(rr) + ".dat", false, _leave_out_inds);
+        }
     }
     _models.push_back(models);
 
@@ -182,10 +192,16 @@ void SISSORegressor::fit()
         _mpi_comm->barrier();
         duration = ( std::clock() - start ) / (double) CLOCKS_PER_SEC;
         if(_mpi_comm->rank() == 0)
+        {
             std::cout << "Time for l0-norm: " << duration << std::endl;
 
-        for(int rr = 0; rr < _n_residual; ++rr)
-            _models.back()[rr].copy_error(&residual[rr * _n_samp]);
+            for(int rr = 0; rr < _n_residual; ++rr)
+            {
+                _models.back()[rr].to_file("models/train_dim_" + std::to_string(dd) + "_model_" + std::to_string(rr) + ".dat");
+                if(_leave_out_inds.size() > 0)
+                    _models.back()[rr].to_file("models/test_dim_" + std::to_string(dd) + "_model_" + std::to_string(rr) + ".dat", false, _leave_out_inds);
+            }
+        }
     }
 }
 
@@ -275,8 +291,8 @@ python::list SISSORegressor::models_py()
 void SISSORegressor::register_python()
 {
     using namespace boost::python;
-    class_<SISSORegressor>("SISSORegressor", init<std::shared_ptr<FeatureSpace>, np::ndarray, np::ndarray, python::list, python::list, int, int>())
-        .def(init<std::shared_ptr<FeatureSpace>, python::list, python::list, python::list, python::list, int, int>())
+    class_<SISSORegressor>("SISSORegressor", init<std::shared_ptr<FeatureSpace>, np::ndarray, np::ndarray, python::list, python::list, python::list, int, int>())
+        .def(init<std::shared_ptr<FeatureSpace>, python::list, python::list, python::list, python::list, python::list, int, int>())
         .def("fit", &SISSORegressor::fit)
         .add_property("prop", &SISSORegressor::prop_py)
         .add_property("prop_test", &SISSORegressor::prop_test_py)
diff --git a/src/descriptor_identifier/SISSORegressor.hpp b/src/descriptor_identifier/SISSORegressor.hpp
index da4b400f..ec1e49ac 100644
--- a/src/descriptor_identifier/SISSORegressor.hpp
+++ b/src/descriptor_identifier/SISSORegressor.hpp
@@ -27,6 +27,7 @@ protected:
 
     std::vector<int> _task_sizes_train;
     std::vector<int> _task_sizes_test;
+    std::vector<int> _leave_out_inds;
     std::shared_ptr<FeatureSpace> _feat_space; //!< Feature Space for the problem
     std::shared_ptr<MPI_Interface> _mpi_comm; //!< MPI Communicator
 
@@ -47,9 +48,11 @@ public:
      */
     SISSORegressor(std::shared_ptr<FeatureSpace> feat_space, std::vector<double> prop, std::vector<double> prop_test, std::vector<int> task_sizes_train, std::vector<int> task_sizes_test, int n_dim, int n_residual);
 
-    SISSORegressor(std::shared_ptr<FeatureSpace> feat_space, np::ndarray prop, np::ndarray prop_test, python::list task_sizes_train, python::list task_sizes_test, int n_dim, int n_residual);
+    SISSORegressor(std::shared_ptr<FeatureSpace> feat_space, std::vector<double> prop, std::vector<double> prop_test, std::vector<int> task_sizes_train, std::vector<int> task_sizes_test, std::vector<int> leave_out_inds, int n_dim, int n_residual);
 
-    SISSORegressor(std::shared_ptr<FeatureSpace> feat_space, python::list prop, python::list prop_test, python::list task_sizes_train, python::list task_sizes_test, int n_dim, int n_residual);
+    SISSORegressor(std::shared_ptr<FeatureSpace> feat_space, np::ndarray prop, np::ndarray prop_test, python::list task_sizes_train, python::list task_sizes_test, python::list leave_out_inds, int n_dim, int n_residual);
+
+    SISSORegressor(std::shared_ptr<FeatureSpace> feat_space, python::list prop, python::list prop_test, python::list task_sizes_train, python::list task_sizes_test, python::list leave_out_inds, int n_dim, int n_residual);
 
     /**
      * @brief Get the optimal size of the working array
diff --git a/src/feature_creation/feature_space/FeatureSpace.cpp b/src/feature_creation/feature_space/FeatureSpace.cpp
index b853bc08..fc168e42 100644
--- a/src/feature_creation/feature_space/FeatureSpace.cpp
+++ b/src/feature_creation/feature_space/FeatureSpace.cpp
@@ -116,6 +116,22 @@ FeatureSpace::FeatureSpace(
     initialize_fs(python_conv_utils::from_ndarray<double>(prop));
 }
 
+boost::python::list FeatureSpace::phi0_py()
+{
+    python::list feat_lst;
+    for(auto& feat : _phi_0)
+        feat_lst.append<FeatureNode>(FeatureNode(feat->feat_ind(), feat->expr(), feat->value(), feat->test_value(), feat->unit()));
+    return feat_lst;
+}
+
+boost::python::list FeatureSpace::phi_selected_py()
+{
+    python::list feat_lst;
+    for(auto& feat : _phi_selected)
+        feat_lst.append<ModelNode>(ModelNode(feat->d_mat_ind(), feat->rung(), feat->expr(), feat->value(), feat->test_value(), feat->unit()));
+    return feat_lst;
+}
+
 void FeatureSpace::initialize_fs(std::vector<double> prop)
 {
     if(_n_rung_store == -1)
@@ -147,7 +163,6 @@ void FeatureSpace::initialize_fs(std::vector<double> prop)
     generate_feature_space(prop);
     _scores.reserve(_phi.size());
     _scores.resize(_phi.size());
-
 }
 
 void FeatureSpace::generate_new_feats(std::vector<node_ptr>::iterator& feat, std::vector<node_ptr>& feat_set, int& feat_ind, double l_bound, double u_bound)
@@ -795,7 +810,6 @@ void FeatureSpace::sis(std::vector<double>& prop)
     }
     if(_mpi_comm->rank() == 0)
         out_file_stream.close();
-
 }
 
 void FeatureSpace::register_python()
@@ -810,7 +824,6 @@ void FeatureSpace::register_python()
         .def("sis", sis_ndarray)
         .def("feat_in_phi", &FeatureSpace::feat_in_phi)
         .add_property("phi_selected", &FeatureSpace::phi_selected_py)
-        .add_property("phi", &FeatureSpace::phi_py)
         .add_property("phi0", &FeatureSpace::phi0_py)
         .add_property("scores", &FeatureSpace::scores_py)
         .add_property("task_sizes", &FeatureSpace::task_sizes_py)
diff --git a/src/feature_creation/feature_space/FeatureSpace.hpp b/src/feature_creation/feature_space/FeatureSpace.hpp
index 02bde78c..3b727b07 100644
--- a/src/feature_creation/feature_space/FeatureSpace.hpp
+++ b/src/feature_creation/feature_space/FeatureSpace.hpp
@@ -3,6 +3,7 @@
 
 #include <mpi_interface/MPI_Interface.hpp>
 #include <feature_creation/node/FeatureNode.hpp>
+#include <feature_creation/node/ModelNode.hpp>
 #include <feature_creation/node/operator_nodes/allowed_ops.hpp>
 #include <feature_creation/node/value_storage/nodes_value_containers.hpp>
 #include <utils/project.hpp>
@@ -137,22 +138,19 @@ public:
      */
     inline std::vector<node_ptr> phi_selected(){return _phi_selected;};
 
-    inline boost::python::list phi_selected_py(){return python_conv_utils::to_list<node_ptr>(_phi_selected);};
+    boost::python::list phi_selected_py();
 
     /**
      * @brief Accessor function for _phi
      */
     inline std::vector<node_ptr> phi(){return _phi;};
 
-    inline boost::python::list phi_py(){return python_conv_utils::to_list<node_ptr>(_phi);};
-
     /**
      * @brief Accessor function for _phi_0
      */
     inline std::vector<node_ptr> phi0(){return _phi_0;};
 
-    inline boost::python::list phi0_py(){return python_conv_utils::to_list<node_ptr>(_phi_0);};
-
+    boost::python::list phi0_py();
     /**
      * @brief Accessor function for _scores
      */
diff --git a/src/feature_creation/node/FeatureNode.cpp b/src/feature_creation/node/FeatureNode.cpp
index f68cf773..6184a141 100644
--- a/src/feature_creation/node/FeatureNode.cpp
+++ b/src/feature_creation/node/FeatureNode.cpp
@@ -3,15 +3,18 @@
 FeatureNode::FeatureNode()
 {}
 
-FeatureNode::FeatureNode(int feat_ind, std::string expr, std::vector<double> value, std::vector<double> test_value, Unit unit) :
+FeatureNode::FeatureNode(int feat_ind, std::string expr, std::vector<double> value, std::vector<double> test_value, Unit unit, bool set_val) :
     Node(feat_ind, value.size(), test_value.size()),
     _value(value),
     _test_value(test_value),
     _unit(unit),
     _expr(expr)
 {
-    set_value();
-    set_test_value();
+    if(set_val)
+    {
+        set_value();
+        set_test_value();
+    }
 }
 
 FeatureNode::FeatureNode(int feat_ind, std::string expr, np::ndarray value, np::ndarray test_value, Unit unit) :
diff --git a/src/feature_creation/node/FeatureNode.hpp b/src/feature_creation/node/FeatureNode.hpp
index 1ef04770..7979fdee 100644
--- a/src/feature_creation/node/FeatureNode.hpp
+++ b/src/feature_creation/node/FeatureNode.hpp
@@ -60,7 +60,7 @@ public:
      * @param value Value of the feature for each test sample
      * @param unit Unit of the feature
      */
-    FeatureNode(int feat_ind, std::string expr, std::vector<double> value, std::vector<double> test_value, Unit unit);
+    FeatureNode(int feat_ind, std::string expr, std::vector<double> value, std::vector<double> test_value, Unit unit, bool set_val = true);
     FeatureNode(int feat_ind, std::string expr, np::ndarray value, np::ndarray test_value, Unit unit);
     FeatureNode(int feat_ind, std::string expr, python::list value, python::list test_value, Unit unit);
 
diff --git a/src/feature_creation/node/ModelNode.cpp b/src/feature_creation/node/ModelNode.cpp
index 75d25160..246751e8 100644
--- a/src/feature_creation/node/ModelNode.cpp
+++ b/src/feature_creation/node/ModelNode.cpp
@@ -4,7 +4,7 @@ ModelNode::ModelNode()
 {}
 
 ModelNode::ModelNode(int feat_ind, int rung, std::string expr, std::vector<double> value, std::vector<double> test_value, Unit unit) :
-    FeatureNode(feat_ind, expr, value, test_value, unit),
+    FeatureNode(feat_ind, expr, value, test_value, unit, false),
     _rung(rung)
  {}
 
diff --git a/src/feature_creation/node/Node.cpp b/src/feature_creation/node/Node.cpp
index 3bde8ded..851b6b07 100644
--- a/src/feature_creation/node/Node.cpp
+++ b/src/feature_creation/node/Node.cpp
@@ -97,6 +97,8 @@ void Node::register_python()
     class_<NodeWrap, boost::noncopyable>("Node", no_init)
         .def("reindex", reindex_1)
         .def("reindex", reindex_2)
+        .def("__str__", &Node::expr)
+        .def("__repr__", &Node::expr)
         .add_property("n_samp", &Node::n_samp)
         .add_property("n_test_samp", &Node::n_test_samp)
         .add_property("feat_ind", &Node::feat_ind)
@@ -112,7 +114,7 @@ void Node::register_python()
         .def("is_nan", pure_virtual(&Node::is_nan))
         .def("is_const", pure_virtual(&Node::is_const))
         .def("rung", pure_virtual(&Node::rung))
-        ;
+    ;
 }
 
 BOOST_SERIALIZATION_ASSUME_ABSTRACT(Node)
diff --git a/src/main.cpp b/src/main.cpp
index b1535fa8..26df5217 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -36,7 +36,7 @@ int main(int argc, char const *argv[])
         std::cout<< "time input_parsing/Feature space generation: "<< duration << std::endl;
 
     node_value_arrs::initialize_d_matrix_arr();
-    SISSORegressor sisso(IP._feat_space, IP._prop_train, IP._prop_test, IP._task_sizes_train, IP._task_sizes_test, IP._n_dim, IP._n_residuals);
+    SISSORegressor sisso(IP._feat_space, IP._prop_train, IP._prop_test, IP._task_sizes_train, IP._task_sizes_test, IP._leave_out_inds, IP._n_dim, IP._n_residuals);
     sisso.fit();
 
     if(mpi_setup::comm->rank() == 0)
@@ -49,12 +49,12 @@ int main(int argc, char const *argv[])
             else
                 std::cout << std::endl;
             std::cout << sisso.models()[ii][0] << "\n" << std::endl;
-            for(int jj = 0; jj < sisso.models()[ii].size(); ++jj)
-            {
-                sisso.models()[ii][jj].train_to_file("models/train_dim_" + std::to_string(ii) + "_model_" + std::to_string(jj) + ".dat");
-                if(IP._prop_test.size() > 0)
-                    sisso.models()[ii][jj].test_to_file("models/test_dim_" + std::to_string(ii) + "_model_" + std::to_string(jj) + ".dat", IP._leave_out_inds);
-            }
+            // for(int jj = 0; jj < sisso.models()[ii].size(); ++jj)
+            // {
+            //     sisso.models()[ii][jj].to_file("models/train_dim_" + std::to_string(ii) + "_model_" + std::to_string(jj) + ".dat");
+            //     if(IP._prop_test.size() > 0)
+            //         sisso.models()[ii][jj].to_file("models/test_dim_" + std::to_string(ii) + "_model_" + std::to_string(jj) + ".dat", false, IP._leave_out_inds);
+            // }
         }
     }
 
diff --git a/src/utils/string_utils.cpp b/src/utils/string_utils.cpp
new file mode 100644
index 00000000..9691d2fb
--- /dev/null
+++ b/src/utils/string_utils.cpp
@@ -0,0 +1,11 @@
+#include <utils/string_utils.hpp>
+
+std::vector<std::string> str_utils::split_string_trim(std::string str, std::string split_tokens)
+{
+    std::vector<std::string> split_str;
+    boost::algorithm::split(split_str, str, boost::algorithm::is_any_of(split_tokens));
+    for(auto& str_sec : split_str)
+        boost::algorithm::trim(str_sec);
+
+    return split_str;
+}
diff --git a/src/utils/string_utils.hpp b/src/utils/string_utils.hpp
new file mode 100644
index 00000000..d7a56e72
--- /dev/null
+++ b/src/utils/string_utils.hpp
@@ -0,0 +1,18 @@
+#ifndef STRING_UTILS
+#define STRING_UTILS
+
+#include <cmath>
+#include <string>
+#include <vector>
+#include <iostream>
+
+#include <boost/algorithm/string.hpp>
+#include <boost/algorithm/string/trim.hpp>
+
+namespace str_utils
+{
+    std::vector<std::string> split_string_trim(std::string str, std::string split_tokens = ",;:");
+}
+
+
+#endif
\ No newline at end of file
-- 
GitLab