From 2894acc8e98da342b283c801feef0c9c4b4b1077 Mon Sep 17 00:00:00 2001 From: Thomas <purcell@fhi-berlin.mpg.de> Date: Sun, 15 Aug 2021 20:35:51 +0200 Subject: [PATCH] Add the task name and sample ID column to model files Also backwards compatible --- src/descriptor_identifier/model/Model.cpp | 89 ++++++++++++++----- src/descriptor_identifier/model/Model.hpp | 14 ++- .../model/ModelClassifier.cpp | 30 +++++-- .../model/ModelClassifier.hpp | 10 ++- .../model/ModelLogRegressor.cpp | 7 +- .../model/ModelLogRegressor.hpp | 8 +- .../model/ModelRegressor.cpp | 28 +++++- .../model/ModelRegressor.hpp | 10 ++- .../solver/SISSOClassifier.cpp | 9 +- .../solver/SISSOClassifier.hpp | 12 +-- .../solver/SISSOLogRegressor.cpp | 9 +- .../solver/SISSOLogRegressor.hpp | 12 +-- .../solver/SISSORegressor.cpp | 9 +- .../solver/SISSORegressor.hpp | 12 +-- .../solver/SISSOSolver.cpp | 4 +- .../solver/SISSOSolver.hpp | 14 +-- src/inputs/InputParser.cpp | 11 ++- src/inputs/InputParser.hpp | 2 +- src/main.cpp | 6 +- .../bindings_docstring_keyed.cpp | 12 +-- .../descriptor_identifier/SISSOClassifier.cpp | 8 +- .../SISSOLogRegressor.cpp | 8 +- .../descriptor_identifier/SISSORegressor.cpp | 8 +- .../descriptor_identifier/SISSOSolver.cpp | 8 +- src/python/py_interface/get_solver.py | 10 +-- src/python/py_interface/import_dataframe.py | 18 ++-- .../model/test_model_classifier.cc | 23 +++-- .../model/test_model_log_regressor.cc | 18 +++- .../model/test_model_regressor.cc | 18 +++- .../test_sisso_log_regressor.cc | 10 +++ .../sisso_regressor/test_sisso_regressor.cc | 11 +++ 31 files changed, 321 insertions(+), 127 deletions(-) diff --git a/src/descriptor_identifier/model/Model.cpp b/src/descriptor_identifier/model/Model.cpp index 7ebc8e61..a0ff6139 100644 --- a/src/descriptor_identifier/model/Model.cpp +++ b/src/descriptor_identifier/model/Model.cpp @@ -26,8 +26,14 @@ Model::Model( const Unit prop_unit, const std::shared_ptr<LossFunction> loss, const std::vector<model_node_ptr> feats, - const std::vector<int> leave_out_inds + const std::vector<int> leave_out_inds, + const std::vector<std::string> sample_ids_train, + const std::vector<std::string> sample_ids_test, + const std::vector<std::string> task_names ) : + _sample_ids_train(sample_ids_train), + _sample_ids_test(sample_ids_test), + _task_names(task_names), _n_samp_train(feats[0]->n_samp()), _n_samp_test(feats[0]->n_samp_test()), _n_dim(feats.size()), @@ -161,17 +167,7 @@ void Model::to_file(const std::string filename, const bool train) const out_file_stream << "# Property Label: $" << str_utils::latexify(_prop_label) << "$; Unit of the Property: " << _prop_unit.toString() << std::endl; out_file_stream << error_summary_string(train); - out_file_stream << coefs_header(); - - for(int cc = 0; cc < _coefs.size(); ++cc) - { - out_file_stream << std::setw(10) << std::left << "# " + std::to_string(cc) + ", "; - for(auto& coeff : _coefs[cc]) - { - out_file_stream << std::setprecision(15) << std::scientific << std::right << std::setw(22) << coeff << std::setw(2) << ", "; - } - out_file_stream << "\n"; - } + out_file_stream << write_coefs(); out_file_stream << "# Feature Rung, Units, and Expressions" << std::endl; for(int ff = 0; ff < _feats.size(); ++ff) @@ -185,13 +181,17 @@ void Model::to_file(const std::string filename, const bool train) const out_file_stream << boost::algorithm::join(_feats[ff]->get_x_in_expr_list(), ",") << std::endl; } + int task_header_w = std::max( + 6, + static_cast<int>(std::max_element(_task_names.begin(), _task_names.end(), [](std::string s1,
std::string s2){return s1.size() <= s2.size();})->size()) + ); out_file_stream << "# Number of Samples Per Task" << std::endl; if(train) { - out_file_stream << std::setw(10) << std::left << "# Task," << std::setw(24) << "n_mats_train" << std::endl; + out_file_stream << std::setw(task_header_w) << std::left << "# Task" << std::setw(2) << ", " << std::setw(24) << "n_mats_train" << std::endl; for(int tt = 0; tt < task_sizes_train_vec.size(); ++tt) { - out_file_stream << std::left << std::setw(10) << "# " + std::to_string(tt) + ", "; + out_file_stream << std::left << std::setw(task_header_w) << "# " + _task_names[tt] << std::setw(2) << ", "; out_file_stream << std::left << std::setw(22) << task_sizes_train_vec[tt] << std::endl; } } @@ -200,7 +200,7 @@ void Model::to_file(const std::string filename, const bool train) const out_file_stream << std::setw(10) << std::left << "# Task," << std::setw(24) << "n_mats_test" << std::endl; for(int tt = 0; tt < task_sizes_test_vec.size(); ++tt) { - out_file_stream << std::left << std::setw(10) << "# " + std::to_string(tt) + ", "; + out_file_stream << std::left << std::setw(10) << "# " + _task_names[tt] + ", "; out_file_stream << std::left << std::setw(22) << task_sizes_test_vec[tt] << std::endl; } @@ -212,7 +212,23 @@ void Model::to_file(const std::string filename, const bool train) const out_file_stream << " ]" << std::endl; } - out_file_stream << "\n" << std::setw(22) << std::left << "# Property Value" << std::setw(2) << ", " << std::setw(22) << " Property Value (EST)"; + int max_sample_id_len = 12; + if(train) + { + max_sample_id_len = std::max( + max_sample_id_len, + static_cast<int>(std::max_element(_sample_ids_train.begin(), _sample_ids_train.end(), [](std::string s1, std::string s2){return s1.size() <= s2.size();})->size()) + ); + } + else + { + max_sample_id_len = std::max( + max_sample_id_len, + static_cast<int>(std::max_element(_sample_ids_test.begin(), _sample_ids_test.end(), [](std::string s1, std::string s2){return s1.size() <= s2.size();})->size()) + ); + } + out_file_stream << "\n" << std::setw(max_sample_id_len) << std::left << "# Sample ID" << std::setw(2) << ", "; + out_file_stream << std::setw(22) << std::left << "Property Value" << std::setw(2) << ", " << std::setw(22) << " Property Value (EST)"; for(int ff = 0; ff < _feats.size(); ++ff) { out_file_stream << std::setw(2) << ", " << std::setw(22) << " Feature " + std::to_string(ff) + " Value"; @@ -223,6 +239,7 @@ void Model::to_file(const std::string filename, const bool train) const { for(int ss = 0; ss < _n_samp_train; ++ss) { + out_file_stream << std::left << std::setw(max_sample_id_len) << _sample_ids_train[ss] << std::setw(2) << ", "; out_file_stream << std::right << std::setw(22) << std::setprecision(15) << std::scientific << prop_train_vec[ss] << std::setw(2) << ", "; out_file_stream << std::setw(22) << prop_train_est_vec[ss]; for(int ff = 0; ff < _n_dim; ++ff) @@ -236,6 +253,7 @@ void Model::to_file(const std::string filename, const bool train) const { for(int ss = 0; ss < _n_samp_test; ++ss) { + out_file_stream << std::left << std::setw(max_sample_id_len) << _sample_ids_test[ss] << std::setw(2) << ", "; out_file_stream << std::right << std::setw(22) << std::setprecision(15) << std::scientific << prop_test_vec[ss] << std::setw(2) << ", "; out_file_stream << std::setw(22) << prop_test_est_vec[ss]; for(int ff = 0; ff < _n_dim; ++ff) @@ -435,11 +453,16 @@ void Model::populate_model(const std::string train_filename, const std::string t { ++n_task; split_line = 
str_utils::split_string_trim(line); + _task_names.push_back(split_line[0]); _n_samp_train += std::stoi(split_line[1]); task_sizes_train.push_back(std::stoi(split_line[1])); if(with_test) { split_line = str_utils::split_string_trim(test_line); + if(split_line[0].compare(_task_names.back()) != 0) + { + throw std::logic_error("The task names for the test and train files are not in the same order."); + } _n_samp_test += std::stoi(split_line[1]); task_sizes_test.push_back(std::stoi(split_line[1])); std::getline(test_file_stream, test_line); @@ -454,6 +477,9 @@ void Model::populate_model(const std::string train_filename, const std::string t std::vector<double> prop_train(_n_samp_train); std::vector<double> prop_test(_n_samp_test); + _sample_ids_train.resize(_n_samp_train); + _sample_ids_test.resize(_n_samp_test); + if(with_test) { split_line = str_utils::split_string_trim(test_line, "[]"); @@ -479,16 +505,28 @@ void Model::populate_model(const std::string train_filename, const std::string t std::vector<std::vector<double>> feat_vals(n_dim, std::vector<double>(_n_samp_train, 0.0)); std::vector<std::vector<double>> feat_test_vals(n_dim, std::vector<double>(_n_samp_test, 0.0)); + + bool with_samp_id = false; for(int ns = 0; ns < _n_samp_train; ++ns) { std::getline(train_file_stream, line); split_line = str_utils::split_string_trim(line); + if((split_line.size() > _n_dim + 2)) + { + with_samp_id = true; + _sample_ids_train[ns] = split_line[0]; + } + else + { + with_samp_id = false; + _sample_ids_train[ns] = std::to_string(ns); + } - prop_train[ns] = std::stod(split_line[0]); + prop_train[ns] = std::stod(split_line[with_samp_id]); for(int nf = 0; nf < n_dim; ++nf) { - feat_vals[nf][ns] = std::stod(split_line[2 + nf]); + feat_vals[nf][ns] = std::stod(split_line[2 + nf + with_samp_id]); } } for(int ns = 0; ns < _n_samp_test; ++ns) @@ -496,11 +534,22 @@ void Model::populate_model(const std::string train_filename, const std::string t std::getline(test_file_stream, test_line); split_line = str_utils::split_string_trim(test_line); - prop_test[ns] = std::stod(split_line[0]); + if((split_line.size() > _n_dim + 2)) + { + with_samp_id = true; + _sample_ids_test[ns] = split_line[0]; + } + else + { + with_samp_id = false; + _sample_ids_test[ns] = std::to_string(ns); + } + + prop_test[ns] = std::stod(split_line[with_samp_id]); for(int nf = 0; nf < n_dim; ++nf) { - feat_test_vals[nf][ns] = std::stod(split_line[2 + nf]); + feat_test_vals[nf][ns] = std::stod(split_line[2 + nf + with_samp_id]); } } train_file_stream.close(); diff --git a/src/descriptor_identifier/model/Model.hpp b/src/descriptor_identifier/model/Model.hpp index 61a1e0c1..3940ba76 100644 --- a/src/descriptor_identifier/model/Model.hpp +++ b/src/descriptor_identifier/model/Model.hpp @@ -42,6 +42,10 @@ class Model { protected: + std::vector<std::string> _sample_ids_train; //!< Vector storing all sample ids for the training samples + std::vector<std::string> _sample_ids_test; //!< Vector storing all sample ids for the test samples + std::vector<std::string> _task_names; //!< Vector storing the ID of the task names + int _n_samp_train; //!< The number of samples per feature int _n_samp_test; //!< The number of test samples per feature int _n_dim; //!< The number of dimensions of the model @@ -72,13 +76,19 @@ public: * @param loss The LossFunction used to calculate the model * @param feats The features of the model * @param leave_out_inds The indexes of the samples for the test set + * @param sample_ids_train A vector storing all sample ids for the 
training samples + * @param sample_ids_test A vector storing all sample ids for the test samples + * @param task_names A vector storing the ID of the task names */ Model( const std::string prop_label, const Unit prop_unit, const std::shared_ptr<LossFunction> loss, const std::vector<model_node_ptr> feats, - const std::vector<int> leave_out_inds + const std::vector<int> leave_out_inds, + const std::vector<std::string> sample_ids_train, + const std::vector<std::string> sample_ids_test, + const std::vector<std::string> task_names ); /** @@ -289,7 +299,7 @@ public: /** * @brief Get the coefficients list header for output file */ - virtual std::string coefs_header() const = 0; + virtual std::string write_coefs() const = 0; // DocString: model_fix_intercept /** diff --git a/src/descriptor_identifier/model/ModelClassifier.cpp b/src/descriptor_identifier/model/ModelClassifier.cpp index 480c04fc..74e75873 100644 --- a/src/descriptor_identifier/model/ModelClassifier.cpp +++ b/src/descriptor_identifier/model/ModelClassifier.cpp @@ -26,9 +26,12 @@ ModelClassifier::ModelClassifier( const Unit prop_unit, const std::shared_ptr<LossFunction> loss, const std::vector<model_node_ptr> feats, - const std::vector<int> leave_out_inds + const std::vector<int> leave_out_inds, + const std::vector<std::string> sample_ids_train, + const std::vector<std::string> sample_ids_test, + const std::vector<std::string> task_names ) : - Model(prop_label, prop_unit, loss, feats, leave_out_inds), + Model(prop_label, prop_unit, loss, feats, leave_out_inds, sample_ids_train, sample_ids_test, task_names), _train_n_convex_overlap(0), _test_n_convex_overlap(0), _n_class(loss->n_class()) @@ -209,11 +212,16 @@ std::string ModelClassifier::error_summary_string(bool train) const return error_stream.str(); } -std::string ModelClassifier::coefs_header() const +std::string ModelClassifier::write_coefs() const { std::stringstream coef_head_stream; - coef_head_stream << "# Plane Divider" << std::endl; - coef_head_stream << std::setw(10) << std::left << "# Task"; + coef_head_stream << "# Decision Boundaries" << std::endl; + int n_db = _n_class * (_n_class - 1) / 2; + int task_header_w = 1 + static_cast<int>(std::floor(std::log10(n_db))) + std::max( + 6, + static_cast<int>(std::max_element(_task_names.begin(), _task_names.end(), [](std::string s1, std::string s2){return s1.size() <= s2.size();})->size()) + ); + coef_head_stream << std::setw(task_header_w + 2) << std::left << "# Task"; for(int cc = 0; cc < _coefs[0].size() - 1; ++cc) { @@ -221,5 +229,17 @@ std::string ModelClassifier::coefs_header() const } coef_head_stream << " b" << std::endl; + for(int tt = 0; tt < _task_names.size(); ++tt) + { + for(int db = 0; db < n_db; ++db) + { + coef_head_stream << std::setw(task_header_w) << std::left << "# " + _task_names[tt] + "_" + std::to_string(db) << std::setw(2) << ", "; + for(auto& coeff : _coefs[tt * n_db + db]) + { + coef_head_stream << std::setprecision(15) << std::scientific << std::right << std::setw(22) << coeff << std::setw(2) << ", "; + } + coef_head_stream << "\n"; + } + } return coef_head_stream.str(); } diff --git a/src/descriptor_identifier/model/ModelClassifier.hpp b/src/descriptor_identifier/model/ModelClassifier.hpp index fe29e171..28ef2d60 100644 --- a/src/descriptor_identifier/model/ModelClassifier.hpp +++ b/src/descriptor_identifier/model/ModelClassifier.hpp @@ -53,13 +53,19 @@ public: * @param loss The LossFunction used to calculate the model * @param feats The features of the model * @param leave_out_inds The indexes of the 
samples for the test set + * @param sample_ids_train A vector storing all sample ids for the training samples + * @param sample_ids_test A vector storing all sample ids for the test samples + * @param task_names A vector storing the ID of the task names */ ModelClassifier( const std::string prop_label, const Unit prop_unit, const std::shared_ptr<LossFunction> loss, const std::vector<model_node_ptr> feats, - const std::vector<int> leave_out_inds + const std::vector<int> leave_out_inds, + const std::vector<std::string> sample_ids_train, + const std::vector<std::string> sample_ids_test, + const std::vector<std::string> task_names ); // DocString: model_class_init_train @@ -195,7 +201,7 @@ public: /** * @brief Get the coefficients list header for output file */ - std::string coefs_header() const; + std::string write_coefs() const; /** * @brief Copy the training error into a different vector diff --git a/src/descriptor_identifier/model/ModelLogRegressor.cpp b/src/descriptor_identifier/model/ModelLogRegressor.cpp index 6bec56f6..a7519adc 100644 --- a/src/descriptor_identifier/model/ModelLogRegressor.cpp +++ b/src/descriptor_identifier/model/ModelLogRegressor.cpp @@ -29,9 +29,12 @@ ModelLogRegressor::ModelLogRegressor( const Unit prop_unit, const std::shared_ptr<LossFunction> loss, const std::vector<model_node_ptr> feats, - const std::vector<int> leave_out_inds + const std::vector<int> leave_out_inds, + const std::vector<std::string> sample_ids_train, + const std::vector<std::string> sample_ids_test, + const std::vector<std::string> task_names ) : - ModelRegressor(prop_label, prop_unit, loss, feats, leave_out_inds) + ModelRegressor(prop_label, prop_unit, loss, feats, leave_out_inds, sample_ids_train, sample_ids_test, task_names) {} ModelLogRegressor::ModelLogRegressor(const std::string train_file) diff --git a/src/descriptor_identifier/model/ModelLogRegressor.hpp b/src/descriptor_identifier/model/ModelLogRegressor.hpp index 1f6bce56..da7f5e09 100644 --- a/src/descriptor_identifier/model/ModelLogRegressor.hpp +++ b/src/descriptor_identifier/model/ModelLogRegressor.hpp @@ -51,13 +51,19 @@ public: * @param loss The LossFunction used to calculate the model * @param feats The features of the model * @param leave_out_inds The indexes of the samples for the test set + * @param sample_ids_train A vector storing all sample ids for the training samples + * @param sample_ids_test A vector storing all sample ids for the test samples + * @param task_names A vector storing the ID of the task names */ ModelLogRegressor( const std::string prop_label, const Unit prop_unit, const std::shared_ptr<LossFunction> loss, const std::vector<model_node_ptr> feats, - const std::vector<int> leave_out_inds + const std::vector<int> leave_out_inds, + const std::vector<std::string> sample_ids_train, + const std::vector<std::string> sample_ids_test, + const std::vector<std::string> task_names ); // DocString: model_log_reg_init_train diff --git a/src/descriptor_identifier/model/ModelRegressor.cpp b/src/descriptor_identifier/model/ModelRegressor.cpp index fd7de70a..4d809f64 100644 --- a/src/descriptor_identifier/model/ModelRegressor.cpp +++ b/src/descriptor_identifier/model/ModelRegressor.cpp @@ -29,9 +29,12 @@ ModelRegressor::ModelRegressor( const Unit prop_unit, const std::shared_ptr<LossFunction> loss, const std::vector<model_node_ptr> feats, - const std::vector<int> leave_out_inds + const std::vector<int> leave_out_inds, + const std::vector<std::string> sample_ids_train, + const std::vector<std::string> sample_ids_test, + 
const std::vector<std::string> task_names ) : - Model(prop_label, prop_unit, loss, feats, leave_out_inds) + Model(prop_label, prop_unit, loss, feats, leave_out_inds, sample_ids_train, sample_ids_test, task_names) { double rmse = (*_loss)(_feats); } @@ -168,15 +171,22 @@ std::string ModelRegressor::error_summary_string(bool train) const return error_stream.str(); } -std::string ModelRegressor::coefs_header() const +std::string ModelRegressor::write_coefs() const { std::stringstream coef_head_stream; + int task_header_w = std::max( + 6, + static_cast<int>(std::max_element(_task_names.begin(), _task_names.end(), [](std::string s1, std::string s2){return s1.size() <= s2.size();})->size()) + ); + coef_head_stream << "# Coefficients" << std::endl; - coef_head_stream << std::setw(10) << std::left << "# Task"; + coef_head_stream << std::setw(task_header_w + 2) << std::left << "# Task"; for(int cc = 0; cc < _coefs[0].size() - (!_fix_intercept); ++cc) + { coef_head_stream << std::setw(24) << " a" + std::to_string(cc); + } if(!_fix_intercept) { @@ -187,6 +197,16 @@ std::string ModelRegressor::coefs_header() const coef_head_stream << std::endl; } + for(int cc = 0; cc < _coefs.size(); ++cc) + { + coef_head_stream << std::setw(task_header_w) << std::left << "# " + _task_names[cc] << std::setw(2) << ", "; + for(auto& coeff : _coefs[cc]) + { + coef_head_stream << std::setprecision(15) << std::scientific << std::right << std::setw(22) << coeff << std::setw(2) << ", "; + } + coef_head_stream << "\n"; + } + return coef_head_stream.str(); } diff --git a/src/descriptor_identifier/model/ModelRegressor.hpp b/src/descriptor_identifier/model/ModelRegressor.hpp index af386356..80484280 100644 --- a/src/descriptor_identifier/model/ModelRegressor.hpp +++ b/src/descriptor_identifier/model/ModelRegressor.hpp @@ -50,13 +50,19 @@ public: * @param loss The LossFunction used to calculate the model * @param feats The features of the model * @param leave_out_inds The indexes of the samples for the test set + * @param sample_ids_train A vector storing all sample ids for the training samples + * @param sample_ids_test A vector storing all sample ids for the test samples + * @param task_names A vector storing the ID of the task names */ ModelRegressor( const std::string prop_label, const Unit prop_unit, const std::shared_ptr<LossFunction> loss, const std::vector<model_node_ptr> feats, - const std::vector<int> leave_out_inds + const std::vector<int> leave_out_inds, + const std::vector<std::string> sample_ids_train, + const std::vector<std::string> sample_ids_test, + const std::vector<std::string> task_names ); // DocString: model_reg_init_train @@ -195,7 +201,7 @@ public: /** * @brief Get the coefficients list header for output file */ - std::string coefs_header() const; + std::string write_coefs() const; /** * @brief Copy the training error into a different vector diff --git a/src/descriptor_identifier/solver/SISSOClassifier.cpp b/src/descriptor_identifier/solver/SISSOClassifier.cpp index 91f7dc92..28726599 100644 --- a/src/descriptor_identifier/solver/SISSOClassifier.cpp +++ b/src/descriptor_identifier/solver/SISSOClassifier.cpp @@ -35,7 +35,7 @@ SISSOClassifier::SISSOClassifier( const int n_models_store, const std::vector<std::string> sample_ids_train, const std::vector<std::string> sample_ids_test, - const std::vector<std::string> task_keys + const std::vector<std::string> task_names ): SISSOSolver( "classification", @@ -52,7 +52,7 @@ SISSOClassifier::SISSOClassifier( n_models_store, sample_ids_train, sample_ids_test, - 
task_keys, + task_names, false ), _c(1000.0), @@ -310,7 +310,10 @@ void SISSOClassifier::l0_regularization(const int n_dim) _prop_unit, loss_function_util::copy(_loss), min_nodes[rr], - _leave_out_inds + _leave_out_inds, + _sample_ids_train, + _sample_ids_test, + _task_names ) ); } diff --git a/src/descriptor_identifier/solver/SISSOClassifier.hpp b/src/descriptor_identifier/solver/SISSOClassifier.hpp index bd854553..16ab12d0 100644 --- a/src/descriptor_identifier/solver/SISSOClassifier.hpp +++ b/src/descriptor_identifier/solver/SISSOClassifier.hpp @@ -67,7 +67,7 @@ public: * @param n_models_store The number of models to output to files * @param sample_ids_train A list storing all sample ids for the training samples * @param sample_ids_test A list storing all sample ids for the test samples - * @param task_keys A list storing the ID of the task names + * @param task_names A list storing the ID of the task names */ SISSOClassifier( const std::shared_ptr<FeatureSpace> feat_space, @@ -82,7 +82,7 @@ public: const int n_models_store, const std::vector<std::string> sample_ids_train, const std::vector<std::string> sample_ids_test, - const std::vector<std::string> task_keys + const std::vector<std::string> task_names ); /** @@ -153,7 +153,7 @@ public: * @param n_models_store (int) The number of models to output to files * @param sample_ids_train A list storing all sample ids for the training samples * @param sample_ids_test A list storing all sample ids for the test samples - * @param task_keys A list storing the ID of the task names + * @param task_names A list storing the ID of the task names */ SISSOClassifier( std::shared_ptr<FeatureSpace> feat_space, @@ -169,7 +169,7 @@ public: int n_models_store, py::list sample_ids_train, py::list sample_ids_test, - py::list task_keys + py::list task_names ); // DocString: sisso_class_init_list @@ -189,7 +189,7 @@ public: * @param n_models_store (int) The number of models to output to files * @param sample_ids_train A list storing all sample ids for the training samples * @param sample_ids_test A list storing all sample ids for the test samples - * @param task_keys A list storing the ID of the task names + * @param task_names A list storing the ID of the task names */ SISSOClassifier( std::shared_ptr<FeatureSpace> feat_space, @@ -205,7 +205,7 @@ public: int n_models_store, py::list sample_ids_train, py::list sample_ids_test, - py::list task_keys + py::list task_names ); // DocString: sisso_class_models_py diff --git a/src/descriptor_identifier/solver/SISSOLogRegressor.cpp b/src/descriptor_identifier/solver/SISSOLogRegressor.cpp index ab36f0ed..9d502966 100644 --- a/src/descriptor_identifier/solver/SISSOLogRegressor.cpp +++ b/src/descriptor_identifier/solver/SISSOLogRegressor.cpp @@ -35,7 +35,7 @@ SISSOLogRegressor::SISSOLogRegressor( const int n_models_store, const std::vector<std::string> sample_ids_train, const std::vector<std::string> sample_ids_test, - const std::vector<std::string> task_keys, + const std::vector<std::string> task_names, const bool fix_intercept ): SISSORegressor( @@ -52,7 +52,7 @@ SISSOLogRegressor::SISSOLogRegressor( n_models_store, sample_ids_train, sample_ids_test, - task_keys, + task_names, fix_intercept ) { @@ -106,7 +106,10 @@ void SISSOLogRegressor::add_models(const std::vector<std::vector<int>> indexes) _prop_unit, loss_function_util::copy(_loss), min_nodes.back(), - _leave_out_inds + _leave_out_inds, + _sample_ids_train, + _sample_ids_test, + _task_names ); _models.back().push_back(model); } diff --git 
a/src/descriptor_identifier/solver/SISSOLogRegressor.hpp b/src/descriptor_identifier/solver/SISSOLogRegressor.hpp index 2f8ce7d5..34c63511 100644 --- a/src/descriptor_identifier/solver/SISSOLogRegressor.hpp +++ b/src/descriptor_identifier/solver/SISSOLogRegressor.hpp @@ -57,7 +57,7 @@ public: * @param n_models_store The number of models to output to files * @param sample_ids_train A list storing all sample ids for the training samples * @param sample_ids_test A list storing all sample ids for the test samples - * @param task_keys A list storing the ID of the task names + * @param task_names A list storing the ID of the task names * @param fix_intrecept If true the bias term is fixed at 0 */ SISSOLogRegressor( @@ -74,7 +74,7 @@ public: const int n_models_store, const std::vector<std::string> sample_ids_train, const std::vector<std::string> sample_ids_test, - const std::vector<std::string> task_keys, + const std::vector<std::string> task_names, const bool fix_intercept=false ); @@ -121,7 +121,7 @@ public: * @param n_models_store (int) The number of models to output to files * @param sample_ids_train A list storing all sample ids for the training samples * @param sample_ids_test A list storing all sample ids for the test samples - * @param task_keys A list storing the ID of the task names + * @param task_names A list storing the ID of the task names * @param fix_intrecept (bool) If true the bias term is fixed at 0 */ SISSOLogRegressor( @@ -138,7 +138,7 @@ public: int n_models_store, py::list sample_ids_train, py::list sample_ids_test, - py::list task_keys, + py::list task_names, bool fix_intercept=false ); @@ -159,7 +159,7 @@ public: * @param n_models_store (int) The number of models to output to files * @param sample_ids_train A list storing all sample ids for the training samples * @param sample_ids_test A list storing all sample ids for the test samples - * @param task_keys A list storing the ID of the task names + * @param task_names A list storing the ID of the task names * @param fix_intrecept (bool) If true the bias term is fixed at 0 */ SISSOLogRegressor( @@ -176,7 +176,7 @@ public: int n_models_store, py::list sample_ids_train, py::list sample_ids_test, - py::list task_keys, + py::list task_names, bool fix_intercept=false ); diff --git a/src/descriptor_identifier/solver/SISSORegressor.cpp b/src/descriptor_identifier/solver/SISSORegressor.cpp index 7705db94..e6853bdc 100644 --- a/src/descriptor_identifier/solver/SISSORegressor.cpp +++ b/src/descriptor_identifier/solver/SISSORegressor.cpp @@ -35,7 +35,7 @@ SISSORegressor::SISSORegressor( const int n_models_store, const std::vector<std::string> sample_ids_train, const std::vector<std::string> sample_ids_test, - const std::vector<std::string> task_keys, + const std::vector<std::string> task_names, const bool fix_intercept ): SISSOSolver( @@ -53,7 +53,7 @@ SISSORegressor::SISSORegressor( n_models_store, sample_ids_train, sample_ids_test, - task_keys, + task_names, fix_intercept ) {} @@ -76,7 +76,10 @@ void SISSORegressor::add_models(const std::vector<std::vector<int>> indexes) _prop_unit, loss_function_util::copy(_loss), min_nodes.back(), - _leave_out_inds + _leave_out_inds, + _sample_ids_train, + _sample_ids_test, + _task_names ); _models.back().push_back(model); } diff --git a/src/descriptor_identifier/solver/SISSORegressor.hpp b/src/descriptor_identifier/solver/SISSORegressor.hpp index 897298ef..c7f8ff71 100644 --- a/src/descriptor_identifier/solver/SISSORegressor.hpp +++ b/src/descriptor_identifier/solver/SISSORegressor.hpp @@ -57,7 
+57,7 @@ public: * @param n_models_store The number of models to output to files * @param sample_ids_train A list storing all sample ids for the training samples * @param sample_ids_test A list storing all sample ids for the test samples - * @param task_keys A list storing the ID of the task names + * @param task_names A list storing the ID of the task names * @param fix_intrecept If true the bias term is fixed at 0 */ SISSORegressor( @@ -74,7 +74,7 @@ public: const int n_models_store, const std::vector<std::string> sample_ids_train, const std::vector<std::string> sample_ids_test, - const std::vector<std::string> task_keys, + const std::vector<std::string> task_names, const bool fix_intercept=false ); @@ -128,7 +128,7 @@ public: * @param n_models_store (int) The number of models to output to files * @param sample_ids_train A list storing all sample ids for the training samples * @param sample_ids_test A list storing all sample ids for the test samples - * @param task_keys A list storing the ID of the task names + * @param task_names A list storing the ID of the task names * @param fix_intrecept (bool) If true the bias term is fixed at 0 */ SISSORegressor( @@ -145,7 +145,7 @@ public: int n_models_store, py::list sample_ids_train, py::list sample_ids_test, - py::list task_keys, + py::list task_names, bool fix_intercept=false ); @@ -166,7 +166,7 @@ public: * @param n_models_store (int) The number of models to output to files * @param sample_ids_train A list storing all sample ids for the training samples * @param sample_ids_test A list storing all sample ids for the test samples - * @param task_keys A list storing the ID of the task names + * @param task_names A list storing the ID of the task names * @param fix_intrecept (bool) If true the bias term is fixed at 0 */ SISSORegressor( @@ -183,7 +183,7 @@ public: int n_models_store, py::list sample_ids_train, py::list sample_ids_test, - py::list task_keys, + py::list task_names, bool fix_intercept=false ); diff --git a/src/descriptor_identifier/solver/SISSOSolver.cpp b/src/descriptor_identifier/solver/SISSOSolver.cpp index 9039b0aa..33019c72 100644 --- a/src/descriptor_identifier/solver/SISSOSolver.cpp +++ b/src/descriptor_identifier/solver/SISSOSolver.cpp @@ -36,12 +36,12 @@ SISSOSolver::SISSOSolver( const int n_models_store, const std::vector<std::string> sample_ids_train, const std::vector<std::string> sample_ids_test, - const std::vector<std::string> task_keys, + const std::vector<std::string> task_names, const bool fix_intercept ): _sample_ids_train(sample_ids_train), _sample_ids_test(sample_ids_test), - _task_keys(task_keys), + _task_names(task_names), _task_sizes_train(task_sizes_train), _task_sizes_test(task_sizes_test), _leave_out_inds(leave_out_inds), diff --git a/src/descriptor_identifier/solver/SISSOSolver.hpp b/src/descriptor_identifier/solver/SISSOSolver.hpp index a7b279d6..939b8285 100644 --- a/src/descriptor_identifier/solver/SISSOSolver.hpp +++ b/src/descriptor_identifier/solver/SISSOSolver.hpp @@ -40,7 +40,7 @@ class SISSOSolver protected: const std::vector<std::string> _sample_ids_train; //!< Vector storing all sample ids for the training samples const std::vector<std::string> _sample_ids_test; //!< Vector storing all sample ids for the test samples - const std::vector<std::string> _task_keys; //!< Vector storing the ID of the task names + const std::vector<std::string> _task_names; //!< Vector storing the ID of the task names const std::vector<int> _task_sizes_train; //!< Number of training samples per task const 
std::vector<int> _task_sizes_test; //!< Number of testing samples per task @@ -77,7 +77,7 @@ public: * @param n_models_store The number of models to output to files * @param sample_ids_train A vector storing all sample ids for the training samples * @param sample_ids_test A vector storing all sample ids for the test samples - * @param task_keys A vector storing the ID of the task names + * @param task_names A vector storing the ID of the task names * @param fix_intrecept If true the bias term is fixed at 0 */ SISSOSolver( @@ -95,7 +95,7 @@ public: const int n_models_store, const std::vector<std::string> sample_ids_train, const std::vector<std::string> sample_ids_test, - const std::vector<std::string> task_keys, + const std::vector<std::string> task_names, const bool fix_intercept=false ); @@ -175,7 +175,7 @@ public: * @param n_models_store The number of models to output to files * @param sample_ids_train A list storing all sample ids for the training samples * @param sample_ids_test A list storing all sample ids for the test samples - * @param task_keys A list storing the ID of the task names + * @param task_names A list storing the ID of the task names * @param fix_intrecept If true the bias term is fixed at 0 */ SISSOSolver( @@ -193,7 +193,7 @@ public: int n_models_store, py::list sample_ids_train, py::list sample_ids_test, - py::list task_keys, + py::list task_names, bool fix_intercept=false ); @@ -214,7 +214,7 @@ public: * @param n_models_store The number of models to output to files * @param sample_ids_train A list storing all sample ids for the training samples * @param sample_ids_test A list storing all sample ids for the test samples - * @param task_keys A list storing the ID of the task names + * @param task_names A list storing the ID of the task names * @param fix_intrecept If true the bias term is fixed at 0 */ SISSOSolver( @@ -232,7 +232,7 @@ public: int n_models_store, py::list sample_ids_train, py::list sample_ids_test, - py::list task_keys, + py::list task_names, bool fix_intercept=false ); diff --git a/src/inputs/InputParser.cpp b/src/inputs/InputParser.cpp index 36565188..13f0183a 100644 --- a/src/inputs/InputParser.cpp +++ b/src/inputs/InputParser.cpp @@ -119,8 +119,8 @@ InputParser::InputParser(pt::ptree ip, std::string fn, std::shared_ptr<MPI_Inter { ++_n_samp; } - tasks["none"] = std::vector<int>(_n_samp); - std::iota(tasks["none"].begin(), tasks["none"].end(), 0); + tasks["all"] = std::vector<int>(_n_samp); + std::iota(tasks["all"].begin(), tasks["all"].end(), 0); } else { @@ -151,7 +151,6 @@ InputParser::InputParser(pt::ptree ip, std::string fn, std::shared_ptr<MPI_Inter int start = 0; for(auto& el : tasks) { - _task_keys.push_back(el.first); _task_sizes_test.push_back(static_cast<int>(std::round(leave_out_frac * el.second.size()))); _task_sizes_train.push_back(el.second.size() - _task_sizes_test.back()); @@ -269,6 +268,11 @@ void InputParser::generate_feature_space( int n_train_samp = 0; int n_samp_test = 0; + for(auto& task : tasks) + { + _task_names.push_back(task.first); + } + while (std::getline(data_stream, line)) { std::vector<std::string> split_line; @@ -289,6 +293,7 @@ void InputParser::generate_feature_space( n_samp_test = 0; for(auto& task : tasks) { + int task_ind = std::find(task.second.begin(), task.second.end(), cur_line) - task.second.begin(); for(int ii = 0; ii < task_ind; ++ii) { diff --git a/src/inputs/InputParser.hpp b/src/inputs/InputParser.hpp index 6c72773a..21df1c8f 100644 --- a/src/inputs/InputParser.hpp +++ b/src/inputs/InputParser.hpp @@ 
-50,7 +50,7 @@ class InputParser public: std::vector<std::string> _sample_ids_train; //!< Vector storing all sample ids for the training samples std::vector<std::string> _sample_ids_test; //!< Vector storing all sample ids for the test samples - std::vector<std::string> _task_keys; //!< Vector storing the ID of the task names + std::vector<std::string> _task_names; //!< Vector storing the ID of the task names std::vector<std::string> _param_opset; //!< Vector containing all allowed operators strings for operators with free parameters std::vector<std::string> _opset; //!< Vector containing all allowed operators strings diff --git a/src/main.cpp b/src/main.cpp index bd1959f2..2f85c884 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -86,7 +86,7 @@ int main(int argc, char const *argv[]) ip._n_models_store, ip._sample_ids_train, ip._sample_ids_test, - ip._task_keys, + ip._task_names, ip._fix_intercept ); sisso.fit(); @@ -124,7 +124,7 @@ int main(int argc, char const *argv[]) ip._n_models_store, ip._sample_ids_train, ip._sample_ids_test, - ip._task_keys, + ip._task_names, ip._fix_intercept ); sisso.fit(); @@ -162,7 +162,7 @@ int main(int argc, char const *argv[]) ip._n_models_store, ip._sample_ids_train, ip._sample_ids_test, - ip._task_keys + ip._task_names ); sisso.fit(); diff --git a/src/python/py_binding_cpp_def/bindings_docstring_keyed.cpp b/src/python/py_binding_cpp_def/bindings_docstring_keyed.cpp index 62907e4a..cbb996bc 100644 --- a/src/python/py_binding_cpp_def/bindings_docstring_keyed.cpp +++ b/src/python/py_binding_cpp_def/bindings_docstring_keyed.cpp @@ -1414,7 +1414,7 @@ void sisso::descriptor_identifier::registerSISSORegressor() py::list, optional<bool> >( - (arg("self"), arg("feat_space"), arg("prop_label"), arg("prop_unit"), arg("prop"), arg("prop_test"), arg("task_sizes_train"), arg("task_sizes_test"), arg("leave_out_inds"), arg("n_dim"), arg("n_residual"), arg("n_models_store"), arg("sample_ids_train"), arg("sample_ids_test"), arg("task_keys"), arg("fix_intercept")), + (arg("self"), arg("feat_space"), arg("prop_label"), arg("prop_unit"), arg("prop"), arg("prop_test"), arg("task_sizes_train"), arg("task_sizes_test"), arg("leave_out_inds"), arg("n_dim"), arg("n_residual"), arg("n_models_store"), arg("sample_ids_train"), arg("sample_ids_test"), arg("task_names"), arg("fix_intercept")), "@DocString_sisso_reg_init_arr@" ) ) @@ -1436,7 +1436,7 @@ void sisso::descriptor_identifier::registerSISSORegressor() py::list, optional<bool> >( - (arg("self"), arg("feat_space"), arg("prop_label"), arg("prop_unit"), arg("prop"), arg("prop_test"), arg("task_sizes_train"), arg("task_sizes_test"), arg("leave_out_inds"), arg("n_dim"), arg("n_residual"), arg("n_models_store"), arg("sample_ids_train"), arg("sample_ids_test"), arg("task_keys"), arg("fix_intercept")), + (arg("self"), arg("feat_space"), arg("prop_label"), arg("prop_unit"), arg("prop"), arg("prop_test"), arg("task_sizes_train"), arg("task_sizes_test"), arg("leave_out_inds"), arg("n_dim"), arg("n_residual"), arg("n_models_store"), arg("sample_ids_train"), arg("sample_ids_test"), arg("task_names"), arg("fix_intercept")), "@DocString_sisso_reg_init_list@" ) ) @@ -1467,7 +1467,7 @@ void sisso::descriptor_identifier::registerSISSOLogRegressor() py::list, optional<bool> >( - (arg("self"), arg("feat_space"), arg("prop_label"), arg("prop_unit"), arg("prop"), arg("prop_test"), arg("task_sizes_train"), arg("task_sizes_test"), arg("leave_out_inds"), arg("n_dim"), arg("n_residual"), arg("n_models_store"), arg("sample_ids_train"), 
arg("sample_ids_test"), arg("task_keys"), arg("fix_intercept")), + (arg("self"), arg("feat_space"), arg("prop_label"), arg("prop_unit"), arg("prop"), arg("prop_test"), arg("task_sizes_train"), arg("task_sizes_test"), arg("leave_out_inds"), arg("n_dim"), arg("n_residual"), arg("n_models_store"), arg("sample_ids_train"), arg("sample_ids_test"), arg("task_names"), arg("fix_intercept")), "@DocString_sisso_log_reg_init_arr@" ) ) @@ -1489,7 +1489,7 @@ void sisso::descriptor_identifier::registerSISSOLogRegressor() py::list, optional<bool> >( - (arg("self"), arg("feat_space"), arg("prop_label"), arg("prop_unit"), arg("prop"), arg("prop_test"), arg("task_sizes_train"), arg("task_sizes_test"), arg("leave_out_inds"), arg("n_dim"), arg("n_residual"), arg("n_models_store"), arg("sample_ids_train"), arg("sample_ids_test"), arg("task_keys"), arg("fix_intercept")), + (arg("self"), arg("feat_space"), arg("prop_label"), arg("prop_unit"), arg("prop"), arg("prop_test"), arg("task_sizes_train"), arg("task_sizes_test"), arg("leave_out_inds"), arg("n_dim"), arg("n_residual"), arg("n_models_store"), arg("sample_ids_train"), arg("sample_ids_test"), arg("task_names"), arg("fix_intercept")), "@DocString_sisso_log_reg_init_list@" ) ) @@ -1503,13 +1503,13 @@ void sisso::descriptor_identifier::registerSISSOClassifier() "SISSOClassifier", "@DocString_cls_sisso_class@", init<std::shared_ptr<FeatureSpace>, std::string, Unit, np::ndarray, np::ndarray, py::list, py::list, py::list, int, int, int, py::list, py::list, py::list>( - (arg("self"), arg("feat_space"), arg("prop_label"), arg("prop_unit"), arg("prop"), arg("prop_test"), arg("task_sizes_train"), arg("task_sizes_test"), arg("leave_out_inds"), arg("n_dim"), arg("n_residual"), arg("n_models_store"), arg("sample_ids_train"), arg("sample_ids_test"), arg("task_keys")), + (arg("self"), arg("feat_space"), arg("prop_label"), arg("prop_unit"), arg("prop"), arg("prop_test"), arg("task_sizes_train"), arg("task_sizes_test"), arg("leave_out_inds"), arg("n_dim"), arg("n_residual"), arg("n_models_store"), arg("sample_ids_train"), arg("sample_ids_test"), arg("task_names")), "@DocString_sisso_class_init_arr@" ) ) .def( init<std::shared_ptr<FeatureSpace>, std::string, Unit, py::list, py::list, py::list, py::list, py::list, int, int, int, py::list, py::list, py::list>( - (arg("self"), arg("feat_space"), arg("prop_label"), arg("prop_unit"), arg("prop"), arg("prop_test"), arg("task_sizes_train"), arg("task_sizes_test"), arg("leave_out_inds"), arg("n_dim"), arg("n_residual"), arg("n_models_store"), arg("sample_ids_train"), arg("sample_ids_test"), arg("task_keys")), + (arg("self"), arg("feat_space"), arg("prop_label"), arg("prop_unit"), arg("prop"), arg("prop_test"), arg("task_sizes_train"), arg("task_sizes_test"), arg("leave_out_inds"), arg("n_dim"), arg("n_residual"), arg("n_models_store"), arg("sample_ids_train"), arg("sample_ids_test"), arg("task_names")), "@DocString_sisso_class_init_list@" ) ) diff --git a/src/python/py_binding_cpp_def/descriptor_identifier/SISSOClassifier.cpp b/src/python/py_binding_cpp_def/descriptor_identifier/SISSOClassifier.cpp index 220a520a..4ae3b8ba 100644 --- a/src/python/py_binding_cpp_def/descriptor_identifier/SISSOClassifier.cpp +++ b/src/python/py_binding_cpp_def/descriptor_identifier/SISSOClassifier.cpp @@ -35,7 +35,7 @@ SISSOClassifier::SISSOClassifier( int n_models_store, py::list sample_ids_train, py::list sample_ids_test, - py::list task_keys + py::list task_names ) : SISSOSolver( "classification", @@ -52,7 +52,7 @@ SISSOClassifier::SISSOClassifier( 
n_models_store, sample_ids_train, sample_ids_test, - task_keys, + task_names, false ), _c(100.0), @@ -76,7 +76,7 @@ SISSOClassifier::SISSOClassifier( int n_models_store, py::list sample_ids_train, py::list sample_ids_test, - py::list task_keys + py::list task_names ) : SISSOSolver( "classification", @@ -93,7 +93,7 @@ SISSOClassifier::SISSOClassifier( n_models_store, sample_ids_train, sample_ids_test, - task_keys, + task_names, false ), _c(100.0), diff --git a/src/python/py_binding_cpp_def/descriptor_identifier/SISSOLogRegressor.cpp b/src/python/py_binding_cpp_def/descriptor_identifier/SISSOLogRegressor.cpp index f910cde5..53ccf1ac 100644 --- a/src/python/py_binding_cpp_def/descriptor_identifier/SISSOLogRegressor.cpp +++ b/src/python/py_binding_cpp_def/descriptor_identifier/SISSOLogRegressor.cpp @@ -35,7 +35,7 @@ SISSOLogRegressor::SISSOLogRegressor( int n_models_store, py::list sample_ids_train, py::list sample_ids_test, - py::list task_keys, + py::list task_names, bool fix_intercept ) : SISSORegressor( @@ -52,7 +52,7 @@ SISSOLogRegressor::SISSOLogRegressor( n_models_store, sample_ids_train, sample_ids_test, - task_keys, + task_names, fix_intercept ) { @@ -86,7 +86,7 @@ SISSOLogRegressor::SISSOLogRegressor( int n_models_store, py::list sample_ids_train, py::list sample_ids_test, - py::list task_keys, + py::list task_names, bool fix_intercept ) : SISSORegressor( @@ -103,7 +103,7 @@ SISSOLogRegressor::SISSOLogRegressor( n_models_store, sample_ids_train, sample_ids_test, - task_keys, + task_names, fix_intercept ) { diff --git a/src/python/py_binding_cpp_def/descriptor_identifier/SISSORegressor.cpp b/src/python/py_binding_cpp_def/descriptor_identifier/SISSORegressor.cpp index 6f9f8b03..4b023f75 100644 --- a/src/python/py_binding_cpp_def/descriptor_identifier/SISSORegressor.cpp +++ b/src/python/py_binding_cpp_def/descriptor_identifier/SISSORegressor.cpp @@ -35,7 +35,7 @@ SISSORegressor::SISSORegressor( int n_models_store, py::list sample_ids_train, py::list sample_ids_test, - py::list task_keys, + py::list task_names, bool fix_intercept ) : SISSOSolver( @@ -53,7 +53,7 @@ SISSORegressor::SISSORegressor( n_models_store, sample_ids_train, sample_ids_test, - task_keys, + task_names, fix_intercept ) {} @@ -72,7 +72,7 @@ SISSORegressor::SISSORegressor( int n_models_store, py::list sample_ids_train, py::list sample_ids_test, - py::list task_keys, + py::list task_names, bool fix_intercept ) : SISSOSolver( @@ -90,7 +90,7 @@ SISSORegressor::SISSORegressor( n_models_store, sample_ids_train, sample_ids_test, - task_keys, + task_names, fix_intercept ) {} diff --git a/src/python/py_binding_cpp_def/descriptor_identifier/SISSOSolver.cpp b/src/python/py_binding_cpp_def/descriptor_identifier/SISSOSolver.cpp index 4c5cb8e4..3d6ef29a 100644 --- a/src/python/py_binding_cpp_def/descriptor_identifier/SISSOSolver.cpp +++ b/src/python/py_binding_cpp_def/descriptor_identifier/SISSOSolver.cpp @@ -36,12 +36,12 @@ SISSOSolver::SISSOSolver( int n_models_store, py::list sample_ids_train, py::list sample_ids_test, - py::list task_keys, + py::list task_names, bool fix_intercept ) : _sample_ids_train(python_conv_utils::from_list<std::string>(sample_ids_train)), _sample_ids_test(python_conv_utils::from_list<std::string>(sample_ids_test)), - _task_keys(python_conv_utils::from_list<std::string>(task_keys)), + _task_names(python_conv_utils::from_list<std::string>(task_names)), _task_sizes_train(python_conv_utils::from_list<int>(task_sizes_train)), _task_sizes_test(python_conv_utils::from_list<int>(task_sizes_test)), 
_leave_out_inds(python_conv_utils::from_list<int>(leave_out_inds)), @@ -83,12 +83,12 @@ SISSOSolver::SISSOSolver( int n_models_store, py::list sample_ids_train, py::list sample_ids_test, - py::list task_keys, + py::list task_names, bool fix_intercept ) : _sample_ids_train(python_conv_utils::from_list<std::string>(sample_ids_train)), _sample_ids_test(python_conv_utils::from_list<std::string>(sample_ids_test)), - _task_keys(python_conv_utils::from_list<std::string>(task_keys)), + _task_names(python_conv_utils::from_list<std::string>(task_names)), _task_sizes_train(python_conv_utils::from_list<int>(task_sizes_train)), _task_sizes_test(python_conv_utils::from_list<int>(task_sizes_test)), _leave_out_inds(python_conv_utils::from_list<int>(leave_out_inds)), diff --git a/src/python/py_interface/get_solver.py b/src/python/py_interface/get_solver.py index 8e61c0a6..b5dea564 100644 --- a/src/python/py_interface/get_solver.py +++ b/src/python/py_interface/get_solver.py @@ -153,7 +153,7 @@ def get_fs_solver( leave_out_inds, sample_ids_train, sample_ids_test, - task_keys, + task_names, ) = read_csv( df, prop_key, @@ -178,7 +178,7 @@ def get_fs_solver( if loss_type.lower() == "regression": print(sample_ids_train) print(sample_ids_test) - print(task_keys) + print(task_names) solver = SISSORegressor( fs, prop_label, @@ -193,7 +193,7 @@ def get_fs_solver( n_model_store, sample_ids_train, sample_ids_test, - task_keys, + task_names, ) elif loss_type.lower() == "log_regression": solver = SISSOLogRegressor( @@ -210,7 +210,7 @@ def get_fs_solver( n_model_store, sample_ids_train, sample_ids_test, - task_keys, + task_names, ) else: solver = SISSOClassifier( @@ -227,6 +227,6 @@ def get_fs_solver( n_model_store, sample_ids_train, sample_ids_test, - task_keys, + task_names, ) return fs, solver diff --git a/src/python/py_interface/import_dataframe.py b/src/python/py_interface/import_dataframe.py index d4dbdd91..a80a1a71 100644 --- a/src/python/py_interface/import_dataframe.py +++ b/src/python/py_interface/import_dataframe.py @@ -138,7 +138,7 @@ def read_csv( - leave_out_inds (list): Indices to use as the test set - sample_ids_train (list): List of sample id's for the training data - sample_ids_test (list): List of sample id's for the test data - - task_keys (list): List of all task id names + - task_names (list): List of all task id names """ if not max_rung: raise ValueError("Maximum rung for the calculation is not defined.") @@ -152,14 +152,14 @@ def read_csv( if task_key: task, _, _ = extract_col(df, task_key) else: - task = np.zeros(prop.shape, dtype=np.int64).astype(str) + task = np.array(["all"] * len(prop)) # Map out which index belongs to which task and get the size of each task task_map = {} - task_keys, task_sizes = np.unique(task, return_counts=True) + task_names, task_sizes = np.unique(task, return_counts=True) task_sizes = task_sizes.astype(np.int32) - for kk, key in enumerate(task_keys): + for kk, key in enumerate(task_names): task_map[key] = np.where(task == key)[0].astype(np.int32) assert task_sizes[kk] == len(task_map[key]) @@ -172,21 +172,21 @@ def read_csv( if leave_out_frac > 0.0: task_sizes_test = [int(math.ceil(ts * leave_out_frac)) for ts in task_sizes] - for kk, key in enumerate(task_keys): + for kk, key in enumerate(task_names): leave_out_inds += list( np.random.choice(task_map[key], task_sizes_test[kk], False).astype( np.int32 ) ) else: - task_sizes_test = list(np.zeros(len(task_keys), dtype=np.int32)) + task_sizes_test = list(np.zeros(len(task_names), dtype=np.int32)) else: assert 
(leave_out_frac == 0.0) or ( int(round(len(df) * leave_out_frac)) == len(leave_out_inds) ) - task_sizes_test = list(np.zeros(len(task_keys), dtype=np.int32)) - for kk, key in enumerate(task_keys): + task_sizes_test = list(np.zeros(len(task_names), dtype=np.int32)) + for kk, key in enumerate(task_names): left_out = [ind for ind in leave_out_inds if ind in task_map[key]] task_sizes_test[kk] = len(left_out) @@ -231,5 +231,5 @@ def read_csv( leave_out_inds, list(df.index[train_inds].to_numpy().astype(str)), list(df.index[leave_out_inds].to_numpy().astype(str)), - list(task_keys), + list(task_names), ) diff --git a/tests/googletest/descriptor_identification/model/test_model_classifier.cc b/tests/googletest/descriptor_identification/model/test_model_classifier.cc index 405b7bdf..71086ca7 100644 --- a/tests/googletest/descriptor_identification/model/test_model_classifier.cc +++ b/tests/googletest/descriptor_identification/model/test_model_classifier.cc @@ -42,16 +42,24 @@ namespace _task_sizes_test, 1 ); + + _task_keys = {"all"}; + _sample_ids_train = {"0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11"}; + _sample_ids_test = {"20", "21"}; } - std::vector<int> _leave_out_inds; - std::vector<int> _task_sizes_train; - std::vector<int> _task_sizes_test; + std::vector<std::string> _sample_ids_train; + std::vector<std::string> _sample_ids_test; + std::vector<std::string> _task_keys; std::vector<double> _prop; std::vector<double> _prop_test; std::vector<model_node_ptr> _features; std::shared_ptr<LossFunction> _loss; + + std::vector<int> _task_sizes_train; + std::vector<int> _task_sizes_test; + std::vector<int> _leave_out_inds; }; TEST_F(ModelClassifierTests, NodesTest) @@ -61,7 +69,10 @@ namespace Unit("m"), _loss, _features, - _leave_out_inds + _leave_out_inds, + _sample_ids_train, + _sample_ids_test, + _task_keys ); EXPECT_STREQ(model.toString().c_str(), "[A]"); EXPECT_EQ(model.n_convex_overlap_train(), 0); @@ -96,7 +107,7 @@ namespace EXPECT_EQ(model.n_dim(), 1); EXPECT_EQ(model.prop_unit(), Unit("m")); - // boost::filesystem::remove("train_class_mods.dat"); - // boost::filesystem::remove("test_class_mods.dat"); + boost::filesystem::remove("train_class_mods.dat"); + boost::filesystem::remove("test_class_mods.dat"); } } diff --git a/tests/googletest/descriptor_identification/model/test_model_log_regressor.cc b/tests/googletest/descriptor_identification/model/test_model_log_regressor.cc index 4c1e43ec..07f37869 100644 --- a/tests/googletest/descriptor_identification/model/test_model_log_regressor.cc +++ b/tests/googletest/descriptor_identification/model/test_model_log_regressor.cc @@ -42,7 +42,15 @@ namespace std::transform(value_1.begin(), value_1.end(), value_2.begin(), _prop.begin(), [](double v1, double v2){return std::log(0.001 * std::pow(v1, 0.1) * std::pow(v2, -2.1));}); std::transform(test_value_1.begin(), test_value_1.end(), test_value_2.begin(), _prop_test.begin(), [](double v1, double v2){return std::log(0.001 * std::pow(v1, 0.1) * std::pow(v2, -2.1));}); + + _task_keys = {"all"}; + _sample_ids_train = {"0", "1", "2", "3", "4", "6", "7", "8", "9", "10"}; + _sample_ids_test = {"5", "11"}; } + std::vector<std::string> _sample_ids_train; + std::vector<std::string> _sample_ids_test; + std::vector<std::string> _task_keys; + std::vector<int> _leave_out_inds; std::vector<int> _task_sizes_train; std::vector<int> _task_sizes_test; @@ -70,7 +78,10 @@ namespace Unit("m"), _loss, _features, - _leave_out_inds + _leave_out_inds, + _sample_ids_train, + _sample_ids_test, + _task_keys ); 
EXPECT_STREQ(model.toString().c_str(), "exp(c0) * (A)^a0 * (B)^a1"); EXPECT_LT(model.rmse(), 1e-10); @@ -156,7 +167,10 @@ namespace Unit("m"), _loss, _features, - _leave_out_inds + _leave_out_inds, + _sample_ids_train, + _sample_ids_test, + _task_keys ); EXPECT_STREQ(model.toString().c_str(), "(A)^a0 * (B)^a1"); diff --git a/tests/googletest/descriptor_identification/model/test_model_regressor.cc b/tests/googletest/descriptor_identification/model/test_model_regressor.cc index 031ddd4d..d4eb9012 100644 --- a/tests/googletest/descriptor_identification/model/test_model_regressor.cc +++ b/tests/googletest/descriptor_identification/model/test_model_regressor.cc @@ -45,7 +45,15 @@ namespace std::transform(test_value_1.begin(), test_value_1.begin() + 1, test_value_2.begin(), _prop_test.begin(), [](double v1, double v2){return 0.001 + v1 + v2;}); std::transform(test_value_1.begin() + 1, test_value_1.end(), test_value_2.begin() + 1, _prop_test.begin() + 1, [](double v1, double v2){return -6.5 + 1.25 * v1 - 0.4 * v2;}); + + _task_keys = {"task_1", "task_2"}; + _sample_ids_train = {"0", "1", "2", "3", "4", "6", "7", "8", "9", "10"}; + _sample_ids_test = {"5", "11"}; } + std::vector<std::string> _sample_ids_train; + std::vector<std::string> _sample_ids_test; + std::vector<std::string> _task_keys; + std::vector<int> _leave_out_inds; std::vector<int> _task_sizes_train; std::vector<int> _task_sizes_test; @@ -73,7 +81,10 @@ namespace Unit("m"), _loss, _features, - _leave_out_inds + _leave_out_inds, + _sample_ids_train, + _sample_ids_test, + _task_keys ); EXPECT_STREQ(model.toString().c_str(), "c0 + a0 * A + a1 * B"); @@ -169,7 +180,10 @@ namespace Unit("m"), _loss, _features, - _leave_out_inds + _leave_out_inds, + _sample_ids_train, + _sample_ids_test, + _task_keys ); EXPECT_STREQ(model.toString().c_str(), "a0 * A + a1 * B"); diff --git a/tests/googletest/descriptor_identification/sisso_regressor/test_sisso_log_regressor.cc b/tests/googletest/descriptor_identification/sisso_regressor/test_sisso_log_regressor.cc index c44e494c..431e22aa 100644 --- a/tests/googletest/descriptor_identification/sisso_regressor/test_sisso_log_regressor.cc +++ b/tests/googletest/descriptor_identification/sisso_regressor/test_sisso_log_regressor.cc @@ -96,6 +96,16 @@ namespace _allowed_ops = {"div", "add", "mult", "sub"}; _allowed_param_ops = {}; + _task_keys = {"all"}; + for(int ii = 10; ii < 100; ++ii) + { + _sample_ids_train.push_back(std::to_string(ii)); + } + + for(int ii = 0; ii < 10; ++ii) + { + _sample_ids_test.push_back(std::to_string(ii)); + } } std::vector<std::string> _sample_ids_train; std::vector<std::string> _sample_ids_test; diff --git a/tests/googletest/descriptor_identification/sisso_regressor/test_sisso_regressor.cc b/tests/googletest/descriptor_identification/sisso_regressor/test_sisso_regressor.cc index 48a02d57..788b73d1 100644 --- a/tests/googletest/descriptor_identification/sisso_regressor/test_sisso_regressor.cc +++ b/tests/googletest/descriptor_identification/sisso_regressor/test_sisso_regressor.cc @@ -116,6 +116,17 @@ namespace _allowed_ops = {"div", "sq", "cb", "sub"}; _allowed_param_ops = {}; + + _task_keys = {"task_1", "task_2"}; + for(int ii = 10; ii < 100; ++ii) + { + _sample_ids_train.push_back(std::to_string(ii)); + } + + for(int ii = 0; ii < 10; ++ii) + { + _sample_ids_test.push_back(std::to_string(ii)); + } } std::vector<std::string> _sample_ids_train; std::vector<std::string> _sample_ids_test; -- GitLab
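Reviewer note: the backwards compatibility mentioned in the commit message comes from the column-count check added to Model::populate_model above. A data row is treated as carrying the new sample-ID column only when it has more than n_dim + 2 fields (property, estimated property, and one column per feature); otherwise the row index is used as the ID, so model files written before this patch still load. The snippet below is a minimal standalone sketch of that rule, not part of the patch: the split_fields helper and the hard-coded example rows are hypothetical stand-ins for str_utils::split_string_trim and real model-file lines.

#include <iostream>
#include <sstream>
#include <string>
#include <vector>

// Hypothetical stand-in for str_utils::split_string_trim: split on ',' and trim blanks.
std::vector<std::string> split_fields(const std::string& line)
{
    std::vector<std::string> fields;
    std::stringstream ss(line);
    std::string field;
    while(std::getline(ss, field, ','))
    {
        const auto start = field.find_first_not_of(" \t");
        const auto end = field.find_last_not_of(" \t");
        fields.push_back(start == std::string::npos ? "" : field.substr(start, end - start + 1));
    }
    return fields;
}

int main()
{
    const int n_dim = 2;  // number of features in the model

    // Old-format row: property, estimated property, one value per feature
    const std::string old_row = "1.25, 1.24, 0.5, 3.0";
    // New-format row: sample ID, property, estimated property, feature values
    const std::string new_row = "mat_042, 1.25, 1.24, 0.5, 3.0";

    for(const std::string& row : {old_row, new_row})
    {
        const std::vector<std::string> split_line = split_fields(row);

        // Same rule as the patched populate_model: an extra column means a sample ID is present
        const bool with_samp_id = static_cast<int>(split_line.size()) > n_dim + 2;

        const std::string sample_id = with_samp_id ? split_line[0] : "(row index used as ID)";
        const double prop = std::stod(split_line[with_samp_id]);
        const double feat_0 = std::stod(split_line[2 + 0 + with_samp_id]);

        std::cout << sample_id << ": prop = " << prop << ", feature 0 = " << feat_0 << "\n";
    }
    return 0;
}

Compiling and running this (e.g. g++ -std=c++11 check_row.cc && ./a.out, with a file name of your choosing) prints the parsed property and first feature value for both row layouts, which mirrors how the patched reader keeps files with and without the sample-ID column readable.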