Skip to content
Snippets Groups Projects
Commit c16acd5f authored by Thomas Purcell's avatar Thomas Purcell
Browse files

Refactor of Model

Standardized the output file and added constructors to recreate model from the output files
parent d062c07c
Branches
No related tags found
No related merge requests found
Showing with 357 additions and 102 deletions
......@@ -67,6 +67,175 @@ Model::Model(std::vector<double> prop_train, std::vector<double> prop_test, std:
}
}
Model::Model(std::string train_file)
{
    // Reconstruct a model (training data only) from a standardized output file.
    // populate_model fills _coefs, _task_sizes_train, _prop_train(_est),
    // _train_error and _D_train, and returns one description line per feature.
    _n_samp_test = 0;
    std::vector<std::string> feature_expr_train = populate_model(train_file, true);

    for(std::size_t ff = 0; ff < feature_expr_train.size(); ++ff)
    {
        // Each description line is "<rung>, <unit>, <expression>"
        std::vector<std::string> split_str = str_utils::split_string_trim(feature_expr_train[ff]);
        if(split_str.size() < 3)
            throw std::logic_error("Invalid feature description line in file: " + train_file);

        int rung = std::stoi(split_str[0]);
        std::string unit_str = split_str[1];
        std::string expr = split_str[2];

        // Copy this feature's training column out of the descriptor matrix;
        // a model loaded without a test file has no test values
        std::vector<double> feat_val(_n_samp_train);
        std::vector<double> feat_test_val = {};
        std::copy_n(&_D_train[ff * _n_samp_train], _n_samp_train, feat_val.data());

        _feats.push_back(std::make_shared<ModelNode>(ff, rung, expr, feat_val, feat_test_val, Unit(unit_str)));
    }
}
Model::Model(std::string train_file, std::string test_file)
{
    // The train file must be parsed first so _n_samp_train and _D_train are
    // populated before the test file is read
    std::vector<std::string> feature_expr_train = populate_model(train_file, true);
    std::vector<std::string> feature_expr_test = populate_model(test_file, false);

    // Guard before indexing feature_expr_test below (the original read out of
    // bounds when the two files had a different number of features)
    if(feature_expr_train.size() != feature_expr_test.size())
        throw std::logic_error("Features for train and test file do not agree");

    for(std::size_t ff = 0; ff < feature_expr_train.size(); ++ff)
    {
        if(feature_expr_train[ff] != feature_expr_test[ff])
            throw std::logic_error("Features for train and test file do not agree");

        // Each description line is "<rung>, <unit>, <expression>"
        std::vector<std::string> split_str = str_utils::split_string_trim(feature_expr_train[ff]);
        if(split_str.size() < 3)
            throw std::logic_error("Invalid feature description line in file: " + train_file);

        int rung = std::stoi(split_str[0]);
        std::string unit_str = split_str[1];
        std::string expr = split_str[2];

        // Copy this feature's train and test columns out of the descriptor matrices
        std::vector<double> feat_val(_n_samp_train);
        std::vector<double> feat_test_val(_n_samp_test);
        std::copy_n(&_D_train[ff * _n_samp_train], _n_samp_train, feat_val.data());
        std::copy_n(&_D_test[ff * _n_samp_test], _n_samp_test, feat_test_val.data());

        _feats.push_back(std::make_shared<ModelNode>(ff, rung, expr, feat_val, feat_test_val, Unit(unit_str)));
    }
}
std::vector<std::string> Model::populate_model(std::string filename, bool train)
{
std::ifstream file_stream;
file_stream.open(filename, std::ios::in);
std::vector<std::string> feature_expr;
std::vector<std::string> split_line;
// Store model line
std::string model_line;
std::getline(file_stream, model_line);
// Get the error
std::string error_line;
std::getline(file_stream, error_line);
split_line = str_utils::split_string_trim(error_line);
double rmse = std::stod(split_line[1]);
double max_ae = std::stod(split_line[3]);
// Get coefficients
std::string line;
std::getline(file_stream, line);
std::getline(file_stream, line);
int n_task = 0;
int _n_dim = 0;
std::getline(file_stream, line);
do
{
++n_task;
split_line = str_utils::split_string_trim(line);
_n_dim = split_line.size() - 3;
if(train)
{
_coefs.push_back(std::vector<double>(_n_dim + 1, 0.0));
std::transform(split_line.begin() + 1, split_line.end()-1, _coefs.back().data(), [](std::string s){return std::stod(s);});
}
std::getline(file_stream, line);
} while(line.substr(0, 39).compare("# Feature Rung, Units, and Expressions") != 0);
std::getline(file_stream, line);
for(int ff = 0; ff < _n_dim; ++ff)
{
feature_expr.push_back(line.substr(6));
std::getline(file_stream, line);
}
std::getline(file_stream, line);
int n_samp = 0;
for(int tt = 0; tt < n_task; ++tt)
{
std::getline(file_stream, line);
split_line = str_utils::split_string_trim(line);
n_samp += std::stoi(split_line[1]);
if(train)
_task_sizes_train.push_back(std::stoi(split_line[1]));
else
_task_sizes_test.push_back(std::stoi(split_line[1]));
}
if(train)
{
_n_samp_train = n_samp;
_prop_train.resize(n_samp);
_prop_train_est.resize(n_samp);
_train_error.resize(n_samp);
}
else
{
_n_samp_test = n_samp;
_prop_test.resize(n_samp);
_prop_test_est.resize(n_samp);
_test_error.resize(n_samp);
}
std::getline(file_stream, line);
std::getline(file_stream, line);
if(!train)
std::getline(file_stream, line);
std::vector<std::vector<double>> feat_vals(_n_dim, std::vector<double>(n_samp, 0.0));
for(int ns = 0; ns < n_samp; ++ns)
{
std::getline(file_stream, line);
split_line = str_utils::split_string_trim(line);
if(train)
{
_prop_train[ns] = std::stod(split_line[0]);
_prop_train_est[ns] = std::stod(split_line[1]);
_train_error[ns] = _prop_train_est[ns] - _prop_train[ns];
}
else
{
_prop_test[ns] = std::stod(split_line[0]);
_prop_test_est[ns] = std::stod(split_line[1]);
_test_error[ns] = _prop_test_est[ns] - _prop_test[ns];
}
for(int nf = 0; nf < _n_dim; ++nf)
{
feat_vals[nf][ns] = std::stod(split_line[2 + nf]);
}
}
if(train)
{
_D_train.resize(_n_dim * n_samp);
for(int nf = 0; nf < _n_dim; ++nf)
std::copy_n(feat_vals[nf].data(), n_samp, &_D_train[nf * n_samp]);
}
else
{
_D_test.resize(_n_dim * n_samp);
for(int nf = 0; nf < _n_dim; ++nf)
std::copy_n(feat_vals[nf].data(), n_samp, &_D_test[nf * n_samp]);
}
return feature_expr;
}
std::string Model::toString() const
{
std::stringstream unit_rep;
......@@ -75,14 +244,14 @@ std::string Model::toString() const
unit_rep << " + a" << std::to_string(ff) << " * " << _feats[ff]->expr();
return unit_rep.str();
}
//
std::ostream& operator<< (std::ostream& outStream, const Model& model)
{
    // Stream the human-readable model expression
    return outStream << model.toString();
}
void Model::train_to_file(std::string filename)
void Model::to_file(std::string filename, bool train, std::vector<int> test_inds)
{
boost::filesystem::path p(filename.c_str());
boost::filesystem::create_directories(p.remove_filename());
......@@ -91,78 +260,74 @@ void Model::train_to_file(std::string filename)
out_file_stream.open(filename);
out_file_stream << "# " << toString() << std::endl;
out_file_stream << "# RMSE: " << rmse() << "; Max AE: " << max_ae() << std::endl;
if(train)
out_file_stream << "# RMSE: " << std::setprecision(15) << rmse() << "; Max AE: " << max_ae() << std::endl;
else
out_file_stream << "# RMSE: " << std::setprecision(15) << test_rmse() << "; Max AE: " << test_max_ae() << std::endl;
out_file_stream << "# Coefficients" << std::endl;
out_file_stream << std::setw(10) << std::left << "# Task,";
out_file_stream << std::setw(10) << std::left << "# Task;";
for(int cc = 0; cc < _coefs[0].size() - 1; ++cc)
out_file_stream << std::setw(24) << "a" + std::to_string(cc);
out_file_stream << std::setw(24) << "c0" << std::endl;
out_file_stream << std::setw(24) << " a" + std::to_string(cc);
out_file_stream << " c0" << std::endl;
for(int cc = 0; cc < _coefs.size(); ++cc)
{
out_file_stream << std::setw(10) << std::left << "# " + std::to_string(cc);
out_file_stream << std::setw(10) << std::left << "# " + std::to_string(cc) + ", ";
for(auto& coeff : _coefs[cc])
out_file_stream << std::setw(24) << std::setprecision(18) << coeff;
out_file_stream << std::setprecision(15) << std::scientific << std::right << std::setw(22) << coeff << std::setw(2) << ", ";
out_file_stream << "\n";
}
out_file_stream << "\n" << std::setw(24) << std::left << "# Property Value" << std::setw(24) << "Property Value (EST)";
out_file_stream << "# Feature Rung, Units, and Expressions" << std::endl;
for(int ff = 0; ff < _feats.size(); ++ff)
out_file_stream << std::setw(24) << "Feature " + std::to_string(ff) + " Value";
out_file_stream << std::endl;
out_file_stream << std::setw(6) << std::left << "# " + std::to_string(ff) + ", " << std::to_string(_feats[ff]->rung()) + ", " << std::setw(50) << _feats[ff]->unit().toString() + ", " << _feats[ff]->expr() << std::endl;
for(int ss = 0; ss < _n_samp_train; ++ss)
out_file_stream << "# Number of Samples Per Task" << std::endl;
if(train)
{
out_file_stream << std::setw(24) << std::setprecision(18) << _prop_train[ss] << std::setw(24) << std::setprecision(18) << _prop_train_est[ss];
for(int ff = 0; ff < _n_dim - 1; ++ff)
out_file_stream << std::setw(24) << std::setprecision(18) << _feats[ff]->value()[ss];
out_file_stream << std::endl;
out_file_stream << std::setw(10) << std::left << "# Task;" << std::setw(24) << "n_mats_train" << std::endl;
for(int tt = 0; tt < _task_sizes_train.size(); ++tt)
out_file_stream << std::left << std::setw(10) << "# " + std::to_string(tt) + ", " << std::left << std::setw(22) << _task_sizes_train[tt] << std::endl;
}
out_file_stream.close();
}
void Model::test_to_file(std::string filename, std::vector<int> test_inds)
{
boost::filesystem::path p(filename.c_str());
boost::filesystem::create_directories(p.remove_filename());
std::ofstream out_file_stream = std::ofstream();
out_file_stream.open(filename);
out_file_stream << "# " << toString() << std::endl;
out_file_stream << "# RMSE: " << rmse() << "; Max AE: " << max_ae() << std::endl;
out_file_stream << "# Coefficients" << std::endl;
out_file_stream << std::setw(10) << std::left << "# Task";
for(int cc = 0; cc < _coefs[0].size() - 1; ++cc)
out_file_stream << std::setw(24) << "a" + std::to_string(cc);
out_file_stream << std::setw(24) << "c0" << std::endl;
for(int cc = 0; cc < _coefs.size(); ++cc)
else
{
out_file_stream << std::setw(10) << std::left << "# " + std::to_string(cc);
for(auto& coeff : _coefs[cc])
out_file_stream << std::setw(24) << std::setprecision(18) << coeff;
out_file_stream << "\n";
}
out_file_stream << std::setw(10) << std::left << "# Task;" << std::setw(24) << "n_mats_test" << std::endl;
for(int tt = 0; tt < _task_sizes_test.size(); ++tt)
out_file_stream << std::left << std::setw(10) << "# " + std::to_string(tt) + ", " << std::left << std::setw(22) << _task_sizes_test[tt] << std::endl;
out_file_stream << "# Test Indexes: [ " << test_inds[0];
for(int ii = 1; ii < test_inds.size(); ++ii)
out_file_stream << ", " << test_inds[ii];
out_file_stream << " ]" << std::endl;
out_file_stream << "# Test Indexes: [ " << test_inds[0];
for(int ii = 1; ii < test_inds.size(); ++ii)
out_file_stream << ", " << test_inds[ii];
out_file_stream << " ]" << std::endl;
}
out_file_stream << "\n" << std::setw(24) << std::left << "# Property Value" << std::setw(24) << "Property Value (EST)";
out_file_stream << "\n" << std::setw(24) << std::left << "#Property Value" << std::setw(24) << " Property Value (EST)";
for(int ff = 0; ff < _feats.size(); ++ff)
out_file_stream << std::setw(24) << "Feature " + std::to_string(ff) + " Value";
out_file_stream << std::setw(24) << " Feature " + std::to_string(ff) + " Value";
out_file_stream << std::endl;
for(int ss = 0; ss < _n_samp_test; ++ss)
if(train)
{
out_file_stream << std::setw(24) << std::setprecision(18) << _prop_test[ss] << std::setw(24) << std::setprecision(18) << _prop_test_est[ss];
for(int ff = 0; ff < _n_dim - 1; ++ff)
out_file_stream << std::setw(24) << std::setprecision(18) << _feats[ff]->test_value()[ss];
out_file_stream << std::endl;
for(int ss = 0; ss < _n_samp_train; ++ss)
{
out_file_stream << std::right << std::setw(22) << std::setprecision(15) << std::scientific << _prop_train[ss] << std::setw(2) << ", " << std::setw(22) << _prop_train_est[ss];
for(int ff = 0; ff < _n_dim - 1; ++ff)
out_file_stream << std::right << std::setw(2) << ", " << std::setw(22) << std::setprecision(15) << _feats[ff]->value()[ss];
out_file_stream << std::endl;
}
}
else
{
for(int ss = 0; ss < _n_samp_test; ++ss)
{
out_file_stream << std::right << std::setw(22) << std::setprecision(15) << std::scientific << _prop_test[ss] << std::setw(2) << ", " << std::setw(22) << _prop_test_est[ss];
for(int ff = 0; ff < _n_dim - 1; ++ff)
out_file_stream << std::right << std::setw(2) << ", " << std::setw(22) << std::setprecision(15) << _feats[ff]->test_value()[ss];
out_file_stream << std::endl;
}
}
out_file_stream.close();
}
......@@ -171,6 +336,8 @@ void Model::register_python()
{
using namespace boost::python;
class_<Model>("Model", init<std::vector<double>, std::vector<double>, std::vector<model_node_ptr>, std::vector<int>, std::vector<int>>())
.def(init<std::string>())
.def(init<std::string, std::string>())
.def("predict", &Model::predict)
.def("fit", &Model::predict_train)
.def("__str__", &Model::toString)
......@@ -178,20 +345,16 @@ void Model::register_python()
.def_readonly("_n_samp_train", &Model::_n_samp_train)
.def_readonly("_n_samp_test", &Model::_n_samp_test)
.def_readonly("_n_dim", &Model::_n_dim)
.def_readonly("_feats", &Model::_feats)
.def_readonly("_coefs", &Model::_coefs)
.def_readonly("_prop_train", &Model::_prop_train)
.def_readonly("_prop_test", &Model::_prop_test)
.def_readonly("_train_error", &Model::_train_error)
.def_readonly("_test_error", &Model::_test_error)
.def_readonly("_D_train", &Model::_D_train)
.def_readonly("_D_test", &Model::_D_test)
.def_readonly("_prop_train_est", &Model::_prop_train_est)
.def_readonly("_prop_test_est", &Model::_prop_test_est)
.def_readonly("_task_sizes_train", &Model::_task_sizes_train)
.def_readonly("_task_sizes_test", &Model::_task_sizes_test)
.add_property("prop_train_est", &Model::prop_train_est)
.add_property("prop_test_est", &Model::prop_test_est)
.add_property("prop_train", &Model::prop_train)
.add_property("prop_test", &Model::prop_test)
.add_property("train_error", &Model::train_error)
.add_property("test_error", &Model::test_error)
.add_property("feats", &Model::feats)
.add_property("coefs", &Model::coefs)
.add_property("rmse", &Model::rmse)
.add_property("test_rmse", &Model::test_rmse)
.add_property("max_ae", &Model::max_ae)
.add_property("test_max_ae", &Model::test_max_ae);
}
\ No newline at end of file
}
#ifndef MODEL
#define MODEL
#include <boost/algorithm/string.hpp>
#include <boost/algorithm/string/trim.hpp>
#include <boost/filesystem.hpp>
#include <boost/python.hpp>
......@@ -9,6 +11,10 @@
#include<iostream>
#include <feature_creation/node/ModelNode.hpp>
#include <utils/string_utils.hpp>
namespace python = boost::python;
namespace np = boost::python::numpy;
typedef std::shared_ptr<ModelNode> model_node_ptr;
/**
......@@ -23,7 +29,7 @@ class Model
std::vector<model_node_ptr> _feats; //!< List of features in the model
std::vector<std::vector<double>> _coefs; //!< Coefficients for teh features
std::vector<std::vector<double>> _coefs; //!< Coefficients for the features
std::vector<double> _prop_train; //!< The property to be modeled
std::vector<double> _prop_test; //!< The property to be modeled
std::vector<double> _train_error; //!< The error of the model
......@@ -45,6 +51,10 @@ public:
*/
Model(std::vector<double> prop_train, std::vector<double> prop_test, std::vector<model_node_ptr> feats, std::vector<int> task_sizes_train, std::vector<int> task_sizes_test);
Model(std::string train_file);
Model(std::string train_file, std::string test_file);
std::vector<std::string> populate_model(std::string filename, bool train);
/**
* @brief Convert the model to a string
......@@ -89,16 +99,34 @@ public:
return std::abs(*std::max_element(_test_error.data(), _test_error.data() + _n_samp_test, [](double d1, double d2){return std::abs(d1) < std::abs(d2);}));
}
inline python::list coefs()
{
python::list coef_lst;
for(auto& task_coefs : _coefs)
coef_lst.append<python::list>(python_conv_utils::to_list<double>(task_coefs));
return coef_lst;
}
/**
* @brief Print model to a file
*/
void test_to_file(std::string filename, std::vector<int> test_inds);
inline python::list feats()
{
python::list feat_lst;
for(auto& feat : _feats)
feat_lst.append<ModelNode>(*feat);
return feat_lst;
}
inline np::ndarray prop_train_est(){return python_conv_utils::to_ndarray<double>(_prop_train_est);}
inline np::ndarray prop_test_est(){return python_conv_utils::to_ndarray<double>(_prop_test_est);}
inline np::ndarray prop_train(){return python_conv_utils::to_ndarray<double>(_prop_train);}
inline np::ndarray prop_test(){return python_conv_utils::to_ndarray<double>(_prop_test);}
inline np::ndarray train_error(){return python_conv_utils::to_ndarray<double>(_train_error);}
inline np::ndarray test_error(){return python_conv_utils::to_ndarray<double>(_test_error);}
/**
* @brief Print model to a file
*/
void train_to_file(std::string filename);
void to_file(std::string filename, bool train = true, std::vector<int> test_inds = {});
static void register_python();
};
......
#include <descriptor_identifier/SISSORegressor.hpp>
SISSORegressor::SISSORegressor(std::shared_ptr<FeatureSpace> feat_space, std::vector<double> prop, std::vector<double> prop_test, std::vector<int> task_sizes_train, std::vector<int> task_sizes_test, int n_dim, int n_residual):
SISSORegressor::SISSORegressor(std::shared_ptr<FeatureSpace> feat_space, std::vector<double> prop, std::vector<double> prop_test, std::vector<int> task_sizes_train, std::vector<int> task_sizes_test, std::vector<int> leave_out_inds, int n_dim, int n_residual):
_prop(prop),
_prop_test(prop_test),
_a((n_dim + 1) * prop.size()),
......@@ -9,6 +10,7 @@ SISSORegressor::SISSORegressor(std::shared_ptr<FeatureSpace> feat_space, std::ve
_s(n_dim + 1),
_task_sizes_train(task_sizes_train),
_task_sizes_test(task_sizes_test),
_leave_out_inds(leave_out_inds),
_feat_space(feat_space),
_mpi_comm(feat_space->mpi_comm()),
_n_samp(prop.size()),
......@@ -28,7 +30,7 @@ SISSORegressor::SISSORegressor(std::shared_ptr<FeatureSpace> feat_space, std::ve
_work = std::vector<double>(_lwork, 0.0);
}
SISSORegressor::SISSORegressor(std::shared_ptr<FeatureSpace> feat_space, np::ndarray prop, np::ndarray prop_test, python::list task_sizes_train, python::list task_sizes_test, int n_dim, int n_residual) :
SISSORegressor::SISSORegressor(std::shared_ptr<FeatureSpace> feat_space, np::ndarray prop, np::ndarray prop_test, python::list task_sizes_train, python::list task_sizes_test, python::list leave_out_inds, int n_dim, int n_residual) :
_prop(python_conv_utils::from_ndarray<double>(prop)),
_prop_test(python_conv_utils::from_ndarray<double>(prop_test)),
_a((n_dim + 1) * prop.shape(0)),
......@@ -37,6 +39,7 @@ SISSORegressor::SISSORegressor(std::shared_ptr<FeatureSpace> feat_space, np::nda
_s(n_dim + 1),
_task_sizes_train(python_conv_utils::from_list<int>(task_sizes_train)),
_task_sizes_test(python_conv_utils::from_list<int>(task_sizes_test)),
_leave_out_inds(python_conv_utils::from_list<int>(leave_out_inds)),
_feat_space(feat_space),
_mpi_comm(feat_space->mpi_comm()),
_n_samp(prop.shape(0)),
......@@ -56,7 +59,7 @@ SISSORegressor::SISSORegressor(std::shared_ptr<FeatureSpace> feat_space, np::nda
_work = std::vector<double>(_lwork, 0.0);
}
SISSORegressor::SISSORegressor(std::shared_ptr<FeatureSpace> feat_space, python::list prop, python::list prop_test, python::list task_sizes_train, python::list task_sizes_test, int n_dim, int n_residual) :
SISSORegressor::SISSORegressor(std::shared_ptr<FeatureSpace> feat_space, python::list prop, python::list prop_test, python::list task_sizes_train, python::list task_sizes_test, python::list leave_out_inds, int n_dim, int n_residual) :
_prop(python_conv_utils::from_list<double>(prop)),
_prop_test(python_conv_utils::from_list<double>(prop_test)),
_a((n_dim + 1) * boost::python::len(prop)),
......@@ -65,6 +68,7 @@ SISSORegressor::SISSORegressor(std::shared_ptr<FeatureSpace> feat_space, python:
_s(n_dim + 1),
_task_sizes_train(python_conv_utils::from_list<int>(task_sizes_train)),
_task_sizes_test(python_conv_utils::from_list<int>(task_sizes_test)),
_leave_out_inds(python_conv_utils::from_list<int>(leave_out_inds)),
_feat_space(feat_space),
_mpi_comm(feat_space->mpi_comm()),
_n_samp(boost::python::len(prop)),
......@@ -158,6 +162,12 @@ void SISSORegressor::fit()
model_node_ptr model_feat = std::make_shared<ModelNode>(_feat_space->phi_selected()[rr]->arr_ind(), _feat_space->phi_selected()[rr]->rung(), _feat_space->phi_selected()[rr]->expr(), _feat_space->phi_selected()[rr]->value(), _feat_space->phi_selected()[rr]->test_value(), _feat_space->phi_selected()[rr]->unit());
models.push_back(Model(_prop, _prop_test, {model_feat}, _task_sizes_train, _task_sizes_test));
models.back().copy_error(&residual[rr * _n_samp]);
if(_mpi_comm->rank() == 0)
{
models.back().to_file("models/train_dim_1_model_" + std::to_string(rr) + ".dat");
if(_leave_out_inds.size() > 0)
models.back().to_file("models/test_dim_1_model_" + std::to_string(rr) + ".dat", false, _leave_out_inds);
}
}
_models.push_back(models);
......@@ -182,10 +192,16 @@ void SISSORegressor::fit()
_mpi_comm->barrier();
duration = ( std::clock() - start ) / (double) CLOCKS_PER_SEC;
if(_mpi_comm->rank() == 0)
{
std::cout << "Time for l0-norm: " << duration << std::endl;
for(int rr = 0; rr < _n_residual; ++rr)
_models.back()[rr].copy_error(&residual[rr * _n_samp]);
for(int rr = 0; rr < _n_residual; ++rr)
{
_models.back()[rr].to_file("models/train_dim_" + std::to_string(dd) + "_model_" + std::to_string(rr) + ".dat");
if(_leave_out_inds.size() > 0)
_models.back()[rr].to_file("models/test_dim_" + std::to_string(dd) + "_model_" + std::to_string(rr) + ".dat", false, _leave_out_inds);
}
}
}
}
......@@ -275,8 +291,8 @@ python::list SISSORegressor::models_py()
void SISSORegressor::register_python()
{
using namespace boost::python;
class_<SISSORegressor>("SISSORegressor", init<std::shared_ptr<FeatureSpace>, np::ndarray, np::ndarray, python::list, python::list, int, int>())
.def(init<std::shared_ptr<FeatureSpace>, python::list, python::list, python::list, python::list, int, int>())
class_<SISSORegressor>("SISSORegressor", init<std::shared_ptr<FeatureSpace>, np::ndarray, np::ndarray, python::list, python::list, python::list, int, int>())
.def(init<std::shared_ptr<FeatureSpace>, python::list, python::list, python::list, python::list, python::list, int, int>())
.def("fit", &SISSORegressor::fit)
.add_property("prop", &SISSORegressor::prop_py)
.add_property("prop_test", &SISSORegressor::prop_test_py)
......
......@@ -27,6 +27,7 @@ protected:
std::vector<int> _task_sizes_train;
std::vector<int> _task_sizes_test;
std::vector<int> _leave_out_inds;
std::shared_ptr<FeatureSpace> _feat_space; //!< Feature Space for the problem
std::shared_ptr<MPI_Interface> _mpi_comm; //!< MPI Communicator
......@@ -47,9 +48,11 @@ public:
*/
SISSORegressor(std::shared_ptr<FeatureSpace> feat_space, std::vector<double> prop, std::vector<double> prop_test, std::vector<int> task_sizes_train, std::vector<int> task_sizes_test, int n_dim, int n_residual);
SISSORegressor(std::shared_ptr<FeatureSpace> feat_space, np::ndarray prop, np::ndarray prop_test, python::list task_sizes_train, python::list task_sizes_test, int n_dim, int n_residual);
SISSORegressor(std::shared_ptr<FeatureSpace> feat_space, std::vector<double> prop, std::vector<double> prop_test, std::vector<int> task_sizes_train, std::vector<int> task_sizes_test, std::vector<int> leave_out_inds, int n_dim, int n_residual);
SISSORegressor(std::shared_ptr<FeatureSpace> feat_space, python::list prop, python::list prop_test, python::list task_sizes_train, python::list task_sizes_test, int n_dim, int n_residual);
SISSORegressor(std::shared_ptr<FeatureSpace> feat_space, np::ndarray prop, np::ndarray prop_test, python::list task_sizes_train, python::list task_sizes_test, python::list leave_out_inds, int n_dim, int n_residual);
SISSORegressor(std::shared_ptr<FeatureSpace> feat_space, python::list prop, python::list prop_test, python::list task_sizes_train, python::list task_sizes_test, python::list leave_out_inds, int n_dim, int n_residual);
/**
* @brief Get the optimal size of the working array
......
......@@ -116,6 +116,22 @@ FeatureSpace::FeatureSpace(
initialize_fs(python_conv_utils::from_ndarray<double>(prop));
}
boost::python::list FeatureSpace::phi0_py()
{
    // Expose the primary feature set to python as value copies of each FeatureNode
    python::list primary_feats;
    for(std::size_t ff = 0; ff < _phi_0.size(); ++ff)
    {
        const auto& feat = _phi_0[ff];
        primary_feats.append<FeatureNode>(FeatureNode(feat->feat_ind(), feat->expr(), feat->value(), feat->test_value(), feat->unit()));
    }
    return primary_feats;
}
boost::python::list FeatureSpace::phi_selected_py()
{
    // Expose the selected features to python as value copies of each ModelNode
    python::list selected_feats;
    for(std::size_t ff = 0; ff < _phi_selected.size(); ++ff)
    {
        const auto& feat = _phi_selected[ff];
        selected_feats.append<ModelNode>(ModelNode(feat->d_mat_ind(), feat->rung(), feat->expr(), feat->value(), feat->test_value(), feat->unit()));
    }
    return selected_feats;
}
void FeatureSpace::initialize_fs(std::vector<double> prop)
{
if(_n_rung_store == -1)
......@@ -147,7 +163,6 @@ void FeatureSpace::initialize_fs(std::vector<double> prop)
generate_feature_space(prop);
_scores.reserve(_phi.size());
_scores.resize(_phi.size());
}
void FeatureSpace::generate_new_feats(std::vector<node_ptr>::iterator& feat, std::vector<node_ptr>& feat_set, int& feat_ind, double l_bound, double u_bound)
......@@ -795,7 +810,6 @@ void FeatureSpace::sis(std::vector<double>& prop)
}
if(_mpi_comm->rank() == 0)
out_file_stream.close();
}
void FeatureSpace::register_python()
......@@ -810,7 +824,6 @@ void FeatureSpace::register_python()
.def("sis", sis_ndarray)
.def("feat_in_phi", &FeatureSpace::feat_in_phi)
.add_property("phi_selected", &FeatureSpace::phi_selected_py)
.add_property("phi", &FeatureSpace::phi_py)
.add_property("phi0", &FeatureSpace::phi0_py)
.add_property("scores", &FeatureSpace::scores_py)
.add_property("task_sizes", &FeatureSpace::task_sizes_py)
......
......@@ -3,6 +3,7 @@
#include <mpi_interface/MPI_Interface.hpp>
#include <feature_creation/node/FeatureNode.hpp>
#include <feature_creation/node/ModelNode.hpp>
#include <feature_creation/node/operator_nodes/allowed_ops.hpp>
#include <feature_creation/node/value_storage/nodes_value_containers.hpp>
#include <utils/project.hpp>
......@@ -137,22 +138,19 @@ public:
*/
inline std::vector<node_ptr> phi_selected(){return _phi_selected;};
inline boost::python::list phi_selected_py(){return python_conv_utils::to_list<node_ptr>(_phi_selected);};
boost::python::list phi_selected_py();
/**
* @brief Accessor function for _phi
*/
inline std::vector<node_ptr> phi(){return _phi;};
inline boost::python::list phi_py(){return python_conv_utils::to_list<node_ptr>(_phi);};
/**
* @brief Accessor function for _phi_0
*/
inline std::vector<node_ptr> phi0(){return _phi_0;};
inline boost::python::list phi0_py(){return python_conv_utils::to_list<node_ptr>(_phi_0);};
boost::python::list phi0_py();
/**
* @brief Accessor function for _scores
*/
......
......@@ -3,15 +3,18 @@
// Default constructor; presumably required for serialization / wrapper code -- TODO confirm
FeatureNode::FeatureNode()
{}
FeatureNode::FeatureNode(int feat_ind, std::string expr, std::vector<double> value, std::vector<double> test_value, Unit unit) :
// Primary constructor. set_val controls whether the feature's data is pushed
// into the central value storage via set_value()/set_test_value();
// ModelNode passes set_val = false, keeping its values local to the node.
FeatureNode::FeatureNode(int feat_ind, std::string expr, std::vector<double> value, std::vector<double> test_value, Unit unit, bool set_val) :
    Node(feat_ind, value.size(), test_value.size()),
    _value(value),
    _test_value(test_value),
    _unit(unit),
    _expr(expr)
{
    // Only register the values in the shared storage when requested
    if(set_val)
    {
        set_value();
        set_test_value();
    }
}
FeatureNode::FeatureNode(int feat_ind, std::string expr, np::ndarray value, np::ndarray test_value, Unit unit) :
......
......@@ -60,7 +60,7 @@ public:
* @param value Value of the feature for each test sample
* @param unit Unit of the feature
*/
FeatureNode(int feat_ind, std::string expr, std::vector<double> value, std::vector<double> test_value, Unit unit);
FeatureNode(int feat_ind, std::string expr, std::vector<double> value, std::vector<double> test_value, Unit unit, bool set_val = true);
FeatureNode(int feat_ind, std::string expr, np::ndarray value, np::ndarray test_value, Unit unit);
FeatureNode(int feat_ind, std::string expr, python::list value, python::list test_value, Unit unit);
......
......@@ -4,7 +4,7 @@ ModelNode::ModelNode()
{}
ModelNode::ModelNode(int feat_ind, int rung, std::string expr, std::vector<double> value, std::vector<double> test_value, Unit unit) :
FeatureNode(feat_ind, expr, value, test_value, unit),
FeatureNode(feat_ind, expr, value, test_value, unit, false),
_rung(rung)
{}
......
......@@ -97,6 +97,8 @@ void Node::register_python()
class_<NodeWrap, boost::noncopyable>("Node", no_init)
.def("reindex", reindex_1)
.def("reindex", reindex_2)
.def("__str__", &Node::expr)
.def("__repr__", &Node::expr)
.add_property("n_samp", &Node::n_samp)
.add_property("n_test_samp", &Node::n_test_samp)
.add_property("feat_ind", &Node::feat_ind)
......@@ -112,7 +114,7 @@ void Node::register_python()
.def("is_nan", pure_virtual(&Node::is_nan))
.def("is_const", pure_virtual(&Node::is_const))
.def("rung", pure_virtual(&Node::rung))
;
;
}
BOOST_SERIALIZATION_ASSUME_ABSTRACT(Node)
......
......@@ -36,7 +36,7 @@ int main(int argc, char const *argv[])
std::cout<< "time input_parsing/Feature space generation: "<< duration << std::endl;
node_value_arrs::initialize_d_matrix_arr();
SISSORegressor sisso(IP._feat_space, IP._prop_train, IP._prop_test, IP._task_sizes_train, IP._task_sizes_test, IP._n_dim, IP._n_residuals);
SISSORegressor sisso(IP._feat_space, IP._prop_train, IP._prop_test, IP._task_sizes_train, IP._task_sizes_test, IP._leave_out_inds, IP._n_dim, IP._n_residuals);
sisso.fit();
if(mpi_setup::comm->rank() == 0)
......@@ -49,12 +49,12 @@ int main(int argc, char const *argv[])
else
std::cout << std::endl;
std::cout << sisso.models()[ii][0] << "\n" << std::endl;
for(int jj = 0; jj < sisso.models()[ii].size(); ++jj)
{
sisso.models()[ii][jj].train_to_file("models/train_dim_" + std::to_string(ii) + "_model_" + std::to_string(jj) + ".dat");
if(IP._prop_test.size() > 0)
sisso.models()[ii][jj].test_to_file("models/test_dim_" + std::to_string(ii) + "_model_" + std::to_string(jj) + ".dat", IP._leave_out_inds);
}
// for(int jj = 0; jj < sisso.models()[ii].size(); ++jj)
// {
// sisso.models()[ii][jj].to_file("models/train_dim_" + std::to_string(ii) + "_model_" + std::to_string(jj) + ".dat");
// if(IP._prop_test.size() > 0)
// sisso.models()[ii][jj].to_file("models/test_dim_" + std::to_string(ii) + "_model_" + std::to_string(jj) + ".dat", false, IP._leave_out_inds);
// }
}
}
......
#include <utils/string_utils.hpp>
namespace str_utils
{
// Split str on every occurrence of any character in split_tokens (adjacent
// delimiters and leading/trailing delimiters yield empty pieces) and strip
// surrounding whitespace from each piece.
std::vector<std::string> split_string_trim(std::string str, std::string split_tokens)
{
    const std::string whitespace = " \t\n\r\f\v";
    std::vector<std::string> pieces;
    std::string::size_type start = 0;
    while(true)
    {
        std::string::size_type pos = str.find_first_of(split_tokens, start);
        std::string piece = (pos == std::string::npos) ? str.substr(start) : str.substr(start, pos - start);

        // Trim whitespace from both ends of the piece
        std::string::size_type first = piece.find_first_not_of(whitespace);
        if(first == std::string::npos)
            pieces.push_back(std::string());
        else
        {
            std::string::size_type last = piece.find_last_not_of(whitespace);
            pieces.push_back(piece.substr(first, last - first + 1));
        }

        if(pos == std::string::npos)
            break;
        start = pos + 1;
    }
    return pieces;
}
}
#ifndef STRING_UTILS
#define STRING_UTILS

#include <cmath>
#include <string>
#include <vector>
#include <iostream>

#include <boost/algorithm/string.hpp>
#include <boost/algorithm/string/trim.hpp>

// General-purpose string manipulation helpers
namespace str_utils
{
/**
 * @brief Split a string on any of the given delimiter characters and trim whitespace from each resulting piece
 *
 * @param str The string to split
 * @param split_tokens Characters, any one of which separates two pieces (default ",;:")
 * @return The trimmed pieces, in order (adjacent delimiters yield empty pieces)
 */
std::vector<std::string> split_string_trim(std::string str, std::string split_tokens = ",;:");
}

#endif
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment