-
Thomas Purcell authored
typos when introducing the test set led to problems
Thomas Purcell authored: typos when introducing the test set led to problems
Model.cpp 5.25 KiB
#include <descriptor_identifier/Model/Model.hpp>

#include <algorithm>
#include <cstddef>
#include <stdexcept>
#include <string>
/**
 * @brief Build a linear model (one coefficient per feature plus a constant term) by least squares.
 *
 * The design matrices are stored column-major: column ff holds the values of feats[ff] and the
 * last column is all ones, so the last entry of _coefs is the constant term.
 *
 * @param prop_train Property values of the training samples (at least feats[0]->n_samp() entries)
 * @param prop_test Property values of the test samples (at least feats[0]->n_test_samp() entries)
 * @param feats Features of the model; must not be empty because feats[0] supplies the sample counts
 *
 * @throws std::invalid_argument if a property vector is shorter than the matching sample count
 * @throws std::runtime_error if the least-squares solver reports a failure
 */
Model::Model(std::vector<double> prop_train, std::vector<double> prop_test, std::vector<std::shared_ptr<Node>> feats) :
    _n_samp_train(feats[0]->n_samp()),
    _n_samp_test(feats[0]->n_test_samp()),
    _n_dim(feats.size() + 1),
    _feats(feats),
    _coefs(_n_dim),
    _prop_train(prop_train),
    _prop_test(prop_test),
    _train_error(_n_samp_train),
    _test_error(_n_samp_test),
    _D_train(_n_samp_train * _n_dim),
    _D_test(_n_samp_test * _n_dim),
    _prop_train_est(_n_samp_train, 0.0),
    _prop_test_est(_n_samp_test, 0.0)
{
    const int n_samp = _n_samp_train;
    const int n_test = _n_samp_test;
    const int n_dim = _n_dim;
    const std::size_t n_feat = feats.size();

    // Everything below reads n_samp / n_test property values, so reject short inputs up front.
    if(prop_train.size() < static_cast<std::size_t>(n_samp))
        throw std::invalid_argument("Model: prop_train has fewer entries than the features have training samples");
    if(prop_test.size() < static_cast<std::size_t>(n_test))
        throw std::invalid_argument("Model: prop_test has fewer entries than the features have test samples");

    // Fill the feature columns of both design matrices.
    for(std::size_t ff = 0; ff < n_feat; ++ff)
    {
        std::copy_n(feats[ff]->value_ptr(), n_samp, _D_train.data() + ff * n_samp);
        if(n_test > 0)
            std::copy_n(feats[ff]->test_value_ptr(), n_test, _D_test.data() + ff * n_test);
    }
    // The last column is all ones and carries the constant term.
    std::fill_n(_D_train.data() + n_feat * n_samp, n_samp, 1.0);

    // dgelss_ overwrites its matrix argument, so it works on a scratch copy of _D_train.
    std::vector<double> a(_D_train.data(), _D_train.data() + static_cast<std::size_t>(n_samp) * n_dim);

    // LAPACK requires the right-hand side to have max(M, N) rows (the solution is returned in it)
    // and LWORK >= 3 * min(M, N) + max(2 * min(M, N), max(M, N), NRHS).
    const int min_mn = std::min(n_samp, n_dim);
    const int max_mn = std::max(n_samp, n_dim);
    const int lwork_min = 3 * min_mn + std::max(std::max(2 * min_mn, max_mn), 1);
    const int lwork = std::max(n_samp * n_dim, lwork_min);

    prop_train.resize(max_mn, 0.0);
    std::vector<double> s(n_dim, 0.0);
    std::vector<double> work(lwork, 0.0);
    int rank = 0;
    int info = 0;
    dgelss_(n_samp, n_dim, 1, a.data(), n_samp, prop_train.data(), max_mn, s.data(), 1e-13, &rank, work.data(), lwork, &info);
    if(info != 0)
        throw std::runtime_error("Model: least-squares fit failed, dgelss returned info = " + std::to_string(info));

    // The first n_dim entries of the right-hand side now hold the fitted coefficients.
    std::copy_n(prop_train.begin(), n_dim, _coefs.data());

    // Estimated training property = D_train * coefs; error = estimate - reference.
    dgemv_('N', n_samp, n_dim, 1.0, _D_train.data(), n_samp, _coefs.data(), 1, 0.0, _prop_train_est.data(), 1);
    std::transform(_prop_train_est.begin(), _prop_train_est.end(), _prop_train.data(), _train_error.data(), std::minus<double>());

    if(n_test > 0)
    {
        // Same evaluation for the test set, again with a column of ones for the constant term.
        std::fill_n(_D_test.data() + n_feat * n_test, n_test, 1.0);
        dgemv_('N', n_test, n_dim, 1.0, _D_test.data(), n_test, _coefs.data(), 1, 0.0, _prop_test_est.data(), 1);
        std::transform(_prop_test_est.begin(), _prop_test_est.end(), _prop_test.data(), _test_error.data(), std::minus<double>());
    }
}
std::string Model::toString() const
{
std::stringstream unit_rep;
unit_rep << _coefs[_n_dim - 1];
for(int ff = 0; ff < _feats.size(); ++ff)
unit_rep << " + (" << _coefs[ff] << ") * " << _feats[ff]->expr();
return unit_rep.str();
}
/**
 * @brief Stream a model by writing its toString() representation
 *
 * @param outStream Stream to write to
 * @param model Model to print
 * @return outStream, so insertions can be chained
 */
std::ostream& operator<< (std::ostream& outStream, const Model& model)
{
    return outStream << model.toString();
}
void Model::train_to_file(std::string filename)
{
boost::filesystem::path p(filename.c_str());
boost::filesystem::create_directories(p.remove_filename());
std::ofstream out_file_stream = std::ofstream();
out_file_stream.open(filename);
out_file_stream << "# " << toString() << std::endl;
out_file_stream << "# RMSE: " << rmse() << "; Max AE: " << max_ae() << std::endl;
out_file_stream << "# coeffs:";
for(auto& coef: _coefs)
out_file_stream << " " << std::setw(24) << std::setprecision(18) << coef << ";";
out_file_stream << "\n# " << std::setw(23) << "Property Value," << std::setw(24) << "Property Value (EST),";
for(int ff = 0; ff < _feats.size(); ++ff)
out_file_stream << " Feature " << ff << " Value,";
out_file_stream << std::endl;
for(int ss = 0; ss < _n_samp_train; ++ss)
{
out_file_stream << std::setw(24) << std::setprecision(18) << _prop_train[ss] << std::setw(24) << std::setprecision(18) << _prop_train_est[ss];
for(int ff = 0; ff < _n_dim - 1; ++ff)
out_file_stream << std::setw(24) << std::setprecision(18) << _D_train[ss + ff * _n_samp_train];
out_file_stream << std::endl;
}
out_file_stream.close();
}
void Model::test_to_file(std::string filename, std::vector<int> test_inds)
{
boost::filesystem::path p(filename.c_str());
boost::filesystem::create_directories(p.remove_filename());
std::ofstream out_file_stream = std::ofstream();
out_file_stream.open(filename);
out_file_stream << "# " << toString() << std::endl;
out_file_stream << "# Testing Indexes: [" << test_inds[0];
for(int ss = 1; ss < _n_samp_test; ++ss)
out_file_stream << ", " << test_inds[ss];
out_file_stream << "]" << std::endl;
out_file_stream << "# RMSE: " << test_rmse() << "; Max AE: " << test_max_ae() << std::endl;
out_file_stream << "# coeffs:";
for(auto& coef: _coefs)
out_file_stream << " " << std::setw(24) << std::setprecision(18) << coef << ";";
out_file_stream << "\n# " << std::setw(23) << "Property Value," << std::setw(24) << "Property Value (EST),";
for(int ff = 0; ff < _feats.size(); ++ff)
out_file_stream << " Feature " << ff << " Value,";
out_file_stream << std::endl;
for(int ss = 0; ss < _n_samp_test; ++ss)
{
out_file_stream << std::setw(24) << std::setprecision(18) << _prop_test[ss] << std::setw(24) << std::setprecision(18) << _prop_test_est[ss];
for(int ff = 0; ff < _feats.size(); ++ff)
out_file_stream << std::setw(24) << std::setprecision(18) << _feats[ff]->test_value()[ss];
out_file_stream << std::endl;
}
out_file_stream.close();
}