Skip to content
Snippets Groups Projects
Model.cpp 5.25 KiB
#include <descriptor_identifier/Model/Model.hpp>

/**
 * @brief Build a linear model of the property from a set of features
 *
 * The design matrices are stored column by column: one column of
 * _n_samp_train (or _n_samp_test) values per feature, followed by a final
 * column of ones for the constant term. The coefficients are obtained from
 * dgelss_ (presumably a wrapper over LAPACK's DGELSS least-squares solver --
 * confirm against its declaration), after which the estimated property and
 * the error (estimate minus reference) are evaluated for the training set
 * and, when test samples exist, for the test set.
 *
 * @param prop_train Property values of the training samples
 * @param prop_test Property values of the test samples
 * @param feats Features of the model; feats[0] is dereferenced, so the list must not be empty
 */
Model::Model(std::vector<double> prop_train, std::vector<double> prop_test, std::vector<std::shared_ptr<Node>> feats) :
    _n_samp_train(feats[0]->n_samp()),
    _n_samp_test(feats[0]->n_test_samp()),
    _n_dim(feats.size() + 1),
    _feats(feats),
    _coefs(_n_dim),
    _prop_train(prop_train),
    _prop_test(prop_test),
    _train_error(_n_samp_train),
    _test_error(_n_samp_test),
    _D_train(_n_samp_train * _n_dim),
    _D_test(_n_samp_test * _n_dim),
    _prop_train_est(_n_samp_train, 0.0),
    _prop_test_est(_n_samp_test, 0.0)
{
    const std::size_t n_feat = feats.size();

    // One column per feature in both design matrices
    for(std::size_t col = 0; col < n_feat; ++col)
    {
        std::copy_n(feats[col]->value_ptr(), _n_samp_train, _D_train.data() + col * _n_samp_train);

        if(_n_samp_test > 0)
        {
            std::copy_n(feats[col]->test_value_ptr(), _n_samp_test, _D_test.data() + col * _n_samp_test);
        }
    }

    // The last training column is all ones (constant term)
    std::fill_n(_D_train.data() + n_feat * _n_samp_train, _n_samp_train, 1.0);

    // Scratch copy of the training matrix handed to the solver, so _D_train itself stays intact
    std::vector<double> lsq_matrix(_D_train.data(), _D_train.data() + _n_samp_train * _n_dim);

    std::vector<double> sing_vals(_n_dim, 0.0);
    std::vector<double> workspace(_n_dim * _n_samp_train, 0.0);
    int rank = 0;
    int info = 0;

    // prop_train is the local by-value copy; the first _n_dim entries are read back as the coefficients afterwards
    dgelss_(_n_samp_train, _n_dim, 1, lsq_matrix.data(), _n_samp_train, prop_train.data(), _n_samp_train, sing_vals.data(), 1e-13, &rank, workspace.data(), workspace.size(), &info);
    std::copy_n(prop_train.begin(), _n_dim, _coefs.data());

    // Training estimate = D_train * coefs; error = estimate - reference
    dgemv_('N', _n_samp_train, _n_dim, 1.0, _D_train.data(), _n_samp_train, _coefs.data(), 1, 0.0, _prop_train_est.data(), 1);
    std::transform(_prop_train_est.begin(), _prop_train_est.end(), _prop_train.data(), _train_error.data(), std::minus<double>());

    if(_n_samp_test > 0)
    {
        // Same evaluation for the test set, after setting its constant column to ones
        std::fill_n(_D_test.data() + n_feat * _n_samp_test, _n_samp_test, 1.0);
        dgemv_('N', _n_samp_test, _n_dim, 1.0, _D_test.data(), _n_samp_test, _coefs.data(), 1, 0.0, _prop_test_est.data(), 1);
        std::transform(_prop_test_est.begin(), _prop_test_est.end(), _prop_test.data(), _test_error.data(), std::minus<double>());
    }
}

std::string Model::toString() const
{
    std::stringstream unit_rep;
    unit_rep << _coefs[_n_dim - 1];
    for(int ff = 0; ff < _feats.size(); ++ff)
        unit_rep << " + (" << _coefs[ff] << ") * " << _feats[ff]->expr();
    return unit_rep.str();
}

/**
 * @brief Stream the string representation of a model
 *
 * @param outStream Stream to write to
 * @param model Model whose toString() output is written
 * @return The stream that was passed in
 */
std::ostream& operator<< (std::ostream& outStream, const Model& model)
{
    return outStream << model.toString();
}

void Model::train_to_file(std::string filename)
{
    boost::filesystem::path p(filename.c_str());
    boost::filesystem::create_directories(p.remove_filename());
    std::ofstream out_file_stream = std::ofstream();
    out_file_stream.open(filename);

    out_file_stream << "# " << toString() << std::endl;
    out_file_stream << "# RMSE: " << rmse() << "; Max AE: " << max_ae() << std::endl;
    out_file_stream << "# coeffs:";
    for(auto& coef: _coefs)
        out_file_stream << " " << std::setw(24) << std::setprecision(18) << coef << ";";
    out_file_stream << "\n# " << std::setw(23) << "Property Value," << std::setw(24) << "Property Value (EST),";
    for(int ff = 0; ff < _feats.size(); ++ff)
        out_file_stream << "       Feature " << ff << " Value,";
    out_file_stream << std::endl;

    for(int ss = 0; ss < _n_samp_train; ++ss)
    {
        out_file_stream << std::setw(24) << std::setprecision(18) << _prop_train[ss] << std::setw(24) << std::setprecision(18) << _prop_train_est[ss];
        for(int ff = 0; ff < _n_dim - 1; ++ff)
            out_file_stream << std::setw(24) << std::setprecision(18) << _D_train[ss + ff * _n_samp_train];
        out_file_stream << std::endl;
    }
    out_file_stream.close();
}

void Model::test_to_file(std::string filename, std::vector<int> test_inds)
{
    boost::filesystem::path p(filename.c_str());
    boost::filesystem::create_directories(p.remove_filename());

    std::ofstream out_file_stream = std::ofstream();
    out_file_stream.open(filename);

    out_file_stream << "# " << toString() << std::endl;
    out_file_stream << "# Testing Indexes: [" << test_inds[0];
    for(int ss = 1; ss < _n_samp_test; ++ss)
        out_file_stream << ", " << test_inds[ss];
    out_file_stream << "]" << std::endl;
    out_file_stream << "# RMSE: " << test_rmse() << "; Max AE: " << test_max_ae() << std::endl;
    out_file_stream << "# coeffs:";
    for(auto& coef: _coefs)
        out_file_stream << " " << std::setw(24) << std::setprecision(18) << coef << ";";
    out_file_stream << "\n# " << std::setw(23) << "Property Value," << std::setw(24) << "Property Value (EST),";
    for(int ff = 0; ff < _feats.size(); ++ff)
        out_file_stream << "       Feature " << ff << " Value,";
    out_file_stream << std::endl;

    for(int ss = 0; ss < _n_samp_test; ++ss)
    {
        out_file_stream << std::setw(24) << std::setprecision(18) << _prop_test[ss] << std::setw(24) << std::setprecision(18) << _prop_test_est[ss];
        for(int ff = 0; ff < _feats.size(); ++ff)
            out_file_stream << std::setw(24) << std::setprecision(18) << _feats[ff]->test_value()[ss];
        out_file_stream << std::endl;
    }
    out_file_stream.close();
}