Commit 016b7a34 authored by Thomas Purcell

Multi-task SISSO implemented

Works on initial test cases
parent 1f1553cc
......@@ -29,6 +29,7 @@ __top_builddir__sisso_cpp_SOURCES = \
feature_creation/node/operator_nodes/allowed_operator_nodes/sin.cpp \
feature_creation/node/operator_nodes/allowed_operator_nodes/cos.cpp \
feature_creation/node/operator_nodes/allowed_ops.cpp \
utils/project.cpp \
feature_creation/feature_space/FeatureSpace.cpp \
inputs/InputParser.cpp \
descriptor_identifier/Model/Model.cpp \
......
#include <descriptor_identifier/Model/Model.hpp>
Model::Model(std::vector<double> prop_train, std::vector<double> prop_test, std::vector<std::shared_ptr<FeatureNode>> feats) :
Model::Model(std::vector<double> prop_train, std::vector<double> prop_test, std::vector<std::shared_ptr<FeatureNode>> feats, std::vector<int> task_sizes_train, std::vector<int> task_sizes_test) :
_n_samp_train(feats[0]->n_samp()),
_n_samp_test(feats[0]->n_test_samp()),
_n_dim(feats.size() + 1),
_feats(feats),
_coefs(_n_dim),
_prop_train(prop_train),
_prop_test(prop_test),
_train_error(_n_samp_train),
......@@ -13,47 +12,67 @@ Model::Model(std::vector<double> prop_train, std::vector<double> prop_test, std:
_D_train(_n_samp_train * _n_dim),
_D_test(_n_samp_test * _n_dim),
_prop_train_est(_n_samp_train, 0.0),
_prop_test_est(_n_samp_test, 0.0)
_prop_test_est(_n_samp_test, 0.0),
_task_sizes_train(task_sizes_train),
_task_sizes_test(task_sizes_test)
{
_prop_train_est.reserve(_n_samp_train);
_prop_test_est.reserve(_n_samp_test);
std::vector<double> a(_n_samp_train * _n_dim, 1.0);
for(int ff = 0; ff < feats.size(); ++ff)
{
std::copy_n(feats[ff]->value_ptr(), _n_samp_train, _D_train.data() + ff * _n_samp_train);
std::copy_n(feats[ff]->value_ptr(), _n_samp_train, a.data() + ff * _n_samp_train);
if(_n_samp_test > 0)
std::copy_n(feats[ff]->test_value().data(), _n_samp_test, _D_test.data() + ff * _n_samp_test);
}
std::copy_n(a.data() + feats.size() * _n_samp_train, _n_samp_train, _D_train.data() + feats.size() * _n_samp_train);
std::vector<double> s(_n_dim, 0.0);
std::vector<double> work(_n_dim * _n_samp_train, 0.0);
int rank = 0;
int info = 0;
int start = 0;
dgelss_(_n_samp_train, _n_dim, 1, a.data(), _n_samp_train, prop_train.data(), _n_samp_train, s.data(), 1e-13, &rank, work.data(), work.size(), &info);
std::copy_n(prop_train.begin(), _n_dim, _coefs.data());
dgemv_('N', _n_samp_train, _n_dim, 1.0, _D_train.data(), _n_samp_train, _coefs.data(), 1, 0.0, _prop_train_est.data(), 1);
std::transform(_prop_train_est.begin(), _prop_train_est.end(), _prop_train.data(), _train_error.data(), std::minus<double>());
if(_n_samp_test > 0)
for(auto& sz : _task_sizes_train)
{
std::copy_n(std::vector<double>(_n_samp_test, 1.0).data(), _n_samp_test, _D_test.data() + feats.size() * _n_samp_test);
dgemv_('N', _n_samp_test, _n_dim, 1.0, _D_test.data(), _n_samp_test, _coefs.data(), 1, 0.0, _prop_test_est.data(), 1);
std::transform(_prop_test_est.begin(), _prop_test_est.end(), _prop_test.data(), _test_error.data(), std::minus<double>());
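// One least-squares fit per task: pack the task's slice of each feature
// (plus a column of ones) into the scratch matrix a, solve with dgelss,
// and store a separate coefficient vector per task in _coefs.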
std::fill_n(a.data() + feats.size() * sz, sz, 1.0);
std::fill_n(_D_train.data() + feats.size() * sz, sz, 1.0);
for(int ff = 0; ff < feats.size(); ++ff)
{
std::copy_n(feats[ff]->value_ptr() + start, sz, _D_train.data() + ff * sz);
std::copy_n(feats[ff]->value_ptr() + start, sz, a.data() + ff * sz);
}
dgelss_(sz, _n_dim, 1, a.data(), sz, prop_train.data() + start, sz, s.data(), 1e-13, &rank, work.data(), work.size(), &info);
_coefs.push_back(std::vector<double>(_n_dim, 0.0));
std::copy_n(prop_train.begin() + start, _n_dim, _coefs.back().data());
dgemv_('N', sz, _n_dim, 1.0, _D_train.data(), sz, _coefs.back().data(), 1, 0.0, _prop_train_est.data() + start, 1);
std::transform(_prop_train_est.begin() + start, _prop_train_est.begin() + start + sz, _prop_train.data() + start, _train_error.data() + start, std::minus<double>());
start += sz;
}
start = 0;
int ii = 0;
for(auto& sz : _task_sizes_test)
{
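// Tasks with no test samples are skipped, but ii still advances so each
// task is paired with its own coefficient vector.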
if(sz > 0)
{
for(int ff = 0; ff < feats.size(); ++ff)
std::copy_n(feats[ff]->test_value().data() + start, sz, _D_test.data() + ff * sz);
std::fill_n(_D_test.data() + feats.size() * sz, sz, 1.0);
dgemv_('N', sz, _n_dim, 1.0, _D_test.data(), sz, _coefs[ii].data(), 1, 0.0, _prop_test_est.data() + start, 1);
std::transform(_prop_test_est.begin() + start, _prop_test_est.begin() + start + sz, _prop_test.data() + start, _test_error.data() + start, std::minus<double>());
}
++ii;
start += sz;
}
}
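For orientation, the per-task fit in the constructor above reduces to the following self-contained sketch. It is a hypothetical helper, not repository code; it assumes the same by-value dgelss_ wrapper prototype this file already calls (standard LAPACK passes every argument by pointer) and the column-major layout with the bias as the last column.

#include <algorithm>
#include <vector>

// Assumed wrapper prototype matching the call sites above.
void dgelss_(int m, int n, int nrhs, double* a, int lda, double* b, int ldb,
             double* s, double rcond, int* rank, double* work, int lwork,
             int* info);

// Fit y ~ sum_f a_f * feat_f + c0 on one task's n_samp samples; returns the
// n_feat + 1 coefficients, bias last, mirroring the loop body above.
std::vector<double> fit_one_task(const std::vector<const double*>& feat_vals,
                                 const double* prop, int n_samp)
{
    int n_feat = static_cast<int>(feat_vals.size());
    int n_dim = n_feat + 1;
    std::vector<double> a(n_samp * n_dim, 1.0);   // last column stays all ones
    for(int ff = 0; ff < n_feat; ++ff)
        std::copy_n(feat_vals[ff], n_samp, a.data() + ff * n_samp);

    std::vector<double> b(prop, prop + n_samp);   // dgelss_ overwrites b
    std::vector<double> s(n_dim, 0.0);
    std::vector<double> work(n_dim * n_samp, 0.0);
    int rank = 0;
    int info = 0;
    dgelss_(n_samp, n_dim, 1, a.data(), n_samp, b.data(), n_samp, s.data(),
            1e-13, &rank, work.data(), static_cast<int>(work.size()), &info);

    // On success the leading n_dim entries of b hold the coefficients.
    return std::vector<double>(b.begin(), b.begin() + n_dim);
}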
std::string Model::toString() const
{
std::stringstream unit_rep;
unit_rep << _coefs[_n_dim - 1];
unit_rep << "c0";
for(int ff = 0; ff < _feats.size(); ++ff)
unit_rep << " + (" << _coefs[ff] << ") * " << _feats[ff]->expr();
unit_rep << " + a" << std::to_string(ff) << " * " << _feats[ff]->expr();
return unit_rep.str();
}
......@@ -73,19 +92,31 @@ void Model::train_to_file(std::string filename)
out_file_stream << "# " << toString() << std::endl;
out_file_stream << "# RMSE: " << rmse() << "; Max AE: " << max_ae() << std::endl;
out_file_stream << "# coeffs:";
for(auto& coef: _coefs)
out_file_stream << " " << std::setw(24) << std::setprecision(18) << coef << ";";
out_file_stream << "\n# " << std::setw(23) << "Property Value," << std::setw(24) << "Property Value (EST),";
out_file_stream << "# Coefficients" << std::endl;
out_file_stream << std::setw(10) << std::left << "# Task,";
for(int cc = 0; cc < _coefs[0].size() - 1; ++cc)
out_file_stream << std::setw(24) << "a" + std::to_string(cc);
out_file_stream << std::setw(24) << "c0" << std::endl;
for(int cc = 0; cc < _coefs.size(); ++cc)
{
out_file_stream << std::setw(10) << std::left << "# " + std::to_string(cc);
for(auto& coeff : _coefs[cc])
out_file_stream << std::setw(24) << std::setprecision(18) << coeff;
out_file_stream << "\n";
}
out_file_stream << "\n" << std::setw(24) << std::left << "# Property Value" << std::setw(24) << "Property Value (EST)";
for(int ff = 0; ff < _feats.size(); ++ff)
out_file_stream << " Feature " << ff << " Value,";
out_file_stream << std::setw(24) << "Feature " + std::to_string(ff) + " Value";
out_file_stream << std::endl;
for(int ss = 0; ss < _n_samp_train; ++ss)
{
out_file_stream << std::setw(24) << std::setprecision(18) << _prop_train[ss] << std::setw(24) << std::setprecision(18) << _prop_train_est[ss];
for(int ff = 0; ff < _n_dim - 1; ++ff)
out_file_stream << std::setw(24) << std::setprecision(18) << _D_train[ss + ff * _n_samp_train];
out_file_stream << std::setw(24) << std::setprecision(18) << _feats[ff]->value()[ss];
out_file_stream << std::endl;
}
out_file_stream.close();
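With two features and two tasks, the header block written above takes roughly this shape (illustrative placeholders, not real output):

# c0 + a0 * (feature 0 expression) + a1 * (feature 1 expression)
# RMSE: <rmse>; Max AE: <max_ae>
# Coefficients
# Task,   a0                      a1                      c0
# 0       <a0, task 0>            <a1, task 0>            <c0, task 0>
# 1       <a0, task 1>            <a1, task 1>            <c0, task 1>

# Property Value         Property Value (EST)    Feature 0 Value         Feature 1 Value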
......@@ -100,23 +131,36 @@ void Model::test_to_file(std::string filename, std::vector<int> test_inds)
out_file_stream.open(filename);
out_file_stream << "# " << toString() << std::endl;
out_file_stream << "# Testing Indexes: [" << test_inds[0];
for(int ss = 1; ss < _n_samp_test; ++ss)
out_file_stream << ", " << test_inds[ss];
out_file_stream << "]" << std::endl;
out_file_stream << "# RMSE: " << test_rmse() << "; Max AE: " << test_max_ae() << std::endl;
out_file_stream << "# coeffs:";
for(auto& coef: _coefs)
out_file_stream << " " << std::setw(24) << std::setprecision(18) << coef << ";";
out_file_stream << "\n# " << std::setw(23) << "Property Value," << std::setw(24) << "Property Value (EST),";
out_file_stream << "# RMSE: " << rmse() << "; Max AE: " << max_ae() << std::endl;
out_file_stream << "# Coefficients" << std::endl;
out_file_stream << std::setw(10) << std::left << "# Task";
for(int cc = 0; cc < _coefs[0].size() - 1; ++cc)
out_file_stream << std::setw(24) << "a" + std::to_string(cc);
out_file_stream << std::setw(24) << "c0" << std::endl;
for(int cc = 0; cc < _coefs.size(); ++cc)
{
out_file_stream << std::setw(10) << std::left << "# " + std::to_string(cc);
for(auto& coeff : _coefs[cc])
out_file_stream << std::setw(24) << std::setprecision(18) << coeff;
out_file_stream << "\n";
}
out_file_stream << "#Test Indexes: [ " << test_inds[0];
for(int ii = 1; ii < test_inds.size(); ++ii)
out_file_stream << ", " << test_inds[ii];
out_file_stream << " ]" << std::endl;
out_file_stream << "\n" << std::setw(24) << std::left << "# Property Value" << std::setw(24) << "Property Value (EST)";
for(int ff = 0; ff < _feats.size(); ++ff)
out_file_stream << " Feature " << ff << " Value,";
out_file_stream << std::setw(24) << "Feature " + std::to_string(ff) + " Value";
out_file_stream << std::endl;
for(int ss = 0; ss < _n_samp_test; ++ss)
{
out_file_stream << std::setw(24) << std::setprecision(18) << _prop_test[ss] << std::setw(24) << std::setprecision(18) << _prop_test_est[ss];
for(int ff = 0; ff < _feats.size(); ++ff)
for(int ff = 0; ff < _n_dim - 1; ++ff)
out_file_stream << std::setw(24) << std::setprecision(18) << _feats[ff]->test_value()[ss];
out_file_stream << std::endl;
}
......
......@@ -21,7 +21,7 @@ class Model
std::vector<std::shared_ptr<FeatureNode>> _feats; //!< List of features in the model
std::vector<double> _coefs; //!< Coefficients for the features
std::vector<std::vector<double>> _coefs; //!< Coefficients for the features (one vector per task)
std::vector<double> _prop_train; //!< The property to be modeled
std::vector<double> _prop_test; //!< The property to be modeled
std::vector<double> _train_error; //!< The error of the model
......@@ -32,6 +32,8 @@ class Model
std::vector<double> _prop_train_est; //!< The estimated Property
std::vector<double> _prop_test_est; //!< The estimated Property
std::vector<int> _task_sizes_train; //!< Number of training samples in each task
std::vector<int> _task_sizes_test; //!< Number of test samples in each task
public:
/**
* @brief Constructor for the model
......@@ -39,7 +41,7 @@ public:
* @param prop_train The property vector for the training samples
* @param prop_test The property vector for the test samples
* @param feats The features for the model
* @param task_sizes_train Number of training samples in each task
* @param task_sizes_test Number of test samples in each task
*/
Model(std::vector<double> prop_train, std::vector<double> prop_test, std::vector<std::shared_ptr<FeatureNode>> feats);
Model(std::vector<double> prop_train, std::vector<double> prop_test, std::vector<std::shared_ptr<FeatureNode>> feats, std::vector<int> task_sizes_train, std::vector<int> task_sizes_test);
/**
......
#include <descriptor_identifier/SISSORegressor.hpp>
SISSORegressor::SISSORegressor(std::shared_ptr<FeatureSpace> feat_space, std::vector<double> prop, std::vector<double> prop_test, int n_dim, int n_residual):
SISSORegressor::SISSORegressor(std::shared_ptr<FeatureSpace> feat_space, std::vector<double> prop, std::vector<double> prop_test, std::vector<int> task_sizes_train, std::vector<int> task_sizes_test, int n_dim, int n_residual):
_prop(prop),
_prop_test(prop_test),
_a(new double[(n_dim + 1) * prop.size()]),
......@@ -9,6 +9,8 @@ SISSORegressor::SISSORegressor(std::shared_ptr<FeatureSpace> feat_space, std::ve
_error(new double[prop.size()]),
_work(nullptr),
_s(new double[n_dim + 1]),
_task_sizes_train(task_sizes_train),
_task_sizes_test(task_sizes_test),
_feat_space(feat_space),
_mpi_comm(feat_space->mpi_comm()),
_n_samp(prop.size()),
......@@ -33,22 +35,22 @@ SISSORegressor::SISSORegressor(std::shared_ptr<FeatureSpace> feat_space, std::ve
_work = std::unique_ptr<double[]>(new double[_lwork]);
}
void SISSORegressor::set_a(std::vector<int>& inds)
void SISSORegressor::set_a(std::vector<int>& inds, int start, int n_samp)
{
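// Pack this task's slice of each selected feature as one column of the
// column-major matrix A (leading dimension n_samp) and append a column
// of ones for the bias term.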
for(int ii = 0; ii < inds.size(); ++ii)
std::copy_n(node_value_arrs::get_d_matrix_ptr(inds[ii]), _n_samp, _a.get() + ii * _n_samp);
std::copy_n(_ones.get(), _n_samp, _a.get() + inds.size() * _n_samp);
std::copy_n(node_value_arrs::get_d_matrix_ptr(inds[ii]) + start, n_samp, _a.get() + ii * n_samp);
std::copy_n(_ones.get(), n_samp, _a.get() + inds.size() * n_samp);
}
void SISSORegressor::least_squares(std::vector<int>& inds, double* coeffs)
void SISSORegressor::least_squares(std::vector<int>& inds, double* coeffs, int start, int n_samp)
{
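// Solve the SVD-based least-squares problem min_c ||A c - b||_2 on this
// task's samples; on success the first n_dim entries of _b are the
// coefficients (bias last).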
int info;
int n_dim = inds.size() + 1;
set_a(inds);
std::copy_n(_prop.data(), _n_samp, _b.get());
set_a(inds, start, n_samp);
std::copy_n(_prop.data() + start, n_samp, _b.get());
dgelss_(_n_samp, n_dim, 1, _a.get(), _n_samp, _b.get(), _n_samp, _s.get(), 1e-13, &_rank, _work.get(), _lwork, &info);
dgelss_(n_samp, n_dim, 1, _a.get(), n_samp, _b.get(), n_samp, _s.get(), 1e-13, &_rank, _work.get(), _lwork, &info);
if(info == 0)
std::copy_n(_b.get(), n_dim, coeffs);
......@@ -69,11 +71,11 @@ int SISSORegressor::get_opt_lwork(int n_dim)
throw std::logic_error("Failed to get lwork.");
}
void SISSORegressor::set_error(std::vector<int>& inds, double* coeffs)
void SISSORegressor::set_error(std::vector<int>& inds, double* coeffs, int start, int n_samp)
{
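// Rebuild A for this task and form prop - A * coeffs as the residual for
// samples [start, start + n_samp); note dgemv_ is called with beta = 1e-13
// rather than 0.0, so a vanishing multiple of _b's old contents survives.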
set_a(inds);
dgemv_('N', _n_samp, inds.size() + 1, 1.0, _a.get(), _n_samp, coeffs, 1, 1e-13, _b.get(), 1);
std::transform(_prop.begin(), _prop.end(), _b.get(), _error.get(), std::minus<double>());
set_a(inds, start, n_samp);
dgemv_('N', n_samp, inds.size() + 1, 1.0, _a.get(), n_samp, coeffs, 1, 1e-13, _b.get(), 1);
std::transform(_prop.begin() + start, _prop.begin() + start + n_samp, _b.get(), _error.get() + start, std::minus<double>());
}
void SISSORegressor::fit()
......@@ -95,7 +97,7 @@ void SISSORegressor::fit()
std::vector<Model> models;
for(int rr = 0; rr < _n_residual; ++rr)
{
models.push_back(Model(_prop, _prop_test, {_feat_space->phi_selected()[rr]}));
models.push_back(Model(_prop, _prop_test, {_feat_space->phi_selected()[rr]}, _task_sizes_train, _task_sizes_test));
models.back().copy_error(&residual[rr * _n_samp]);
}
_models.push_back(models);
......@@ -142,9 +144,16 @@ void SISSORegressor::l0_norm(std::vector<double>& prop, int n_dim)
util_funcs::iterate(inds, inds.size(), _mpi_comm->rank());
do {
least_squares(inds, coefs.data());
set_error(inds, coefs.data());
double error = util_funcs::norm(_error.get(), _n_samp);
int start = 0;
double error = 0.0;
for(auto& sz : _task_sizes_train)
{
least_squares(inds, coefs.data(), start, sz);
set_error(inds, coefs.data(), start, sz);
error += std::pow(util_funcs::norm(_error.get() + start, sz), 2.0) / sz;
start += sz;
}
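// Aggregate across tasks: error = sqrt((1/T) * sum_t ||r_t||^2 / n_t),
// i.e. the root of the mean per-task mean squared error, with r_t the
// residual vector and n_t the sample count of task t.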
error = std::sqrt(error / _task_sizes_train.size());
if(error < min_errors.back())
{
int rr = 0;
......@@ -174,7 +183,7 @@ void SISSORegressor::l0_norm(std::vector<double>& prop, int n_dim)
{
for(int ii = 0; ii < n_dim; ++ii)
min_nodes[ii] = _feat_space->phi_selected()[all_inds_min[inds[rr] * n_dim + ii]];
models.push_back(Model(_prop, _prop_test, min_nodes));
models.push_back(Model(_prop, _prop_test, min_nodes, _task_sizes_train, _task_sizes_test));
}
_models.push_back(models);
......
......@@ -24,6 +24,8 @@ protected:
std::unique_ptr<double[]> _work; //!< The work array for least squares problems
std::unique_ptr<double[]> _s; //!< The S array for least squares problems
std::vector<int> _task_sizes_train; //!< Number of training samples in each task
std::vector<int> _task_sizes_test; //!< Number of test samples in each task
std::shared_ptr<FeatureSpace> _feat_space; //!< Feature Space for the problem
std::shared_ptr<MPI_Interface> _mpi_comm; //!< MPI Communicator
......@@ -42,7 +44,7 @@ public:
* @param prop Property to model
* @param prop_test Property values for the test samples
* @param task_sizes_train Number of training samples in each task
* @param task_sizes_test Number of test samples in each task
* @param n_dim Maximum dimension of the model
*/
SISSORegressor(std::shared_ptr<FeatureSpace> feat_space, std::vector<double> prop, std::vector<double> prop_test, int n_dim, int n_residual);
SISSORegressor(std::shared_ptr<FeatureSpace> feat_space, std::vector<double> prop, std::vector<double> prop_test, std::vector<int> task_sizes_train, std::vector<int> task_sizes_test, int n_dim, int n_residual);
/**
* @brief Get the optimal size of the working array
......@@ -58,7 +60,7 @@ public:
* @param inds Feature indexes to get the model of
* @param coeffs Coefficients for the model
* @param start Index of the task's first sample in the property vector
* @param n_samp Number of samples in the task
*/
void least_squares(std::vector<int>& inds, double* coeffs);
void least_squares(std::vector<int>& inds, double* coeffs, int start, int n_samp);
/**
* @brief Set the residual for the next step
......@@ -66,14 +68,14 @@ public:
* @param inds indexes of the selected features
* @param coeffs Coefficients of the model
* @param start Index of the task's first sample in the property vector
* @param n_samp Number of samples in the task
*/
void set_error(std::vector<int>& inds, double* coeffs);
void set_error(std::vector<int>& inds, double* coeffs, int start, int n_samp);
/**
* @brief Set the A matrix for the least squares problem
*
* @param inds indexes of the selected features
* @param start Index of the task's first sample in the property vector
* @param n_samp Number of samples in the task
*/
void set_a(std::vector<int>& inds);
void set_a(std::vector<int>& inds, int start, int n_samp);
/**
* @brief Fit the models
......
......@@ -22,6 +22,7 @@ FeatureSpace::FeatureSpace(
std::shared_ptr<MPI_Interface> mpi_comm,
std::vector<node_ptr> phi_0,
std::vector<std::string> allowed_ops,
std::vector<int> task_sizes,
int max_phi,
int n_sis_select,
int max_store_rung,
......@@ -33,6 +34,7 @@ FeatureSpace::FeatureSpace(
_phi_0(phi_0),
_allowed_ops(allowed_ops),
_scores(phi_0.size(), 0.0),
_task_sizes(task_sizes),
_start_gen(1, 0),
_mpi_comm(mpi_comm),
_l_bound(min_abs_feat_val),
......@@ -44,6 +46,7 @@ FeatureSpace::FeatureSpace(
_n_rung_store(max_store_rung),
_n_rung_generate(n_rung_generate)
{
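// The projection operator is stored as a std::function so SIS scoring can
// be swapped out per problem type; the default is the Pearson-correlation
// projection now living in utils/project.cpp.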
_project = project_funcs::project_r;
if(_n_rung_generate > 1)
throw std::logic_error("A maximum of one rung can be generated on the fly.");
else if(_max_phi - _n_rung_generate < _n_rung_store)
......@@ -289,48 +292,9 @@ void FeatureSpace::generate_feature_space()
}
}
}
for(int ii = 0; ii < _mpi_comm->size(); ++ii)
{
_mpi_comm->barrier();
if(_mpi_comm->rank() == ii)
for(auto& feat : _phi)
std::cout << feat->expr() << std::endl;
_mpi_comm->barrier();
}
_n_feat = _phi.size();
}
void FeatureSpace::project_r(double* prop, int size)
{
std::vector<double> scores(_phi.size(), 0.0);
for(int ff = 0; ff < _phi.size(); ++ff)
_scores[ff] = -1.0 * std::abs(util_funcs::r(&prop[0], _phi[ff]->value_ptr(), _n_samp));
for(int pp = 1; pp < size / _n_samp; ++pp)
{
for(int ff = 0; ff < _phi.size(); ++ff)
scores[ff] = -1.0 * std::abs(util_funcs::r(&prop[_n_samp*pp], _phi[ff]->value_ptr(), _n_samp));
std::transform(scores.begin(), scores.end(), _scores.begin(), _scores.begin(), [](double s1, double s2){return std::min(s1, s2);});
}
}
std::vector<double> FeatureSpace::project_r(double* prop, int size, std::vector<node_ptr>& phi)
{
std::vector<double> scores(phi.size(), 0.0);
std::vector<double> scores_temp(phi.size(), 0.0);
for(int ff = 0; ff < phi.size(); ++ff)
scores[ff] = -1.0 * std::abs(util_funcs::r(&prop[0], phi[ff]->value_ptr(), _n_samp));
for(int pp = 1; pp < size / _n_samp; ++pp)
{
for(int ff = 0; ff < phi.size(); ++ff)
scores_temp[ff] = -1.0 * std::abs(util_funcs::r(&prop[_n_samp*pp], phi[ff]->value_ptr(), _n_samp));
std::transform(scores_temp.begin(), scores_temp.end(), scores.begin(), scores.begin(), [](double s1, double s2){return std::min(s1, s2);});
}
return scores;
}
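The two member functions removed above are replaced by a free function in the new utils/project.cpp, which this diff does not show. Below is a minimal sketch consistent with the std::function member declared in FeatureSpace.hpp; pearson_r stands in for the util_funcs::r used by the removed code, and the min-reduction over tasks and properties mirrors the removed members. Names and the reduction are assumptions, not the repository's implementation.

#include <algorithm>
#include <cmath>
#include <vector>

// Hypothetical sketch of project_funcs::project_r; the signature is taken
// from the std::function<void(double*, double*, std::vector<node_ptr>&,
// std::vector<int>&, int)> member of FeatureSpace.
void project_r(double* prop, double* scores, std::vector<node_ptr>& phi,
               std::vector<int>& task_sizes, int n_prop)
{
    int n_samp = 0;
    for(int sz : task_sizes)
        n_samp += sz;

    for(int ff = 0; ff < static_cast<int>(phi.size()); ++ff)
    {
        double score = 0.0;                  // -|r| is never positive
        double* val = phi[ff]->value_ptr();
        for(int pp = 0; pp < n_prop; ++pp)   // loop over property vectors
        {
            int start = 0;
            for(int sz : task_sizes)         // one correlation per task
            {
                score = std::min(score, -std::abs(
                    pearson_r(prop + pp * n_samp + start, val + start, sz)));
                start += sz;
            }
        }
        scores[ff] = score;                  // more negative = better
    }
}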
void FeatureSpace::project_generated(double* prop, int size, std::vector<std::shared_ptr<FeatureNode>>& phi_sel, std::vector<double>& scores_sel, std::vector<double>& scores_comp)
{
for(auto feat = _phi.begin() + _start_gen.back() + _mpi_comm->rank(); feat < _phi.end(); feat += _mpi_comm->size())
......@@ -342,7 +306,9 @@ void FeatureSpace::project_generated(double* prop, int size, std::vector<std::sh
std::vector<node_ptr> generated_phi;
generate_new_feats(feat, generated_phi, feat_ind, _l_bound, _u_bound);
std::vector<double> scores = project_r(prop, size, generated_phi);
std::vector<double> scores(generated_phi.size(), 0.0);
_project(prop, scores.data(), generated_phi, _task_sizes, size / _n_samp);
std::vector<int> inds = util_funcs::argsort(scores);
int ii = 0;
......@@ -434,7 +400,8 @@ void FeatureSpace::sis(std::vector<double>& prop)
node_value_arrs::resize_d_matrix_arr(_n_sis_select);
_phi_selected.reserve(_phi_selected.size() + _n_sis_select);
project_r(prop.data(), prop.size());
_project(prop.data(), _scores.data(), _phi, _task_sizes, prop.size() / _n_samp);
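// Score every feature in _phi with the pluggable projection operator; the
// last argument, prop.size() / _n_samp, is the number of property
// (residual) vectors being projected against.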
std::vector<int> inds = util_funcs::argsort(_scores);
int ii = 0;
......@@ -459,7 +426,6 @@ void FeatureSpace::sis(std::vector<double>& prop)
if(_n_rung_generate > 0)
{
for(auto& feat : phi_sel)
phi_sel.resize(cur_feat_local);
scores_sel.resize(cur_feat_local);
project_generated(prop.data(), prop.size(), phi_sel, scores_sel, scores_comp);
......@@ -586,20 +552,20 @@ void FeatureSpace::sis(std::vector<double>& prop)
for(int ii = _phi_selected.size() - _n_sis_select; ii < _phi_selected.size(); ++ii)
{
_phi_selected[ii]->set_value();
_phi_selected[ii]->set_test_value();
++cur_feat;
}
}
else
{
// cur_feat += cur_feat_local;
cur_feat_local = 0;
for(auto& feat : phi_sel)
{
std::cout << scores_sel[cur_feat_local] << '\t' << phi_sel[cur_feat_local]->expr() << std::endl;
_phi_selected.push_back(feat);
_phi_selected.back()->reindex(cur_feat);
_phi_selected.back()->set_value();
_phi_selected.back()->set_test_value();
++cur_feat;
++cur_feat_local;
}
}
if(cur_feat != node_value_arrs::N_SELECTED)
......
......@@ -5,6 +5,7 @@
#include <feature_creation/node/FeatureNode.hpp>
#include <feature_creation/node/operator_nodes/allowed_ops.hpp>
#include <feature_creation/node/value_storage/nodes_value_containers.hpp>
#include <utils/project.hpp>
#include <boost/serialization/shared_ptr.hpp>
......@@ -29,10 +30,12 @@ class FeatureSpace
std::vector<bin_op_node_gen> _bin_operators; //!< list of all binary operators
std::vector<double> _scores; //!< projection scores for each feature
std::vector<double> _prop; //!< property to learn
std::vector<int> _task_sizes; //!< The number of samples in each task
std::vector<int> _start_gen; //!< list of starting index for each generation
std::function<void(double*, double*, std::vector<node_ptr>&, std::vector<int>&, int)> _project; //!< Function used for projection onto SIS
std::shared_ptr<MPI_Interface> _mpi_comm; //!< MPI communicator
double _l_bound; //!< lower bound for absolute value of the features
......@@ -46,6 +49,7 @@ class FeatureSpace
int _n_rung_generate; //!< Total number of rungs to generate on the fly
public:
/**
* @brief Constructor for the feature space
* @details constructs the feature space from an initial set of features and a list of allowed operators
......@@ -60,6 +64,7 @@ public:
std::shared_ptr<MPI_Interface> mpi_comm,
std::vector<node_ptr> phi_0,
std::vector<std::string> allowed_ops,
std::vector<int> task_sizes,
int max_phi=1,
int n_sis_select=1,
int max_store_rung=2,
......@@ -99,15 +104,7 @@ public:
*/
inline std::shared_ptr<MPI_Interface> mpi_comm(){return _mpi_comm;}
/**
* @brief calculate the projection scores for all features for a given property
* @details Calculate the projection score based on the Pearson correlation
*
* @param prop [description]
*/
void project_r(double* prop, int size);
std::vector<double> project_r(double* prop, int size, std::vector<node_ptr>& phi);
inline std::vector<int> task_sizes(){return _task_sizes;}
void generate_new_feats(std::vector<node_ptr>::iterator& feat, std::vector<node_ptr>& feat_set, int& feat_ind, double l_bound=1e-50, double u_bound=1e50);
......
......@@ -2,10 +2,10 @@
InputParser::InputParser(boost::property_tree::ptree IP, std::string fn, std::shared_ptr<MPI_Interface> comm) :
_opset(as_vector<std::string>(IP, "opset")),
_leave_out_inds(as_vector<int>(IP, "leave_out_inds")),
_filename(fn),
_data_file(IP.get<std::string>("data_file", "data.csv")),
_prop_key(IP.get<std::string>("property_key", "prop")),
_leave_out_inds(as_vector<int>(IP, "leave_out_inds")),
_l_bound(IP.get<double>("min_abs_feat_val", 1e-50)),
_u_bound(IP.get<double>("max_abs_feat_val", 1e50)),
_n_dim(IP.get<int>("desc_dim")),
......@@ -13,38 +13,118 @@ InputParser::InputParser(boost::property_tree::ptree IP, std::string fn, std::sh
_max_rung(IP.get<int>("max_rung")),
_max_store_rung(IP.get<int>("n_rung_store", _max_rung - 1)),
_n_rung_generate(IP.get<int>("n_rung_generate", 0)),
_n_samp(-1),
_n_samp(0),
_n_residuals(IP.get<int>("n_residual", 1))