Skip to content
Snippets Groups Projects
Commit 7c2d8795 authored by Thomas Purcell's avatar Thomas Purcell
Browse files

Seperate out model printout from n_residuals

n_models_store now sets how many models to store (default still n_residual)
parent 1d68e41a
Branches
No related tags found
No related merge requests found
......@@ -9,6 +9,7 @@ SISSORegressor::SISSORegressor(
std::vector<int> leave_out_inds,
int n_dim,
int n_residual,
int n_models_store,
bool fix_intercept
):
_prop(prop),
......@@ -24,6 +25,7 @@ SISSORegressor::SISSORegressor(
_n_samp(prop.size()),
_n_dim(n_dim),
_n_residual(n_residual),
_n_models_store(n_models_store),
_lwork(-1),
_fix_intercept(fix_intercept)
{
......@@ -106,13 +108,15 @@ void SISSORegressor::fit()
start = std::clock();
std::vector<Model> models;
for(int rr = 0; rr < _n_residual; ++rr)
for(int rr = 0; rr < std::max(_n_residual, _n_models_store); ++rr)
{
node_value_arrs::clear_temp_test_reg();
model_node_ptr model_feat = std::make_shared<ModelNode>(_feat_space->phi_selected()[rr]->arr_ind(), _feat_space->phi_selected()[rr]->rung(), _feat_space->phi_selected()[rr]->expr(), _feat_space->phi_selected()[rr]->postfix_expr(), _feat_space->phi_selected()[rr]->value(), _feat_space->phi_selected()[rr]->test_value(), _feat_space->phi_selected()[rr]->unit());
models.push_back(Model(_prop, _prop_test, {model_feat}, _task_sizes_train, _task_sizes_test, _fix_intercept));
if(rr < _n_residual)
models.back().copy_error(&residual[rr * _n_samp]);
if(_mpi_comm->rank() == 0)
if((_mpi_comm->rank() == 0) && (rr < _n_models_store))
{
models.back().to_file("models/train_dim_1_model_" + std::to_string(rr) + ".dat");
if(_leave_out_inds.size() > 0)
......@@ -145,7 +149,7 @@ void SISSORegressor::fit()
{
std::cout << "Time for l0-norm: " << duration << std::endl;
for(int rr = 0; rr < _n_residual; ++rr)
for(int rr = 0; rr < _n_models_store; ++rr)
{
_models.back()[rr].to_file("models/train_dim_" + std::to_string(dd) + "_model_" + std::to_string(rr) + ".dat");
if(_leave_out_inds.size() > 0)
......@@ -159,12 +163,13 @@ void SISSORegressor::fit()
void SISSORegressor::l0_norm(std::vector<double>& prop, int n_dim)
{
int n_get_models = std::max(_n_residual, _n_models_store);
std::vector<double> coefs(n_dim + 1, 0.0);
std::vector<int> inds(n_dim, 0);
std::vector<int> inds_min(_n_residual * _n_dim, -1);
std::vector<int> inds_min(n_get_models * _n_dim, -1);
std::vector<double> min_errors(_n_residual, util_funcs::norm(_prop.data(), _n_samp));
std::vector<double> min_errors(n_get_models, util_funcs::norm(_prop.data(), _n_samp));
for(int rr = 0; rr < n_dim; ++rr)
inds[rr] = _feat_space->phi_selected().size() - 1 - rr;
......@@ -192,18 +197,18 @@ void SISSORegressor::l0_norm(std::vector<double>& prop, int n_dim)
}
} while(util_funcs::iterate(inds, inds.size(), _mpi_comm->size()));
std::vector<double> all_min_error(_mpi_comm->size() * _n_residual);
std::vector<int> all_inds_min(_mpi_comm->size() *_n_residual * n_dim);
std::vector<double> all_min_error(_mpi_comm->size() * n_get_models);
std::vector<int> all_inds_min(_mpi_comm->size() * n_get_models * n_dim);
mpi::all_gather(*_mpi_comm, min_errors.data(), _n_residual, all_min_error);
mpi::all_gather(*_mpi_comm, inds_min.data(), _n_residual * n_dim, all_inds_min);
mpi::all_gather(*_mpi_comm, min_errors.data(), n_get_models, all_min_error);
mpi::all_gather(*_mpi_comm, inds_min.data(), n_get_models * n_dim, all_inds_min);
inds = util_funcs::argsort(all_min_error);
std::vector<model_node_ptr> min_nodes(n_dim);
std::vector<Model> models;
for(int rr = 0; rr < _n_residual; ++rr)
for(int rr = 0; rr < n_get_models; ++rr)
{
node_value_arrs::clear_temp_test_reg();
for(int ii = 0; ii < n_dim; ++ii)
......
......
......@@ -46,6 +46,7 @@ protected:
int _n_dim; //!< Number of dimensions to calculate
int _n_residual; //!< Number of residuals to pass to the next sis model
int _lwork; //!< size of the work array (for dgels_)
int _n_models_store; //!< The number of models to store in files
bool _fix_intercept; //!< If true fix intercept to 0
public:
......@@ -60,9 +61,10 @@ public:
* @param leave_out_inds List of indexes from the initial data file in the test set
* @param n_dim Maximum dimensionality of the generated models
* @param n_residual Number of residuals to pass to the next SIS operation
* @param n_models_store Number of features to store in files
* @param fix_intrecept If true fix intercept to 0
*/
SISSORegressor(std::shared_ptr<FeatureSpace> feat_space, std::vector<double> prop, std::vector<double> prop_test, std::vector<int> task_sizes_train, std::vector<int> task_sizes_test, std::vector<int> leave_out_inds, int n_dim, int n_residual, bool fix_intercept=false);
SISSORegressor(std::shared_ptr<FeatureSpace> feat_space, std::vector<double> prop, std::vector<double> prop_test, std::vector<int> task_sizes_train, std::vector<int> task_sizes_test, std::vector<int> leave_out_inds, int n_dim, int n_residual, int n_models_store, bool fix_intercept=false);
/**
* @brief Get the optimal size of the working array
......@@ -159,6 +161,12 @@ public:
*/
inline int n_residual(){return _n_residual;}
// DocString: sisso_reg_n_modles_store
/**
* @brief The number of models to store in output files
*/
inline int n_models_store(){return _n_models_store;}
// Python interface functions
#ifdef PY_BINDINGS
/**
......@@ -172,6 +180,7 @@ public:
* @param leave_out_inds List of indexes from the initial data file in the test set
* @param n_dim Maximum dimensionality of the generated models
* @param n_residual Number of residuals to pass to the next SIS operation
* @param n_models_store Number of features to store in files
* @param fix_intrecept If true fix intercept to 0
*/
SISSORegressor(
......@@ -183,6 +192,7 @@ public:
py::list leave_out_inds,
int n_dim,
int n_residual,
int n_models_store,
bool fix_intercept=false
);
......@@ -198,6 +208,7 @@ public:
* @param leave_out_inds List of indexes from the initial data file in the test set
* @param n_dim Maximum dimensionality of the generated models
* @param n_residual Number of residuals to pass to the next SIS operation
* @param n_models_store Number of features to store in files
* @param fix_intrecept If true fix intercept to 0
*/
SISSORegressor(
......@@ -209,6 +220,7 @@ public:
py::list leave_out_inds,
int n_dim,
int n_residual,
int n_models_store,
bool fix_intercept=false
);
......
......
......@@ -17,6 +17,7 @@ InputParser::InputParser(boost::property_tree::ptree IP, std::string fn, std::sh
_n_rung_generate(IP.get<int>("n_rung_generate", 0)),
_n_samp(0),
_n_residuals(IP.get<int>("n_residual", 1)),
_n_models_store(IP.get<int>("n_models_store", _n_residuals)),
_fix_intercept(IP.get<bool>("fix_intercept", false))
{
std::ifstream data_stream;
......
......
......@@ -60,6 +60,7 @@ public:
int _n_sis_select; //!< //!< Number of features to select for each dimensions
int _n_samp; //!< //!< Number of samples (training data)
int _n_residuals; //!< Number of residuals to pass to the next sis model
int _n_models_store; //!< Number of models to store
bool _fix_intercept; //!< If true force intercept to be 0.0
/**
......
......
......@@ -36,7 +36,7 @@ int main(int argc, char const *argv[])
std::cout<< "time input_parsing/Feature space generation: "<< duration << std::endl;
node_value_arrs::initialize_d_matrix_arr();
SISSORegressor sisso(IP._feat_space, IP._prop_train, IP._prop_test, IP._task_sizes_train, IP._task_sizes_test, IP._leave_out_inds, IP._n_dim, IP._n_residuals, IP._fix_intercept);
SISSORegressor sisso(IP._feat_space, IP._prop_train, IP._prop_test, IP._task_sizes_train, IP._task_sizes_test, IP._leave_out_inds, IP._n_dim, IP._n_residuals, IP._n_models_store, IP._fix_intercept);
sisso.fit();
if(mpi_setup::comm->rank() == 0)
......@@ -49,12 +49,6 @@ int main(int argc, char const *argv[])
else
std::cout << std::endl;
std::cout << sisso.models()[ii][0] << "\n" << std::endl;
// for(int jj = 0; jj < sisso.models()[ii].size(); ++jj)
// {
// sisso.models()[ii][jj].to_file("models/train_dim_" + std::to_string(ii) + "_model_" + std::to_string(jj) + ".dat");
// if(IP._prop_test.size() > 0)
// sisso.models()[ii][jj].to_file("models/test_dim_" + std::to_string(ii) + "_model_" + std::to_string(jj) + ".dat", false, IP._leave_out_inds);
// }
}
}
......
......
......@@ -187,6 +187,7 @@ def generate_fs_sr_from_csv(
n_sis_select,
max_dim,
n_residuals=1,
n_model_store=1,
task_key=None,
leave_out_frac=0.0,
):
......@@ -223,5 +224,6 @@ def generate_fs_sr_from_csv(
leave_out_inds,
max_dim,
n_residuals,
n_model_store,
)
return fs, sr
......@@ -374,8 +374,8 @@ void sisso::descriptor_identifier::registerModel()
void sisso::descriptor_identifier::registerSISSORegressor()
{
class_<SISSORegressor>("SISSORegressor", init<std::shared_ptr<FeatureSpace>, np::ndarray, np::ndarray, py::list, py::list, py::list, int, int, optional<bool>>())
.def(init<std::shared_ptr<FeatureSpace>, py::list, py::list, py::list, py::list, py::list, int, int, optional<bool>>())
class_<SISSORegressor>("SISSORegressor", init<std::shared_ptr<FeatureSpace>, np::ndarray, np::ndarray, py::list, py::list, py::list, int, int, int, optional<bool>>())
.def(init<std::shared_ptr<FeatureSpace>, py::list, py::list, py::list, py::list, py::list, int, int, int, optional<bool>>())
.def("fit", &SISSORegressor::fit, "@DocString_sisso_reg_fit@")
.add_property("prop", &SISSORegressor::prop_py, "@DocString_sisso_reg_prop_py@")
.add_property("prop_test", &SISSORegressor::prop_test_py, "@DocString_sisso_reg_prop_test_py@")
......@@ -383,6 +383,7 @@ void sisso::descriptor_identifier::registerSISSORegressor()
.add_property("n_samp", &SISSORegressor::n_samp, "@DocString_sisso_reg_n_samp@")
.add_property("n_dim", &SISSORegressor::n_dim, "@DocString_sisso_reg_n_dim@")
.add_property("n_residual", &SISSORegressor::n_residual, "@DocString_sisso_reg_n_residual@")
.add_property("n_models_store", &SISSORegressor::n_models_store, "@DocString_sisso_reg_n_models_store@")
.add_property("feat_space", &SISSORegressor::feat_space, "@DocString_sisso_reg_feat_space@")
.add_property("task_sizes_train", &SISSORegressor::task_sizes_train, "@DocString_sisso_reg_task_sizes_train@")
.add_property("task_sizes_test", &SISSORegressor::task_sizes_test, "@DocString_sisso_reg_task_sizes_test@")
......
......
......@@ -9,6 +9,7 @@ SISSORegressor::SISSORegressor(
py::list leave_out_inds,
int n_dim,
int n_residual,
int n_models_store,
bool fix_intercept
) :
_prop(python_conv_utils::from_ndarray<double>(prop)),
......@@ -24,6 +25,7 @@ SISSORegressor::SISSORegressor(
_n_samp(prop.shape(0)),
_n_dim(n_dim),
_n_residual(n_residual),
_n_models_store(n_models_store),
_lwork(-1),
_fix_intercept(fix_intercept)
{
......@@ -46,6 +48,7 @@ SISSORegressor::SISSORegressor(
py::list leave_out_inds,
int n_dim,
int n_residual,
int n_models_store,
bool fix_intercept
) :
_prop(python_conv_utils::from_list<double>(prop)),
......@@ -61,6 +64,7 @@ SISSORegressor::SISSORegressor(
_n_samp(py::len(prop)),
_n_dim(n_dim),
_n_residual(n_residual),
_n_models_store(n_models_store),
_lwork(-1),
_fix_intercept(fix_intercept)
{
......
......
......@@ -2,12 +2,13 @@
"desc_dim": 2,
"n_sis_select": 10,
"max_rung": 2,
"n_residual": 10,
"n_residual": 1,
"data_file": "data.csv",
"property_key": "Prop",
"task_key": "Task",
"leave_out_frac": 0.2,
"n_rung_generate": 0,
"n_models_store": 10,
"leave_out_inds": [],
"fix_intercept": false,
"opset": ["add", "sub", "mult", "div", "exp", "inv", "sq", "cb", "sqrt", "cbrt", "abs_diff"]
......
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please to comment