diff --git a/src/descriptor_identifier/SISSORegressor.cpp b/src/descriptor_identifier/SISSORegressor.cpp index 1b9401683732c4cb5fbc61e5a526f18bb530b69a..6aff0675cf9569babee76f61fed02ef052672feb 100644 --- a/src/descriptor_identifier/SISSORegressor.cpp +++ b/src/descriptor_identifier/SISSORegressor.cpp @@ -9,6 +9,7 @@ SISSORegressor::SISSORegressor( std::vector<int> leave_out_inds, int n_dim, int n_residual, + int n_models_store, bool fix_intercept ): _prop(prop), @@ -24,6 +25,7 @@ SISSORegressor::SISSORegressor( _n_samp(prop.size()), _n_dim(n_dim), _n_residual(n_residual), + _n_models_store(n_models_store), _lwork(-1), _fix_intercept(fix_intercept) { @@ -106,13 +108,15 @@ void SISSORegressor::fit() start = std::clock(); std::vector<Model> models; - for(int rr = 0; rr < _n_residual; ++rr) + for(int rr = 0; rr < std::max(_n_residual, _n_models_store); ++rr) { node_value_arrs::clear_temp_test_reg(); model_node_ptr model_feat = std::make_shared<ModelNode>(_feat_space->phi_selected()[rr]->arr_ind(), _feat_space->phi_selected()[rr]->rung(), _feat_space->phi_selected()[rr]->expr(), _feat_space->phi_selected()[rr]->postfix_expr(), _feat_space->phi_selected()[rr]->value(), _feat_space->phi_selected()[rr]->test_value(), _feat_space->phi_selected()[rr]->unit()); models.push_back(Model(_prop, _prop_test, {model_feat}, _task_sizes_train, _task_sizes_test, _fix_intercept)); - models.back().copy_error(&residual[rr * _n_samp]); - if(_mpi_comm->rank() == 0) + if(rr < _n_residual) + models.back().copy_error(&residual[rr * _n_samp]); + + if((_mpi_comm->rank() == 0) && (rr < _n_models_store)) { models.back().to_file("models/train_dim_1_model_" + std::to_string(rr) + ".dat"); if(_leave_out_inds.size() > 0) @@ -145,7 +149,7 @@ void SISSORegressor::fit() { std::cout << "Time for l0-norm: " << duration << std::endl; - for(int rr = 0; rr < _n_residual; ++rr) + for(int rr = 0; rr < _n_models_store; ++rr) { _models.back()[rr].to_file("models/train_dim_" + std::to_string(dd) + "_model_" + std::to_string(rr) + ".dat"); if(_leave_out_inds.size() > 0) @@ -159,12 +163,13 @@ void SISSORegressor::fit() void SISSORegressor::l0_norm(std::vector<double>& prop, int n_dim) { + int n_get_models = std::max(_n_residual, _n_models_store); std::vector<double> coefs(n_dim + 1, 0.0); std::vector<int> inds(n_dim, 0); - std::vector<int> inds_min(_n_residual * _n_dim, -1); + std::vector<int> inds_min(n_get_models * _n_dim, -1); - std::vector<double> min_errors(_n_residual, util_funcs::norm(_prop.data(), _n_samp)); + std::vector<double> min_errors(n_get_models, util_funcs::norm(_prop.data(), _n_samp)); for(int rr = 0; rr < n_dim; ++rr) inds[rr] = _feat_space->phi_selected().size() - 1 - rr; @@ -192,18 +197,18 @@ void SISSORegressor::l0_norm(std::vector<double>& prop, int n_dim) } } while(util_funcs::iterate(inds, inds.size(), _mpi_comm->size())); - std::vector<double> all_min_error(_mpi_comm->size() * _n_residual); - std::vector<int> all_inds_min(_mpi_comm->size() *_n_residual * n_dim); + std::vector<double> all_min_error(_mpi_comm->size() * n_get_models); + std::vector<int> all_inds_min(_mpi_comm->size() * n_get_models * n_dim); - mpi::all_gather(*_mpi_comm, min_errors.data(), _n_residual, all_min_error); - mpi::all_gather(*_mpi_comm, inds_min.data(), _n_residual * n_dim, all_inds_min); + mpi::all_gather(*_mpi_comm, min_errors.data(), n_get_models, all_min_error); + mpi::all_gather(*_mpi_comm, inds_min.data(), n_get_models * n_dim, all_inds_min); inds = util_funcs::argsort(all_min_error); std::vector<model_node_ptr> min_nodes(n_dim); std::vector<Model> models; - for(int rr = 0; rr < _n_residual; ++rr) + for(int rr = 0; rr < n_get_models; ++rr) { node_value_arrs::clear_temp_test_reg(); for(int ii = 0; ii < n_dim; ++ii) diff --git a/src/descriptor_identifier/SISSORegressor.hpp b/src/descriptor_identifier/SISSORegressor.hpp index 0fc9d0ad19b95f47b3d955ab3ba510ff03ed2a81..b3887d451257622ecb2fd74fac8acb0203637848 100644 --- a/src/descriptor_identifier/SISSORegressor.hpp +++ b/src/descriptor_identifier/SISSORegressor.hpp @@ -46,6 +46,7 @@ protected: int _n_dim; //!< Number of dimensions to calculate int _n_residual; //!< Number of residuals to pass to the next sis model int _lwork; //!< size of the work array (for dgels_) + int _n_models_store; //!< The number of models to store in files bool _fix_intercept; //!< If true fix intercept to 0 public: @@ -60,9 +61,10 @@ public: * @param leave_out_inds List of indexes from the initial data file in the test set * @param n_dim Maximum dimensionality of the generated models * @param n_residual Number of residuals to pass to the next SIS operation + * @param n_models_store Number of features to store in files * @param fix_intrecept If true fix intercept to 0 */ - SISSORegressor(std::shared_ptr<FeatureSpace> feat_space, std::vector<double> prop, std::vector<double> prop_test, std::vector<int> task_sizes_train, std::vector<int> task_sizes_test, std::vector<int> leave_out_inds, int n_dim, int n_residual, bool fix_intercept=false); + SISSORegressor(std::shared_ptr<FeatureSpace> feat_space, std::vector<double> prop, std::vector<double> prop_test, std::vector<int> task_sizes_train, std::vector<int> task_sizes_test, std::vector<int> leave_out_inds, int n_dim, int n_residual, int n_models_store, bool fix_intercept=false); /** * @brief Get the optimal size of the working array @@ -159,6 +161,12 @@ public: */ inline int n_residual(){return _n_residual;} + // DocString: sisso_reg_n_modles_store + /** + * @brief The number of models to store in output files + */ + inline int n_models_store(){return _n_models_store;} + // Python interface functions #ifdef PY_BINDINGS /** @@ -172,6 +180,7 @@ public: * @param leave_out_inds List of indexes from the initial data file in the test set * @param n_dim Maximum dimensionality of the generated models * @param n_residual Number of residuals to pass to the next SIS operation + * @param n_models_store Number of features to store in files * @param fix_intrecept If true fix intercept to 0 */ SISSORegressor( @@ -183,6 +192,7 @@ public: py::list leave_out_inds, int n_dim, int n_residual, + int n_models_store, bool fix_intercept=false ); @@ -198,6 +208,7 @@ public: * @param leave_out_inds List of indexes from the initial data file in the test set * @param n_dim Maximum dimensionality of the generated models * @param n_residual Number of residuals to pass to the next SIS operation + * @param n_models_store Number of features to store in files * @param fix_intrecept If true fix intercept to 0 */ SISSORegressor( @@ -209,6 +220,7 @@ public: py::list leave_out_inds, int n_dim, int n_residual, + int n_models_store, bool fix_intercept=false ); diff --git a/src/inputs/InputParser.cpp b/src/inputs/InputParser.cpp index f10e5ee1be911c6c91e2e4f9e0a3b805dd26a060..74627b9f044060f85f3e46e3a4b487099ed234d8 100644 --- a/src/inputs/InputParser.cpp +++ b/src/inputs/InputParser.cpp @@ -17,6 +17,7 @@ InputParser::InputParser(boost::property_tree::ptree IP, std::string fn, std::sh _n_rung_generate(IP.get<int>("n_rung_generate", 0)), _n_samp(0), _n_residuals(IP.get<int>("n_residual", 1)), + _n_models_store(IP.get<int>("n_models_store", _n_residuals)), _fix_intercept(IP.get<bool>("fix_intercept", false)) { std::ifstream data_stream; diff --git a/src/inputs/InputParser.hpp b/src/inputs/InputParser.hpp index f55e4c18ec5f75d8dd85f6405475b9a3fbc025e0..b019a4d35d9ffd434ba974e02cbb2eeed8e2128e 100644 --- a/src/inputs/InputParser.hpp +++ b/src/inputs/InputParser.hpp @@ -60,6 +60,7 @@ public: int _n_sis_select; //!< //!< Number of features to select for each dimensions int _n_samp; //!< //!< Number of samples (training data) int _n_residuals; //!< Number of residuals to pass to the next sis model + int _n_models_store; //!< Number of models to store bool _fix_intercept; //!< If true force intercept to be 0.0 /** diff --git a/src/main.cpp b/src/main.cpp index 395c3c6f7ff78c6ede1598a7b1bd309898e53a25..560accfa5861e0cd39e3fd4c9db1fd2d93fa0a94 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -36,7 +36,7 @@ int main(int argc, char const *argv[]) std::cout<< "time input_parsing/Feature space generation: "<< duration << std::endl; node_value_arrs::initialize_d_matrix_arr(); - SISSORegressor sisso(IP._feat_space, IP._prop_train, IP._prop_test, IP._task_sizes_train, IP._task_sizes_test, IP._leave_out_inds, IP._n_dim, IP._n_residuals, IP._fix_intercept); + SISSORegressor sisso(IP._feat_space, IP._prop_train, IP._prop_test, IP._task_sizes_train, IP._task_sizes_test, IP._leave_out_inds, IP._n_dim, IP._n_residuals, IP._n_models_store, IP._fix_intercept); sisso.fit(); if(mpi_setup::comm->rank() == 0) @@ -49,12 +49,6 @@ int main(int argc, char const *argv[]) else std::cout << std::endl; std::cout << sisso.models()[ii][0] << "\n" << std::endl; - // for(int jj = 0; jj < sisso.models()[ii].size(); ++jj) - // { - // sisso.models()[ii][jj].to_file("models/train_dim_" + std::to_string(ii) + "_model_" + std::to_string(jj) + ".dat"); - // if(IP._prop_test.size() > 0) - // sisso.models()[ii][jj].to_file("models/test_dim_" + std::to_string(ii) + "_model_" + std::to_string(jj) + ".dat", false, IP._leave_out_inds); - // } } } diff --git a/src/python/__init__.py b/src/python/__init__.py index 52884c2ac27b859950b29a9d56c1df83bbf86adc..d196314ce88da7bad113845b02ec871830335b0f 100644 --- a/src/python/__init__.py +++ b/src/python/__init__.py @@ -187,6 +187,7 @@ def generate_fs_sr_from_csv( n_sis_select, max_dim, n_residuals=1, + n_model_store=1, task_key=None, leave_out_frac=0.0, ): @@ -223,5 +224,6 @@ def generate_fs_sr_from_csv( leave_out_inds, max_dim, n_residuals, + n_model_store, ) return fs, sr diff --git a/src/python/bindings_docstring_keyed.cpp b/src/python/bindings_docstring_keyed.cpp index 4b7e00ded1408176c4459a50d316dce26cdc234e..bf672548b3c56a7d79e388b25f93456e1a6750ab 100644 --- a/src/python/bindings_docstring_keyed.cpp +++ b/src/python/bindings_docstring_keyed.cpp @@ -374,8 +374,8 @@ void sisso::descriptor_identifier::registerModel() void sisso::descriptor_identifier::registerSISSORegressor() { - class_<SISSORegressor>("SISSORegressor", init<std::shared_ptr<FeatureSpace>, np::ndarray, np::ndarray, py::list, py::list, py::list, int, int, optional<bool>>()) - .def(init<std::shared_ptr<FeatureSpace>, py::list, py::list, py::list, py::list, py::list, int, int, optional<bool>>()) + class_<SISSORegressor>("SISSORegressor", init<std::shared_ptr<FeatureSpace>, np::ndarray, np::ndarray, py::list, py::list, py::list, int, int, int, optional<bool>>()) + .def(init<std::shared_ptr<FeatureSpace>, py::list, py::list, py::list, py::list, py::list, int, int, int, optional<bool>>()) .def("fit", &SISSORegressor::fit, "@DocString_sisso_reg_fit@") .add_property("prop", &SISSORegressor::prop_py, "@DocString_sisso_reg_prop_py@") .add_property("prop_test", &SISSORegressor::prop_test_py, "@DocString_sisso_reg_prop_test_py@") @@ -383,6 +383,7 @@ void sisso::descriptor_identifier::registerSISSORegressor() .add_property("n_samp", &SISSORegressor::n_samp, "@DocString_sisso_reg_n_samp@") .add_property("n_dim", &SISSORegressor::n_dim, "@DocString_sisso_reg_n_dim@") .add_property("n_residual", &SISSORegressor::n_residual, "@DocString_sisso_reg_n_residual@") + .add_property("n_models_store", &SISSORegressor::n_models_store, "@DocString_sisso_reg_n_models_store@") .add_property("feat_space", &SISSORegressor::feat_space, "@DocString_sisso_reg_feat_space@") .add_property("task_sizes_train", &SISSORegressor::task_sizes_train, "@DocString_sisso_reg_task_sizes_train@") .add_property("task_sizes_test", &SISSORegressor::task_sizes_test, "@DocString_sisso_reg_task_sizes_test@") diff --git a/src/python/descriptor_identifier/SISSORegressor.cpp b/src/python/descriptor_identifier/SISSORegressor.cpp index 4f9a04f4cbcf5558e4fe576b8776d180bd3851dc..e7568b3ca4a6108a273bdf402e1d888535de8624 100644 --- a/src/python/descriptor_identifier/SISSORegressor.cpp +++ b/src/python/descriptor_identifier/SISSORegressor.cpp @@ -9,6 +9,7 @@ SISSORegressor::SISSORegressor( py::list leave_out_inds, int n_dim, int n_residual, + int n_models_store, bool fix_intercept ) : _prop(python_conv_utils::from_ndarray<double>(prop)), @@ -24,6 +25,7 @@ SISSORegressor::SISSORegressor( _n_samp(prop.shape(0)), _n_dim(n_dim), _n_residual(n_residual), + _n_models_store(n_models_store), _lwork(-1), _fix_intercept(fix_intercept) { @@ -46,6 +48,7 @@ SISSORegressor::SISSORegressor( py::list leave_out_inds, int n_dim, int n_residual, + int n_models_store, bool fix_intercept ) : _prop(python_conv_utils::from_list<double>(prop)), @@ -61,6 +64,7 @@ SISSORegressor::SISSORegressor( _n_samp(py::len(prop)), _n_dim(n_dim), _n_residual(n_residual), + _n_models_store(n_models_store), _lwork(-1), _fix_intercept(fix_intercept) { diff --git a/tests/sisso.json b/tests/sisso.json index 0b03bb7eab586fed1dd1bf4cdab487dade2f48d7..830a10790f12cadb8b87bb29637b14f8f1c3d7c8 100644 --- a/tests/sisso.json +++ b/tests/sisso.json @@ -2,12 +2,13 @@ "desc_dim": 2, "n_sis_select": 10, "max_rung": 2, - "n_residual": 10, + "n_residual": 1, "data_file": "data.csv", "property_key": "Prop", "task_key": "Task", "leave_out_frac": 0.2, "n_rung_generate": 0, + "n_models_store": 10, "leave_out_inds": [], "fix_intercept": false, "opset": ["add", "sub", "mult", "div", "exp", "inv", "sq", "cb", "sqrt", "cbrt", "abs_diff"]