From 1902fece5c62de0fb8a876f7a438ea059aedc1c5 Mon Sep 17 00:00:00 2001 From: Thomas Purcell <purcell@fhi-berlin.mpg.de> Date: Mon, 8 Jun 2020 08:18:26 +0200 Subject: [PATCH] Adding: Multiple residuals fitting and leave-out CV keys Did them one after another, adding both here now --- src/descriptor_identifier/Model/Model.cpp | 134 +++++++++++------- src/descriptor_identifier/Model/Model.hpp | 68 +++++---- src/descriptor_identifier/SISSORegressor.cpp | 80 +++++++---- src/descriptor_identifier/SISSORegressor.hpp | 15 +- .../feature_space/FeatureSpace.cpp | 33 +++-- .../feature_space/FeatureSpace.hpp | 2 +- src/feature_creation/node/FeatureNode.cpp | 8 +- src/feature_creation/node/FeatureNode.hpp | 22 ++- src/feature_creation/node/Node.cpp | 3 +- src/feature_creation/node/Node.hpp | 27 +++- .../node/operator_nodes/OperatorNode.cpp | 21 ++- .../node/operator_nodes/OperatorNode.hpp | 14 ++ .../absolute_difference.cpp | 13 +- .../absolute_difference.hpp | 6 + .../allowed_operator_nodes/absolute_value.cpp | 6 +- .../allowed_operator_nodes/absolute_value.hpp | 6 + .../allowed_operator_nodes/add.cpp | 10 +- .../allowed_operator_nodes/add.hpp | 6 + .../allowed_operator_nodes/cos.cpp | 6 +- .../allowed_operator_nodes/cos.hpp | 6 + .../allowed_operator_nodes/cube.cpp | 4 + .../allowed_operator_nodes/cube.hpp | 6 + .../allowed_operator_nodes/cube_root.cpp | 4 + .../allowed_operator_nodes/cube_root.hpp | 6 + .../allowed_operator_nodes/divide.cpp | 12 +- .../allowed_operator_nodes/divide.hpp | 6 + .../allowed_operator_nodes/exponential.cpp | 6 +- .../allowed_operator_nodes/exponential.hpp | 6 + .../allowed_operator_nodes/inverse.cpp | 4 + .../allowed_operator_nodes/inverse.hpp | 6 + .../allowed_operator_nodes/log.cpp | 6 +- .../allowed_operator_nodes/log.hpp | 6 + .../allowed_operator_nodes/multiply.cpp | 12 +- .../allowed_operator_nodes/multiply.hpp | 6 + .../negative_exponential.cpp | 2 +- .../negative_exponential.hpp | 6 + .../allowed_operator_nodes/sin.cpp | 6 +- 
.../allowed_operator_nodes/sin.hpp | 6 + .../allowed_operator_nodes/sixth_power.cpp | 4 + .../allowed_operator_nodes/sixth_power.hpp | 6 + .../allowed_operator_nodes/square.cpp | 4 + .../allowed_operator_nodes/square.hpp | 5 + .../allowed_operator_nodes/square_root.cpp | 4 + .../allowed_operator_nodes/square_root.hpp | 6 + .../allowed_operator_nodes/subtract.cpp | 10 +- .../allowed_operator_nodes/subtract.hpp | 6 + .../node/operator_nodes/functions.hpp | 4 +- .../value_storage/nodes_value_containers.cpp | 31 +++- .../value_storage/nodes_value_containers.hpp | 45 +++++- src/feature_creation/units/Unit.cpp | 7 + src/inputs/InputParser.cpp | 58 ++++++-- src/inputs/InputParser.hpp | 10 +- src/main.cpp | 13 +- 53 files changed, 636 insertions(+), 173 deletions(-) diff --git a/src/descriptor_identifier/Model/Model.cpp b/src/descriptor_identifier/Model/Model.cpp index 6ffb64ec..3a3e09ae 100644 --- a/src/descriptor_identifier/Model/Model.cpp +++ b/src/descriptor_identifier/Model/Model.cpp @@ -1,70 +1,46 @@ #include <descriptor_identifier/Model/Model.hpp> -Model::Model(std::vector<double> prop, std::vector<std::shared_ptr<Node>> feats) : - _n_samp(feats[0]->n_samp()), +Model::Model(std::vector<double> prop_train, std::vector<double> prop_test, std::vector<std::shared_ptr<Node>> feats) : + _n_samp_train(feats[0]->n_samp()), + _n_samp_test(feats[0]->n_test_samp()), _n_dim(feats.size() + 1), _feats(feats), - _coefs(new double[_n_dim]), - _prop(new double[_n_samp]), - _error(new double[_n_samp]), - _D(new double[_n_samp * _n_dim]), - _prop_est(_n_samp, 0.0) + _coefs(_n_dim), + _prop_train(prop_train), + _prop_test(prop_test), + _train_error(_n_samp_train), + _test_error(_n_samp_test), + _D_train(_n_samp_train * _n_dim), + _D_test(_n_samp_test * _n_dim), + _prop_train_est(_n_samp_train, 0.0), + _prop_test_est(_n_samp_test, 0.0) { - _prop_est.reserve(_n_samp); + _prop_train_est.reserve(_n_samp_train); + _prop_test_est.reserve(_n_samp_test); - std::copy_n(prop.begin(), _n_samp, 
_prop.get()); - - std::vector<double> a(_n_samp * _n_dim, 1.0); + std::vector<double> a(_n_samp_train * _n_dim, 1.0); for(int ff = 0; ff < feats.size(); ++ff) { - std::copy_n(feats[ff]->value_ptr(), _n_samp, _D.get() + ff * _n_samp); - std::copy_n(feats[ff]->value_ptr(), _n_samp, a.begin() + ff * _n_samp); + std::copy_n(feats[ff]->value_ptr(), _n_samp_train, _D_train.data() + ff * _n_samp_train); + std::copy_n(feats[ff]->test_value_ptr(), _n_samp_test, _D_test.data() + ff * _n_samp_test); + std::copy_n(feats[ff]->value_ptr(), _n_samp_train, a.data() + ff * _n_samp_train); } - std::copy_n(a.begin() + feats.size() * _n_samp, _n_samp, _D.get() + feats.size() * _n_samp); + std::copy_n(a.data() + feats.size() * _n_samp_train, _n_samp_train, _D_train.data() + feats.size() * _n_samp_train); + std::copy_n(a.data() + feats.size() * _n_samp_train, _n_samp_test, _D_test.data() + feats.size() * _n_samp_test); std::vector<double> s(_n_dim, 0.0); - std::vector<double> work(_n_dim * _n_samp, 0.0); + std::vector<double> work(_n_dim * _n_samp_train, 0.0); int rank = 0; int info = 0; - dgelss_(_n_samp, _n_dim, 1, a.data(), _n_samp, prop.data(), _n_samp, s.data(), 1e-13, &rank, work.data(), work.size(), &info); - std::copy_n(prop.begin(), _n_dim, _coefs.get()); - - dgemv_('N', _n_samp, _n_dim, 1.0, _D.get(), _n_samp, _coefs.get(), 1, 0.0, _prop_est.data(), 1); + dgelss_(_n_samp_train, _n_dim, 1, a.data(), _n_samp_train, prop_train.data(), _n_samp_train, s.data(), 1e-13, &rank, work.data(), work.size(), &info); + std::copy_n(prop_train.begin(), _n_dim, _coefs.data()); - std::transform(_prop_est.begin(), _prop_est.end(), _prop.get(), _error.get(), std::minus<double>()); -} + dgemv_('N', _n_samp_train, _n_dim, 1.0, _D_train.data(), _n_samp_train, _coefs.data(), 1, 0.0, _prop_train_est.data(), 1); + dgemv_('N', _n_samp_test, _n_dim, 1.0, _D_test.data(), _n_samp_test, _coefs.data(), 1, 0.0, _prop_test_est.data(), 1); -Model::Model(Model &o) : - _n_samp(o._n_samp), - _n_dim(o._n_dim), - 
_feats(o._feats), - _coefs(new double[_n_dim]), - _prop(new double[_n_samp]), - _error(new double[_n_samp]), - _D(new double[_n_samp * _n_dim]), - _prop_est(o._prop_est) -{ - std::copy_n(o._coefs.get(), o._n_dim, _coefs.get()); - std::copy_n(o._prop.get(), o._n_samp, _prop.get()); - std::copy_n(o._error.get(), o._n_samp, _error.get()); - std::copy_n(o._D.get(), o._n_samp * o._n_dim, _D.get()); -} - -Model::Model(Model &&o) : - _n_samp(o._n_samp), - _n_dim(o._n_dim), - _feats(o._feats), - _coefs(new double[_n_dim]), - _prop(new double[_n_samp]), - _error(new double[_n_samp]), - _D(new double[_n_samp * _n_dim]), - _prop_est(o._prop_est) -{ - std::copy_n(o._coefs.get(), o._n_dim, _coefs.get()); - std::copy_n(o._prop.get(), o._n_samp, _prop.get()); - std::copy_n(o._error.get(), o._n_samp, _error.get()); - std::copy_n(o._D.get(), o._n_samp * o._n_dim, _D.get()); + std::transform(_prop_train_est.begin(), _prop_train_est.end(), _prop_train.data(), _train_error.data(), std::minus<double>()); + std::transform(_prop_test_est.begin(), _prop_test_est.end(), _prop_test.data(), _test_error.data(), std::minus<double>()); } std::string Model::toString() const @@ -81,3 +57,59 @@ std::ostream& operator<< (std::ostream& outStream, const Model& model) outStream << model.toString(); return outStream; } + +void Model::train_to_file(std::string filename) +{ + boost::filesystem::path p(filename.c_str()); + boost::filesystem::create_directories(p.remove_filename()); + + std::ofstream out_file_stream = std::ofstream(); + out_file_stream.open(filename); + + out_file_stream << "# " << toString() << std::endl; + out_file_stream << "# RMSE: " << rmse() << "; Max AE( " << max_ae() << std::endl; + out_file_stream << "# coeffs:"; + for(auto& coef: _coefs) + out_file_stream << " " << std::setw(24) << std::setprecision(18) << coef << ";"; + out_file_stream << "\n# " << std::setw(23) << "Property Value," << std::setw(24) << "Property Value (EST),"; + for(int ff = 0; ff < _feats.size(); ++ff) + 
out_file_stream << " Feature " << ff << " Value,"; + out_file_stream << std::endl; + + for(int ss = 0; ss < _n_samp_train; ++ss) + { + out_file_stream << std::setw(24) << std::setprecision(18) << _prop_train[ss] << std::setw(24) << std::setprecision(18) << _prop_train_est[ss]; + for(int ff = 0; ff < _n_dim - 1; ++ff) + out_file_stream << std::setw(24) << std::setprecision(18) << _D_train[ss + ff * _n_samp_train]; + out_file_stream << std::endl; + } + out_file_stream.close(); +} + +void Model::test_to_file(std::string filename) +{ + boost::filesystem::path p(filename.c_str()); + boost::filesystem::create_directories(p.remove_filename()); + + std::ofstream out_file_stream = std::ofstream(); + out_file_stream.open(filename); + + out_file_stream << "# " << toString() << std::endl; + out_file_stream << "# RMSE: " << test_rmse() << "; Max AE: " << test_max_ae() << std::endl; + out_file_stream << "# coeffs:"; + for(auto& coef: _coefs) + out_file_stream << " " << std::setw(24) << std::setprecision(18) << coef << ";"; + out_file_stream << "\n# " << std::setw(23) << "Property Value," << std::setw(24) << "Property Value (EST),"; + for(int ff = 0; ff < _feats.size(); ++ff) + out_file_stream << " Feature " << ff << " Value,"; + out_file_stream << std::endl; + + for(int ss = 0; ss < _n_samp_test; ++ss) + { + out_file_stream << std::setw(24) << std::setprecision(18) << _prop_test[ss] << std::setw(24) << std::setprecision(18) << _prop_test_est[ss]; + for(int ff = 0; ff < _n_dim - 1; ++ff) + out_file_stream << std::setw(24) << std::setprecision(18) << _D_test[ss + ff * _n_samp_train]; + out_file_stream << std::endl; + } + out_file_stream.close(); +} diff --git a/src/descriptor_identifier/Model/Model.hpp b/src/descriptor_identifier/Model/Model.hpp index b4364916..208664ad 100644 --- a/src/descriptor_identifier/Model/Model.hpp +++ b/src/descriptor_identifier/Model/Model.hpp @@ -1,6 +1,12 @@ #ifndef MODEL #define MODEL +#include <boost/filesystem.hpp> + +#include<iomanip> 
+#include<fstream> +#include<iostream> + #include <feature_creation/node/Node.hpp> /** @@ -9,17 +15,22 @@ */ class Model { - int _n_samp; //!< The number of samples per feature + int _n_samp_train; //!< The number of samples per feature + int _n_samp_test; //!< The number of test samples per feature int _n_dim; //!< Dimension of the model std::vector<std::shared_ptr<Node>> _feats; //!< List of features in the model - std::unique_ptr<double[]> _coefs; //!< Coefficients for teh features - std::unique_ptr<double[]> _prop; //!< The property to be modeled - std::unique_ptr<double[]> _error; //!< The error of the model - std::unique_ptr<double[]> _D; //!< The Descriptor matrix + std::vector<double> _coefs; //!< Coefficients for teh features + std::vector<double> _prop_train; //!< The property to be modeled + std::vector<double> _prop_test; //!< The property to be modeled + std::vector<double> _train_error; //!< The error of the model + std::vector<double> _test_error; //!< The error of the model + std::vector<double> _D_train; //!< The Descriptor matrix + std::vector<double> _D_test; //!< The Descriptor matrix - std::vector<double> _prop_est; //!< The estimated Property + std::vector<double> _prop_train_est; //!< The estimated Property + std::vector<double> _prop_test_est; //!< The estimated Property public: /** @@ -28,21 +39,8 @@ public: * @param prop The property * @param feats The features for the model */ - Model(std::vector<double> prop, std::vector<std::shared_ptr<Node>> feats); - - /** - * @brief The copy constructor - * - * @param o The model to be copied - */ - Model(Model& o); + Model(std::vector<double> prop_train, std::vector<double> prop_test, std::vector<std::shared_ptr<Node>> feats); - /** - * @brief The Move constructor - * - * @param o The Model to be moved - */ - Model(Model&& o); /** * @brief Convert the model to a string @@ -54,27 +52,49 @@ public: /** * @brief Accessor function to _prop_est */ - inline std::vector<double>& predict(){return 
_prop_est;} + inline std::vector<double>& predict(){return _prop_test_est;} + + /** + * @brief Accessor function to _prop_est + */ + inline std::vector<double>& predict_train(){return _prop_train_est;} /** * @brief Copy the error into a new array * * @param res pointer to the beginning of the array */ - inline void copy_error(double* res){std::copy_n(_error.get(), _n_samp, res);} + inline void copy_error(double* res){std::copy_n(_train_error.data(), _n_samp_train, res);} /** * @brief The rmes of the model */ - inline double rmse(){return util_funcs::norm(_error.get(), _n_samp);} + inline double rmse(){return util_funcs::norm(_train_error.data(), _n_samp_train) / std::sqrt(static_cast<double>(_n_samp_train));} + inline double test_rmse(){return util_funcs::norm(_test_error.data(), _n_samp_test) / std::sqrt(static_cast<double>(_n_samp_test));} /** * @brief The max Absolute error of the array */ inline double max_ae() { - return *std::max_element(_error.get(), _error.get() + _n_samp, [](double d1, double d2){return std::abs(d1) < std::abs(d2);}); + return *std::max_element(_train_error.data(), _train_error.data() + _n_samp_train, [](double d1, double d2){return std::abs(d1) < std::abs(d2);}); + } + + inline double test_max_ae() + { + return *std::max_element(_test_error.data(), _test_error.data() + _n_samp_test, [](double d1, double d2){return std::abs(d1) < std::abs(d2);}); } + + + /** + * @brief Print model to a file + */ + void test_to_file(std::string filename); + + /** + * @brief Print model to a file + */ + void train_to_file(std::string filename); }; /** diff --git a/src/descriptor_identifier/SISSORegressor.cpp b/src/descriptor_identifier/SISSORegressor.cpp index f8eaac13..978dc0b6 100644 --- a/src/descriptor_identifier/SISSORegressor.cpp +++ b/src/descriptor_identifier/SISSORegressor.cpp @@ -1,10 +1,11 @@ #include <descriptor_identifier/SISSORegressor.hpp> -SISSORegressor::SISSORegressor(std::shared_ptr<FeatureSpace> feat_space, std::vector<double> prop, int 
n_dim): +SISSORegressor::SISSORegressor(std::shared_ptr<FeatureSpace> feat_space, std::vector<double> prop, std::vector<double> prop_test, int n_dim, int n_residual): _feat_space(feat_space), _mpi_comm(feat_space->mpi_comm()), _n_samp(prop.size()), _n_dim(n_dim), + _n_residual(n_residual), _lwork(-1), _rank(0), _a(new double[(_n_dim + 1) * _n_samp]), @@ -14,7 +15,8 @@ SISSORegressor::SISSORegressor(std::shared_ptr<FeatureSpace> feat_space, std::ve _work(nullptr), _s(new double[n_dim + 1]), _models(), - _prop(prop) + _prop(prop), + _prop_test(prop_test) { // Initialize a, b, ones, s, and _error arrays @@ -77,20 +79,28 @@ void SISSORegressor::set_error(std::vector<int>& inds, double* coeffs) void SISSORegressor::fit() { - std::vector<double> residual(_n_samp); + std::vector<double> residual(_n_samp * _n_residual); std::clock_t start; double duration; start = std::clock(); + _feat_space->sis(_prop); + _mpi_comm->barrier(); duration = ( std::clock() - start ) / (double) CLOCKS_PER_SEC; if(_mpi_comm->rank() == 0) std::cout << "Time for SIS: " << duration << std::endl; start = std::clock(); - std::vector<node_ptr> min_node(1, _feat_space->phi_selected()[0]); - _models.push_back(Model(_prop, min_node)); - _models[_models.size() - 1].copy_error(residual.data()); + + std::vector<Model> models; + for(int rr = 0; rr < _n_residual; ++rr) + { + models.push_back(Model(_prop, _prop_test, {_feat_space->phi_selected()[rr]})); + models.back().copy_error(&residual[rr * _n_samp]); + } + _models.push_back(models); + _mpi_comm->barrier(); duration = ( std::clock() - start ) / (double) CLOCKS_PER_SEC; if(_mpi_comm->rank() == 0) @@ -98,20 +108,24 @@ void SISSORegressor::fit() for(int dd = 2; dd <= _n_dim; ++dd) { - start = std::clock(); + start = std::clock(); _feat_space->sis(residual); - _mpi_comm->barrier(); - duration = ( std::clock() - start ) / (double) CLOCKS_PER_SEC; + + _mpi_comm->barrier(); + duration = ( std::clock() - start ) / (double) CLOCKS_PER_SEC; if(_mpi_comm->rank() == 
0) std::cout << "Time for SIS: " << duration << std::endl; + start = std::clock(); - l0_norm(residual, dd); - _mpi_comm->barrier(); + l0_norm(_prop, dd); + + _mpi_comm->barrier(); duration = ( std::clock() - start ) / (double) CLOCKS_PER_SEC; if(_mpi_comm->rank() == 0) std::cout << "Time for l0-norm: " << duration << std::endl; - _models[_models.size() - 1].copy_error(residual.data()); + for(int rr = 0; rr < _n_residual; ++rr) + _models.back()[rr].copy_error(&residual[rr * _n_samp]); } } @@ -120,9 +134,9 @@ void SISSORegressor::l0_norm(std::vector<double>& prop, int n_dim) std::vector<double> coefs(n_dim + 1, 0.0); std::vector<int> inds(n_dim, 0); - std::vector<int> inds_min(n_dim); + std::vector<std::vector<int>> inds_min(_n_residual); - double min_error = util_funcs::norm(prop.data(), _n_samp); + std::vector<double> min_errors(_n_residual, util_funcs::norm(_prop.data(), _n_samp)); int n = _feat_space->phi_selected().size(); @@ -152,28 +166,40 @@ void SISSORegressor::l0_norm(std::vector<double>& prop, int n_dim) least_squares(inds, coefs.data()); set_error(inds, coefs.data()); - - if(util_funcs::norm(_error.get(), _n_samp) < min_error) + double error = util_funcs::norm(_error.get(), _n_samp); + if(error < min_errors.back()) { - std::copy_n(inds.begin(), inds.size(), inds_min.begin()); - min_error = util_funcs::norm(_error.get(), _n_samp); + int rr = 0; + while((rr < _n_residual) && (error > min_errors[rr])) + ++rr; + inds_min.insert(inds_min.begin() + rr, inds); + min_errors.insert(min_errors.begin() + rr, error); + inds_min.pop_back(); + min_errors.pop_back(); } - ++rr; } while(std::next_permutation(v.begin(), v.end()) && (rr < start_end[1])); + std::vector<std::vector<double>> all_min_error; + std::vector<std::vector<std::vector<int>>> all_inds_min; - std::vector<double> all_min_error; - std::vector<std::vector<int>> all_inds_min; - - mpi::all_gather(*_mpi_comm, min_error, all_min_error); + mpi::all_gather(*_mpi_comm, min_errors, all_min_error); 
mpi::all_gather(*_mpi_comm, inds_min, all_inds_min); - int argmin = std::min_element(all_min_error.begin(), all_min_error.end()) - all_min_error.begin(); + min_errors = std::vector<double>(all_min_error.size() * _n_residual); + for(int me = 0; me < all_min_error.size(); ++me) + std::copy_n(all_min_error[me].begin(), all_min_error[me].size(), &min_errors[me * _n_residual]); + inds = util_funcs::argsort(min_errors); std::vector<node_ptr> min_nodes(n_dim); - for(int ii = 0; ii < n_dim; ++ii) - min_nodes[ii] = _feat_space->phi_selected()[all_inds_min[argmin][ii]]; + std::vector<Model> models; + + for(int rr = 0; rr < _n_residual; ++rr) + { + for(int ii = 0; ii < n_dim; ++ii) + min_nodes[ii] = _feat_space->phi_selected()[all_inds_min[inds[rr] / _n_residual][inds[rr] % _n_residual][ii]]; + models.push_back(Model(_prop, _prop_test, min_nodes)); + } - _models.push_back(Model(_prop, min_nodes)); + _models.push_back(models); } diff --git a/src/descriptor_identifier/SISSORegressor.hpp b/src/descriptor_identifier/SISSORegressor.hpp index 8e3da96b..ab4e7e68 100644 --- a/src/descriptor_identifier/SISSORegressor.hpp +++ b/src/descriptor_identifier/SISSORegressor.hpp @@ -17,6 +17,7 @@ protected: int _n_samp; //!< the number of samples per feature int _n_dim; //!< Number of dimensions to calculate + int _n_residual; //!< Number of residuals to pass to the next sis model int _lwork; //!< size of the work array int _rank; //!< Ranks for the least squares problem @@ -27,8 +28,9 @@ protected: std::unique_ptr<double[]> _work; //!< The work array for least squares problems std::unique_ptr<double[]> _s; //!< The S array for least squares problems - std::vector<Model> _models; //!< List of models + std::vector<std::vector<Model>> _models; //!< List of models std::vector<double> _prop; //!< Property array + std::vector<double> _prop_test; //!< Property array public: /** @@ -37,7 +39,7 @@ public: * @param prop Property to model * @param n_dim Maximum dimension of the model */ - 
SISSORegressor(std::shared_ptr<FeatureSpace> feat_space, std::vector<double> prop, int n_dim); + SISSORegressor(std::shared_ptr<FeatureSpace> feat_space, std::vector<double> prop, std::vector<double> prop_test, int n_dim, int n_residual); /** * @brief Get the optimal size of the working array @@ -49,7 +51,7 @@ public: /** * @brief Preform Least squares optimization -\ * + * * @param inds Feature indexes to get the model of * @param coeffs Coefficients for the model */ @@ -96,7 +98,7 @@ public: /** * @brief Acessor function for models */ - inline std::vector<Model>& models(){return _models;} + inline std::vector<std::vector<Model>>& models(){return _models;} /** * @brief Acessor function for n_samp @@ -108,6 +110,11 @@ public: */ inline int n_dim(){return _n_dim;} + /** + * @brief Acessor function for n_residual + */ + inline int n_residual(){return _n_residual;} + /** * @brief Acessor function for lwork */ diff --git a/src/feature_creation/feature_space/FeatureSpace.cpp b/src/feature_creation/feature_space/FeatureSpace.cpp index e9615bbd..a2f6f631 100644 --- a/src/feature_creation/feature_space/FeatureSpace.cpp +++ b/src/feature_creation/feature_space/FeatureSpace.cpp @@ -50,8 +50,8 @@ FeatureSpace::FeatureSpace( _un_operators.push_back(allowed_op_maps::unary_operator_map[op]); } generate_feature_space(); - _scores.resize(_phi.size()); _scores.reserve(_phi.size()); + _scores.resize(_phi.size()); } void FeatureSpace::generate_feature_space() @@ -139,13 +139,15 @@ void FeatureSpace::generate_feature_space() ++feat_ind; } } - if(nn <= _n_rung_store) { bool use_temp = (nn != _max_phi) || (_max_phi > _n_rung_store); node_value_arrs::resize_values_arr(_n_rung_store, _phi.size(), use_temp); for(int ff = _start_gen[0]; ff < _phi.size(); ++ff) + { _phi[ff]->set_value(); + _phi[ff]->set_test_value(); + } } } else @@ -261,30 +263,35 @@ void FeatureSpace::generate_feature_space() { _phi[ff]->reindex(ff + n_feat_below_rank, ff); _phi[ff]->set_value(); + 
_phi[ff]->set_test_value(); } } } _n_feat = _phi.size(); } -void FeatureSpace::project_r(double* prop) +void FeatureSpace::project_r(double* prop, int size) { std::vector<double> scores(_phi.size(), 0.0); for(int ff = 0; ff < _phi.size(); ++ff) - _scores[ff] = -1.0 * std::abs(util_funcs::r(prop, _phi[ff]->value_ptr(), _n_samp)); + _scores[ff] = -1.0 * std::abs(util_funcs::r(&prop[0], _phi[ff]->value_ptr(), _n_samp)); + + for(int pp = 1; pp < size / _n_samp; ++pp) + { + for(int ff = 0; ff < _phi.size(); ++ff) + scores[ff] = -1.0 * std::abs(util_funcs::r(&prop[_n_samp*pp], _phi[ff]->value_ptr(), _n_samp)); + std::transform(scores.begin(), scores.end(), _scores.begin(), _scores.begin(), [](double s1, double s2){return std::min(s1, s2);}); + } } void FeatureSpace::sis(std::vector<double>& prop) { - // while(true) - // {} int cur_feat = node_value_arrs::N_SELECTED; node_value_arrs::resize_d_matrix_arr(_n_sis_select); _phi_selected.reserve(_phi_selected.size() + _n_sis_select); - project_r(prop.data()); - + project_r(prop.data(), prop.size()); std::vector<int> inds = util_funcs::argsort(_scores); std::vector<double> scores_selected(_n_sis_select, 0.0); @@ -305,7 +312,7 @@ void FeatureSpace::sis(std::vector<double>& prop) // Check the feature against those selected from previous SIS iterations for(int dd = 0; dd < cur_feat; ++dd) { - if(1.0 - std::abs(util_funcs::r(node_value_arrs::get_d_matrix_ptr(dd), _phi[inds[ii]]->value_ptr(), prop.size())) < 1e-13) + if(1.0 - std::abs(util_funcs::r(node_value_arrs::get_d_matrix_ptr(dd), _phi[inds[ii]]->value_ptr(), _n_samp)) < 1e-10) { is_valid = false; break; @@ -320,7 +327,7 @@ void FeatureSpace::sis(std::vector<double>& prop) if(*std::min_element(scores_comp.begin(), scores_comp.begin() + cur_feat_local) < 1e-10) { int dd = std::min_element(scores_comp.begin(), scores_comp.begin() + cur_feat_local) - scores_comp.begin(); - if(1.0 - std::abs(util_funcs::r(node_value_arrs::get_d_matrix_ptr(cur_feat + dd), 
_phi[inds[ii]]->value_ptr(), prop.size())) < 1e-13) + if(1.0 - std::abs(util_funcs::r(node_value_arrs::get_d_matrix_ptr(cur_feat + dd), _phi[inds[ii]]->value_ptr(), _n_samp)) < 1e-10) is_valid = false; } @@ -328,7 +335,7 @@ void FeatureSpace::sis(std::vector<double>& prop) { inds_selected[cur_feat_local] = _phi[inds[ii]]->feat_ind(); scores_selected[cur_feat_local] = _scores[inds[ii]]; - phi_selected.push_back(std::make_shared<FeatureNode>(cur_feat + cur_feat_local, _phi[inds[ii]]->expr(), _phi[inds[ii]]->value(), _phi[inds[ii]]->unit(), true)); + phi_selected.push_back(std::make_shared<FeatureNode>(cur_feat + cur_feat_local, _phi[inds[ii]]->expr(), _phi[inds[ii]]->value(), _phi[inds[ii]]->test_value(), _phi[inds[ii]]->unit(), true)); ++cur_feat_local; } ++ii; @@ -418,6 +425,7 @@ void FeatureSpace::sis(std::vector<double>& prop) _phi_selected.push_back(sent_phi[inds[ii]]); _phi_selected.back()->reindex(cur_feat); _phi_selected.back()->set_value(); + _phi_selected.back()->set_test_value(); ++cur_feat; } } @@ -459,7 +467,7 @@ void FeatureSpace::sis(std::vector<double>& prop) if(*std::min_element(scores_comp.begin(), scores_comp.begin() + cur_feat_local) < 1e-10) { int dd = std::min_element(scores_comp.begin(), scores_comp.begin() + cur_feat_local) - scores_comp.begin(); - if(1.0 - std::abs(util_funcs::r(node_value_arrs::get_d_matrix_ptr(cur_feat + dd), sent_phi[inds[ii]]->value().data(), prop.size())) < 1e-13) + if(1.0 - std::abs(util_funcs::r(node_value_arrs::get_d_matrix_ptr(cur_feat + dd), sent_phi[inds[ii]]->value().data(), _n_samp)) < 1e-13) is_valid = false; } @@ -468,6 +476,7 @@ void FeatureSpace::sis(std::vector<double>& prop) _phi_selected.push_back(sent_phi[inds[ii]]); _phi_selected.back()->reindex(cur_feat); _phi_selected.back()->set_value(); + _phi_selected.back()->set_test_value(); ++cur_feat_local; ++cur_feat; } diff --git a/src/feature_creation/feature_space/FeatureSpace.hpp b/src/feature_creation/feature_space/FeatureSpace.hpp index cf926e2f..ac3c2362 
100644 --- a/src/feature_creation/feature_space/FeatureSpace.hpp +++ b/src/feature_creation/feature_space/FeatureSpace.hpp @@ -99,7 +99,7 @@ public: * * @param prop [description] */ - void project_r(double* prop); + void project_r(double* prop, int size); /** * @brief Perform SIS on a feature set with a specified property diff --git a/src/feature_creation/node/FeatureNode.cpp b/src/feature_creation/node/FeatureNode.cpp index 18f463df..7d8ac1f3 100644 --- a/src/feature_creation/node/FeatureNode.cpp +++ b/src/feature_creation/node/FeatureNode.cpp @@ -3,14 +3,16 @@ FeatureNode::FeatureNode() {} -FeatureNode::FeatureNode(int feat_ind, std::string expr, std::vector<double> value, Unit unit, bool selected) : - Node(feat_ind, value.size()), +FeatureNode::FeatureNode(int feat_ind, std::string expr, std::vector<double> value, std::vector<double> test_value, Unit unit, bool selected) : + Node(feat_ind, value.size(), test_value.size()), _selected(selected), _expr(expr), _unit(unit), - _value(value) + _value(value), + _test_value(test_value) { set_value(); + set_test_value(); } FeatureNode::~FeatureNode() diff --git a/src/feature_creation/node/FeatureNode.hpp b/src/feature_creation/node/FeatureNode.hpp index 584d75ed..f6504d49 100644 --- a/src/feature_creation/node/FeatureNode.hpp +++ b/src/feature_creation/node/FeatureNode.hpp @@ -30,6 +30,7 @@ class FeatureNode: public Node ar & _expr; ar & _unit; ar & _value; + ar & _test_value; } protected: @@ -37,6 +38,7 @@ protected: std::string _expr; //!< Expression of the feature Unit _unit; //!< Unit for the feature std::vector<double> _value; //!< values for the feature + std::vector<double> _test_value; //!< test values for the feature public: /** * @brief Base Constructor @@ -50,9 +52,10 @@ public: * @param feat_ind index of the feature * @param expr Expression for the feature * @param value Value of the feature for each sample + * @param value Value of the feature for each test sample * @param unit Unit of the feature */ - 
FeatureNode(int feat_ind, std::string expr, std::vector<double> value, Unit unit, bool selected=false); + FeatureNode(int feat_ind, std::string expr, std::vector<double> value, std::vector<double> test_value, Unit unit, bool selected=false); ~FeatureNode(); @@ -71,10 +74,21 @@ public: */ inline std::vector<double> value(){return _value;} + /** + * @brief Get the test values of the feature + */ + inline std::vector<double> test_value(){return _test_value;} + /** * @brief Set the value for the feature */ inline void set_value(int offset = -1){std::copy_n(_value.data(), _n_samp, value_ptr());} + + /** + * @brief Set the test value for the feature + */ + inline void set_test_value(int offset = -1){std::copy_n(_test_value.data(), _n_test_samp, test_value_ptr());} + /** * @brief Check if the feature contains NaN */ @@ -99,6 +113,12 @@ public: */ inline double* value_ptr(int offset = 0){return _selected ? node_value_arrs::get_d_matrix_ptr(_arr_ind) : node_value_arrs::get_value_ptr(_arr_ind, offset);} + /** + * @brief Accessor function to the value of the feature's test set + */ + inline double* test_value_ptr(int offset = 0){return node_value_arrs::get_test_value_ptr(_arr_ind, offset);} + + /** * @brief return the rung of the feature */ diff --git a/src/feature_creation/node/Node.cpp b/src/feature_creation/node/Node.cpp index 86a2d8bb..d4cd1d87 100644 --- a/src/feature_creation/node/Node.cpp +++ b/src/feature_creation/node/Node.cpp @@ -3,7 +3,8 @@ Node::Node() {} -Node::Node(int feat_ind, int n_samp) : +Node::Node(int feat_ind, int n_samp, int n_test_samp) : + _n_test_samp(n_test_samp), _n_samp(n_samp), _feat_ind(feat_ind), _arr_ind(feat_ind) diff --git a/src/feature_creation/node/Node.hpp b/src/feature_creation/node/Node.hpp index 3b19745e..d3d60fad 100644 --- a/src/feature_creation/node/Node.hpp +++ b/src/feature_creation/node/Node.hpp @@ -31,13 +31,15 @@ class Node template <typename Archive> void serialize(Archive& ar, const unsigned int version) { + ar & 
_n_test_samp; ar & _n_samp; ar & _feat_ind; ar & _arr_ind; } protected: - int _n_samp; //!< Number of samples in the feature + int _n_test_samp; //!< Number of samples in the feature's test set + int _n_samp; //!< Number of samples in the feature's test set int _feat_ind; //!< Index of the feature int _arr_ind; //!< Index of the feature for the value arrays @@ -53,8 +55,9 @@ public: * * @param feat_ind index of the feature * @param n_samp number of samples in the node + * @param n_samp number of test samples in the node */ - Node(int feat_ind, int n_samp); + Node(int feat_ind, int n_samp, int n_test_samp); virtual ~Node(); @@ -80,6 +83,11 @@ public: */ inline int n_samp(){return _n_samp;} + /** + * @brief Acesssor function to get the number of samples in the test set + */ + inline int n_test_samp(){return _n_test_samp;} + /** * @brief Accessor function to get the feature ind */ @@ -105,6 +113,11 @@ public: */ virtual std::vector<double> value() = 0; + /** + * @brief Get the test_value of the descriptor + */ + virtual std::vector<double> test_value() = 0; + virtual std::vector<std::shared_ptr<Node>> feats() = 0; /** @@ -117,6 +130,16 @@ public: */ virtual double* value_ptr(int offset = 0) = 0; + /** + * @brief Set the test value for the feature + */ + virtual void set_test_value(int offset = -1) = 0; + + /** + * @brief Accessor function to the test value of the feature + */ + virtual double* test_value_ptr(int offset = 0) = 0; + /** * @brief Check if the feature contains NaN */ diff --git a/src/feature_creation/node/operator_nodes/OperatorNode.cpp b/src/feature_creation/node/operator_nodes/OperatorNode.cpp index 6999d464..80b6ea13 100644 --- a/src/feature_creation/node/operator_nodes/OperatorNode.cpp +++ b/src/feature_creation/node/operator_nodes/OperatorNode.cpp @@ -4,7 +4,7 @@ OperatorNode::OperatorNode() {} OperatorNode::OperatorNode(std::vector<node_ptr> feats, int rung, int feat_ind) : - Node(feat_ind, feats[0]->n_samp()), + Node(feat_ind, feats[0]->n_samp(), 
feats[0]->n_test_samp()), _feats(feats) {} @@ -23,6 +23,18 @@ double* OperatorNode::value_ptr(int offset) return node_value_arrs::get_value_ptr(_arr_ind, offset); } +double* OperatorNode::test_value_ptr(int offset) +{ + offset = (offset == -1) ? rung() : offset; + if((rung() > node_value_arrs::N_RUNGS_STORED) && (node_value_arrs::temp_storage_test_reg(_arr_ind, offset) != _arr_ind)) + { + set_test_value(offset); + node_value_arrs::temp_storage_test_reg(_arr_ind, offset) = _arr_ind; + } + + return node_value_arrs::get_test_value_ptr(_arr_ind, offset); +} + std::vector<double> OperatorNode::value() { std::vector<double> val(_n_samp, 0.0); @@ -30,4 +42,11 @@ std::vector<double> OperatorNode::value() return val; } +std::vector<double> OperatorNode::test_value() +{ + std::vector<double> val(_n_test_samp, 0.0); + std::copy_n(test_value_ptr(), _n_test_samp, val.data()); + return val; +} + BOOST_SERIALIZATION_ASSUME_ABSTRACT(OperatorNode) diff --git a/src/feature_creation/node/operator_nodes/OperatorNode.hpp b/src/feature_creation/node/operator_nodes/OperatorNode.hpp index 5c32779c..d8aaa8bb 100644 --- a/src/feature_creation/node/operator_nodes/OperatorNode.hpp +++ b/src/feature_creation/node/operator_nodes/OperatorNode.hpp @@ -55,10 +55,14 @@ public: std::vector<double> value(); + std::vector<double> test_value(); + inline std::vector<node_ptr> feats(){return _feats;} virtual void set_value(int offset = -1) = 0; + virtual void set_test_value(int offset = -1) = 0; + /** * @brief Get the pointer to the feature's data * @details If the feature is not already stored in memory, then calculate the feature and return the pointer to the data @@ -69,6 +73,16 @@ public: */ double* value_ptr(int offset=-1); + /** + * @brief Get the pointer to the feature's data + * @details If the feature is not already stored in memory, then calculate the feature and return the pointer to the data + * + * @param offset the integer value to offset the location in the temporary storage array + * + * 
@return pointer to the feature's test values + */ + double* test_value_ptr(int offset=-1); + /** * @brief Check if the feature contains NaN */ diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/absolute_difference.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/absolute_difference.cpp index 1dc3b2bc..ae7ee26a 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/absolute_difference.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/absolute_difference.cpp @@ -18,7 +18,7 @@ AbsDiffNode::AbsDiffNode(std::vector<node_ptr> feats, int rung, int feat_ind) : if((add_sub_leaves.size() < 2)) throw InvalidFeatureException(); - if(std::abs(std::accumulate(add_sub_leaves.begin(), add_sub_leaves.end(), -1*expected_abs_tot, [](int tot, auto el){return tot + std::abs(el.second);})) > 0) + if(std::abs(std::accumulate(add_sub_leaves.begin(), add_sub_leaves.end(), -1*expected_abs_tot, [](int tot, auto el){return tot + std::abs(el.second);})) != 0) throw InvalidFeatureException(); int add_sub_tot_first = add_sub_leaves.begin()->second; @@ -29,6 +29,8 @@ AbsDiffNode::AbsDiffNode(std::vector<node_ptr> feats, int rung, int feat_ind) : set_value(); if(is_nan() || is_const()) throw InvalidFeatureException(); + + set_test_value(); } AbsDiffNode::AbsDiffNode(node_ptr feat_1, node_ptr feat_2, int rung, int feat_ind) : @@ -44,17 +46,20 @@ AbsDiffNode::AbsDiffNode(node_ptr feat_1, node_ptr feat_2, int rung, int feat_in if((add_sub_leaves.size() < 2)) throw InvalidFeatureException(); - if(std::abs(std::accumulate(add_sub_leaves.begin(), add_sub_leaves.end(), -1*expected_abs_tot, [](int tot, auto el){return tot + std::abs(el.second);})) > 0) + if(std::abs(std::accumulate(add_sub_leaves.begin(), add_sub_leaves.end(), -1*expected_abs_tot, [](int tot, auto el){return tot + std::abs(el.second);})) != 0) throw InvalidFeatureException(); int add_sub_tot_first = add_sub_leaves.begin()->second; 
if((std::abs(add_sub_tot_first) > 1) && std::all_of(add_sub_leaves.begin(), add_sub_leaves.end(), [&add_sub_tot_first](auto el){return el.second == add_sub_tot_first;})) throw InvalidFeatureException(); + set_value(); if(is_nan() || is_const()) throw InvalidFeatureException(); - } + + set_test_value(); +} void AbsDiffNode::update_add_sub_leaves(std::map<std::string, int>& add_sub_leaves, int pl_mn, int& expected_abs_tot) { @@ -75,5 +80,5 @@ void AbsDiffNode::update_div_mult_leaves(std::map<std::string, double>& div_mult else div_mult_leaves[key] = fact; - expected_abs_tot *= std::abs(fact); + expected_abs_tot += std::abs(fact); } diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/absolute_difference.hpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/absolute_difference.hpp index 2076e44f..7434f27e 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/absolute_difference.hpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/absolute_difference.hpp @@ -30,6 +30,12 @@ public: allowed_op_funcs::abs_diff(_n_samp, _feats[0]->value_ptr(offset + 2), _feats[1]->value_ptr(offset + 1), node_value_arrs::get_value_ptr(_arr_ind, offset)); } + inline void set_test_value(int offset = -1) + { + offset = (offset == -1) ? 
rung() : offset; + allowed_op_funcs::abs_diff(_n_test_samp, _feats[0]->test_value_ptr(offset + 2), _feats[1]->test_value_ptr(offset + 1), node_value_arrs::get_test_value_ptr(_arr_ind, offset)); + } + /** * @brief return the rung of the feature */ diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/absolute_value.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/absolute_value.cpp index d740f802..7c01d21a 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/absolute_value.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/absolute_value.cpp @@ -11,6 +11,8 @@ AbsNode::AbsNode(std::vector<node_ptr> feats, int rung, int feat_ind): set_value(); if(is_nan() || is_const()) throw InvalidFeatureException(); + + set_test_value(); } AbsNode::AbsNode(node_ptr feat, int rung, int feat_ind): @@ -19,6 +21,8 @@ AbsNode::AbsNode(node_ptr feat, int rung, int feat_ind): set_value(); if(is_nan() || is_const()) throw InvalidFeatureException(); + + set_test_value(); } void AbsNode::update_add_sub_leaves(std::map<std::string, int>& add_sub_leaves, int pl_mn, int& expected_abs_tot) @@ -40,5 +44,5 @@ void AbsNode::update_div_mult_leaves(std::map<std::string, double>& div_mult_lea else div_mult_leaves[key] = fact; - expected_abs_tot *= std::abs(fact); + expected_abs_tot += std::abs(fact); } diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/absolute_value.hpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/absolute_value.hpp index 94c68f4d..11cb5c7c 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/absolute_value.hpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/absolute_value.hpp @@ -29,6 +29,12 @@ public: allowed_op_funcs::abs(_n_samp, _feats[0]->value_ptr(offset + 2), node_value_arrs::get_value_ptr(_arr_ind, offset)); } + inline void set_test_value(int offset = -1) + { + offset = (offset == -1) ? 
rung() : offset; + allowed_op_funcs::abs(_n_test_samp, _feats[0]->test_value_ptr(offset + 2), node_value_arrs::get_test_value_ptr(_arr_ind, offset)); + } + /** * @brief return the rung of the feature */ diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/add.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/add.cpp index 29223393..0a4742b5 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/add.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/add.cpp @@ -16,7 +16,7 @@ AddNode::AddNode(std::vector<node_ptr> feats, int rung, int feat_ind): if((add_sub_leaves.size() < 2)) throw InvalidFeatureException(); - if(std::abs(std::accumulate(add_sub_leaves.begin(), add_sub_leaves.end(), -1*expected_abs_tot, [](int tot, auto el){return tot + std::abs(el.second);})) > 0) + if(std::abs(std::accumulate(add_sub_leaves.begin(), add_sub_leaves.end(), -1*expected_abs_tot, [](int tot, auto el){return tot + std::abs(el.second);})) != 0) throw InvalidFeatureException(); int add_sub_tot_first = add_sub_leaves.begin()->second; @@ -27,6 +27,8 @@ AddNode::AddNode(std::vector<node_ptr> feats, int rung, int feat_ind): set_value(); if(is_nan() || is_const()) throw InvalidFeatureException(); + + set_test_value(); } AddNode::AddNode(node_ptr feat_1, node_ptr feat_2, int rung, int feat_ind): @@ -42,7 +44,7 @@ AddNode::AddNode(node_ptr feat_1, node_ptr feat_2, int rung, int feat_ind): if((add_sub_leaves.size() < 2)) throw InvalidFeatureException(); - if(std::abs(std::accumulate(add_sub_leaves.begin(), add_sub_leaves.end(), -1*expected_abs_tot, [](int tot, auto el){return tot + std::abs(el.second);})) > 0) + if(std::abs(std::accumulate(add_sub_leaves.begin(), add_sub_leaves.end(), -1*expected_abs_tot, [](int tot, auto el){return tot + std::abs(el.second);})) != 0) throw InvalidFeatureException(); int add_sub_tot_first = add_sub_leaves.begin()->second; @@ -53,6 +55,8 @@ AddNode::AddNode(node_ptr feat_1, 
node_ptr feat_2, int rung, int feat_ind): set_value(); if(is_nan() || is_const()) throw InvalidFeatureException(); + + set_test_value(); } void AddNode::update_add_sub_leaves(std::map<std::string, int>& add_sub_leaves, int pl_mn, int& expected_abs_tot) @@ -69,5 +73,5 @@ void AddNode::update_div_mult_leaves(std::map<std::string, double>& div_mult_lea else div_mult_leaves[key] = fact; - expected_abs_tot *= std::abs(fact); + expected_abs_tot += std::abs(fact); } diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/add.hpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/add.hpp index 39644de9..de207aa7 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/add.hpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/add.hpp @@ -29,6 +29,12 @@ public: allowed_op_funcs::add(_n_samp, _feats[0]->value_ptr(offset + 2), _feats[1]->value_ptr(offset + 1), node_value_arrs::get_value_ptr(_arr_ind, offset)); } + inline void set_test_value(int offset = -1) + { + offset = (offset == -1) ? 
rung() : offset; + allowed_op_funcs::add(_n_test_samp, _feats[0]->test_value_ptr(offset + 2), _feats[1]->test_value_ptr(offset + 1), node_value_arrs::get_test_value_ptr(_arr_ind, offset)); + } + /** * @brief return the rung of the feature */ diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cos.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cos.cpp index 10167f15..697094e2 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cos.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cos.cpp @@ -15,6 +15,8 @@ CosNode::CosNode(std::vector<node_ptr> feats, int rung, int feat_ind): set_value(); if(is_nan() || is_const()) throw InvalidFeatureException(); + + set_test_value(); } CosNode::CosNode(node_ptr feat, int rung, int feat_ind): @@ -29,6 +31,8 @@ CosNode::CosNode(node_ptr feat, int rung, int feat_ind): set_value(); if(is_nan() || is_const()) throw InvalidFeatureException(); + + set_test_value(); } void CosNode::update_add_sub_leaves(std::map<std::string, int>& add_sub_leaves, int pl_mn, int& expected_abs_tot) @@ -50,6 +54,6 @@ void CosNode::update_div_mult_leaves(std::map<std::string, double>& div_mult_lea else div_mult_leaves[key] = fact; - expected_abs_tot *= std::abs(fact); + expected_abs_tot += std::abs(fact); } diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cos.hpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cos.hpp index 54f77a23..97eda826 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cos.hpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cos.hpp @@ -29,6 +29,12 @@ public: allowed_op_funcs::cos(_n_samp, _feats[0]->value_ptr(offset + 2), node_value_arrs::get_value_ptr(_arr_ind, offset)); } + inline void set_test_value(int offset = -1) + { + offset = (offset == -1) ? 
rung() : offset; + allowed_op_funcs::cos(_n_test_samp, _feats[0]->test_value_ptr(offset + 2), node_value_arrs::get_test_value_ptr(_arr_ind, offset)); + } + /** * @brief return the rung of the feature */ diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cube.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cube.cpp index 650817c1..d98cc3e3 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cube.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cube.cpp @@ -12,6 +12,8 @@ CbNode::CbNode(std::vector<node_ptr> feats, int rung, int feat_ind): set_value(); if(is_nan() || is_const()) throw InvalidFeatureException(); + + set_test_value(); } CbNode::CbNode(node_ptr feat, int rung, int feat_ind): @@ -23,6 +25,8 @@ CbNode::CbNode(node_ptr feat, int rung, int feat_ind): set_value(); if(is_nan() || is_const()) throw InvalidFeatureException(); + + set_test_value(); } void CbNode::update_add_sub_leaves(std::map<std::string, int>& add_sub_leaves, int pl_mn, int& expected_abs_tot) diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cube.hpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cube.hpp index 7cabb1bf..420af5ca 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cube.hpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cube.hpp @@ -29,6 +29,12 @@ public: allowed_op_funcs::cb(_n_samp, _feats[0]->value_ptr(offset + 2), node_value_arrs::get_value_ptr(_arr_ind, offset)); } + inline void set_test_value(int offset = -1) + { + offset = (offset == -1) ? 
rung() : offset; + allowed_op_funcs::cb(_n_test_samp, _feats[0]->test_value_ptr(offset + 2), node_value_arrs::get_test_value_ptr(_arr_ind, offset)); + } + /** * @brief return the rung of the feature */ diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cube_root.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cube_root.cpp index 8c0f7b16..b824bec6 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cube_root.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cube_root.cpp @@ -12,6 +12,8 @@ CbrtNode::CbrtNode(std::vector<node_ptr> feats, int rung, int feat_ind): set_value(); if(is_nan() || is_const()) throw InvalidFeatureException(); + + set_test_value(); } CbrtNode::CbrtNode(node_ptr feat, int rung, int feat_ind): @@ -23,6 +25,8 @@ CbrtNode::CbrtNode(node_ptr feat, int rung, int feat_ind): set_value(); if(is_nan() || is_const()) throw InvalidFeatureException(); + + set_test_value(); } void CbrtNode::update_add_sub_leaves(std::map<std::string, int>& add_sub_leaves, int pl_mn, int& expected_abs_tot) diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cube_root.hpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cube_root.hpp index 2486e4cd..b69afd46 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cube_root.hpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cube_root.hpp @@ -29,6 +29,12 @@ public: allowed_op_funcs::cbrt(_n_samp, _feats[0]->value_ptr(offset + 2), node_value_arrs::get_value_ptr(_arr_ind, offset)); } + inline void set_test_value(int offset = -1) + { + offset = (offset == -1) ? 
rung() : offset; + allowed_op_funcs::cbrt(_n_test_samp, _feats[0]->test_value_ptr(offset + 2), node_value_arrs::get_test_value_ptr(_arr_ind, offset)); + } + /** * @brief return the rung of the feature */ diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/divide.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/divide.cpp index 74961765..6aa3db93 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/divide.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/divide.cpp @@ -19,14 +19,16 @@ DivNode::DivNode(std::vector<node_ptr> feats, int rung, int feat_ind): if(std::abs(std::accumulate(div_mult_leaves.begin(), div_mult_leaves.end(), -1.0*expected_abs_tot, [](double tot, auto el){return tot + std::abs(el.second);})) > 1e-12) throw InvalidFeatureException(); - int div_mult_tot_first = div_mult_leaves.begin()->second; + double div_mult_tot_first = div_mult_leaves.begin()->second; - if((std::abs(div_mult_tot_first) > 1) && std::all_of(div_mult_leaves.begin(), div_mult_leaves.end(), [&div_mult_tot_first](auto el){return el.second == div_mult_tot_first;})) + if((std::abs(div_mult_tot_first) != 1.0) && std::all_of(div_mult_leaves.begin(), div_mult_leaves.end(), [&div_mult_tot_first](auto el){return el.second == div_mult_tot_first;})) throw InvalidFeatureException(); set_value(); if(is_nan() || is_const()) throw InvalidFeatureException(); + + set_test_value(); } DivNode::DivNode(node_ptr feat_1, node_ptr feat_2, int rung, int feat_ind): @@ -45,14 +47,16 @@ DivNode::DivNode(node_ptr feat_1, node_ptr feat_2, int rung, int feat_ind): if(std::abs(std::accumulate(div_mult_leaves.begin(), div_mult_leaves.end(), -1.0*expected_abs_tot, [](double tot, auto el){return tot + std::abs(el.second);})) > 1e-12) throw InvalidFeatureException(); - int div_mult_tot_first = div_mult_leaves.begin()->second; + double div_mult_tot_first = div_mult_leaves.begin()->second; - if((std::abs(div_mult_tot_first) > 
1) && std::all_of(div_mult_leaves.begin(), div_mult_leaves.end(), [&div_mult_tot_first](auto el){return el.second == div_mult_tot_first;})) + if((std::abs(div_mult_tot_first) != 1.0) && std::all_of(div_mult_leaves.begin(), div_mult_leaves.end(), [&div_mult_tot_first](auto el){return el.second == div_mult_tot_first;})) throw InvalidFeatureException(); set_value(); if(is_nan() || is_const()) throw InvalidFeatureException(); + + set_test_value(); } void DivNode::update_add_sub_leaves(std::map<std::string, int>& add_sub_leaves, int pl_mn, int& expected_abs_tot) diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/divide.hpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/divide.hpp index 073c9dc9..7b06b84e 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/divide.hpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/divide.hpp @@ -29,6 +29,12 @@ public: allowed_op_funcs::div(_n_samp, _feats[0]->value_ptr(offset + 2), _feats[1]->value_ptr(offset + 1), node_value_arrs::get_value_ptr(_arr_ind, offset)); } + inline void set_test_value(int offset = -1) + { + offset = (offset == -1) ? 
rung() : offset; + allowed_op_funcs::div(_n_test_samp, _feats[0]->test_value_ptr(offset + 2), _feats[1]->test_value_ptr(offset + 1), node_value_arrs::get_test_value_ptr(_arr_ind, offset)); + } + /** * @brief return the rung of the feature */ diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/exponential.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/exponential.cpp index 8e81ea5f..c12d2397 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/exponential.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/exponential.cpp @@ -15,6 +15,8 @@ ExpNode::ExpNode(std::vector<node_ptr> feats, int rung, int feat_ind): set_value(); if(is_nan() || is_const()) throw InvalidFeatureException(); + + set_test_value(); } ExpNode::ExpNode(node_ptr feat, int rung, int feat_ind): @@ -29,6 +31,8 @@ ExpNode::ExpNode(node_ptr feat, int rung, int feat_ind): set_value(); if(is_nan() || is_const()) throw InvalidFeatureException(); + + set_test_value(); } void ExpNode::update_add_sub_leaves(std::map<std::string, int>& add_sub_leaves, int pl_mn, int& expected_abs_tot) @@ -50,5 +54,5 @@ void ExpNode::update_div_mult_leaves(std::map<std::string, double>& div_mult_lea else div_mult_leaves[key] = fact; - expected_abs_tot *= std::abs(fact); + expected_abs_tot += std::abs(fact); } diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/exponential.hpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/exponential.hpp index 7413f649..4903e366 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/exponential.hpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/exponential.hpp @@ -29,6 +29,12 @@ public: allowed_op_funcs::exp(_n_samp, _feats[0]->value_ptr(offset + 2), node_value_arrs::get_value_ptr(_arr_ind, offset)); } + inline void set_test_value(int offset = -1) + { + offset = (offset == -1) ? 
rung() : offset; + allowed_op_funcs::exp(_n_test_samp, _feats[0]->test_value_ptr(offset + 2), node_value_arrs::get_test_value_ptr(_arr_ind, offset)); + } + /** * @brief return the rung of the feature */ diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/inverse.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/inverse.cpp index bdccb2d1..b8f0e456 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/inverse.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/inverse.cpp @@ -12,6 +12,8 @@ InvNode::InvNode(std::vector<node_ptr> feats, int rung, int feat_ind): set_value(); if(is_nan() || is_const()) throw InvalidFeatureException(); + + set_test_value(); } InvNode::InvNode(node_ptr feat, int rung, int feat_ind): @@ -23,6 +25,8 @@ InvNode::InvNode(node_ptr feat, int rung, int feat_ind): set_value(); if(is_nan() || is_const()) throw InvalidFeatureException(); + + set_test_value(); } void InvNode::update_add_sub_leaves(std::map<std::string, int>& add_sub_leaves, int pl_mn, int& expected_abs_tot) diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/inverse.hpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/inverse.hpp index 17749864..8147863d 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/inverse.hpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/inverse.hpp @@ -30,6 +30,12 @@ public: allowed_op_funcs::inv(_n_samp, _feats[0]->value_ptr(offset + 2), node_value_arrs::get_value_ptr(_arr_ind, offset)); } + inline void set_test_value(int offset = -1) + { + offset = (offset == -1) ? 
rung() : offset; + allowed_op_funcs::inv(_n_test_samp, _feats[0]->test_value_ptr(offset + 2), node_value_arrs::get_test_value_ptr(_arr_ind, offset)); + } + /** * @brief return the rung of the feature */ diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/log.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/log.cpp index cab91e51..d0af2839 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/log.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/log.cpp @@ -15,6 +15,8 @@ LogNode::LogNode(std::vector<node_ptr> feats, int rung, int feat_ind): set_value(); if(is_nan() || is_const()) throw InvalidFeatureException(); + + set_test_value(); } LogNode::LogNode(node_ptr feat, int rung, int feat_ind): @@ -29,6 +31,8 @@ LogNode::LogNode(node_ptr feat, int rung, int feat_ind): set_value(); if(is_nan() || is_const()) throw InvalidFeatureException(); + + set_test_value(); } void LogNode::update_add_sub_leaves(std::map<std::string, int>& add_sub_leaves, int pl_mn, int& expected_abs_tot) @@ -50,5 +54,5 @@ void LogNode::update_div_mult_leaves(std::map<std::string, double>& div_mult_lea else div_mult_leaves[key] = fact; - expected_abs_tot *= std::abs(fact); + expected_abs_tot += std::abs(fact); } diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/log.hpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/log.hpp index 56395dd6..864b60c7 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/log.hpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/log.hpp @@ -29,6 +29,12 @@ public: allowed_op_funcs::log(_n_samp, _feats[0]->value_ptr(offset + 2), node_value_arrs::get_value_ptr(_arr_ind, offset)); } + inline void set_test_value(int offset = -1) + { + offset = (offset == -1) ? 
rung() : offset; + allowed_op_funcs::log(_n_test_samp, _feats[0]->test_value_ptr(offset + 2), node_value_arrs::get_test_value_ptr(_arr_ind, offset)); + } + /** * @brief return the rung of the feature */ diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/multiply.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/multiply.cpp index c92d37ef..bc0e13fa 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/multiply.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/multiply.cpp @@ -16,14 +16,16 @@ MultNode::MultNode(std::vector<node_ptr> feats, int rung, int feat_ind): if(std::abs(std::accumulate(div_mult_leaves.begin(), div_mult_leaves.end(), -1.0*expected_abs_tot, [](double tot, auto el){return tot + std::abs(el.second);})) > 1e-12) throw InvalidFeatureException(); - int div_mult_tot_first = div_mult_leaves.begin()->second; + double div_mult_tot_first = div_mult_leaves.begin()->second; - if((std::abs(div_mult_tot_first) > 1) && std::all_of(div_mult_leaves.begin(), div_mult_leaves.end(), [&div_mult_tot_first](auto el){return el.second == div_mult_tot_first;})) + if((std::abs(div_mult_tot_first) != 1.0) && std::all_of(div_mult_leaves.begin(), div_mult_leaves.end(), [&div_mult_tot_first](auto el){return el.second == div_mult_tot_first;})) throw InvalidFeatureException(); set_value(); if(is_nan() || is_const()) throw InvalidFeatureException(); + + set_test_value(); } MultNode::MultNode(node_ptr feat_1, node_ptr feat_2, int rung, int feat_ind): @@ -39,14 +41,16 @@ MultNode::MultNode(node_ptr feat_1, node_ptr feat_2, int rung, int feat_ind): if(std::abs(std::accumulate(div_mult_leaves.begin(), div_mult_leaves.end(), -1.0*expected_abs_tot, [](double tot, auto el){return tot + std::abs(el.second);})) > 1e-12) throw InvalidFeatureException(); - int div_mult_tot_first = div_mult_leaves.begin()->second; + double div_mult_tot_first = div_mult_leaves.begin()->second; - 
if((std::abs(div_mult_tot_first) > 1) && std::all_of(div_mult_leaves.begin(), div_mult_leaves.end(), [&div_mult_tot_first](auto el){return el.second == div_mult_tot_first;})) + if((std::abs(div_mult_tot_first) - 1.0 > 1e-12) && std::all_of(div_mult_leaves.begin(), div_mult_leaves.end(), [&div_mult_tot_first](auto el){return el.second == div_mult_tot_first;})) throw InvalidFeatureException(); set_value(); if(is_nan() || is_const()) throw InvalidFeatureException(); + + set_test_value(); } void MultNode::update_add_sub_leaves(std::map<std::string, int>& add_sub_leaves, int pl_mn, int& expected_abs_tot) diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/multiply.hpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/multiply.hpp index 55fbd06d..f78a0ed7 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/multiply.hpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/multiply.hpp @@ -30,6 +30,12 @@ public: allowed_op_funcs::mult(_n_samp, _feats[0]->value_ptr(offset + 2), _feats[1]->value_ptr(offset + 1), node_value_arrs::get_value_ptr(_arr_ind, offset)); } + inline void set_test_value(int offset = -1) + { + offset = (offset == -1) ? 
rung() : offset; + allowed_op_funcs::mult(_n_test_samp, _feats[0]->test_value_ptr(offset + 2), _feats[1]->test_value_ptr(offset + 1), node_value_arrs::get_test_value_ptr(_arr_ind, offset)); + } + /** * @brief return the rung of the feature */ diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/negative_exponential.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/negative_exponential.cpp index 4776b396..b4761ebe 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/negative_exponential.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/negative_exponential.cpp @@ -52,5 +52,5 @@ void NegExpNode::update_div_mult_leaves(std::map<std::string, double>& div_mult_ else div_mult_leaves[key] = -1.0 * fact; - expected_abs_tot *= std::abs(fact); + expected_abs_tot += std::abs(fact); } diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/negative_exponential.hpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/negative_exponential.hpp index 78c07cb6..c7dde942 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/negative_exponential.hpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/negative_exponential.hpp @@ -30,6 +30,12 @@ public: allowed_op_funcs::neg_exp(_n_samp, _feats[0]->value_ptr(offset + 2), node_value_arrs::get_value_ptr(_arr_ind, offset)); }; + inline void set_test_value(int offset = -1) + { + offset = (offset == -1) ? 
rung() : offset; + allowed_op_funcs::neg_exp(_n_test_samp, _feats[0]->test_value_ptr(offset + 2), node_value_arrs::get_test_value_ptr(_arr_ind, offset)); + }; + /** * @brief return the rung of the feature */ diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sin.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sin.cpp index 5aafb053..3ab1f7e5 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sin.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sin.cpp @@ -15,6 +15,8 @@ SinNode::SinNode(std::vector<node_ptr> feats, int rung, int feat_ind): set_value(); if(is_nan() || is_const()) throw InvalidFeatureException(); + + set_test_value(); } SinNode::SinNode(node_ptr feat, int rung, int feat_ind): @@ -29,6 +31,8 @@ SinNode::SinNode(node_ptr feat, int rung, int feat_ind): set_value(); if(is_nan() || is_const()) throw InvalidFeatureException(); + + set_test_value(); } void SinNode::update_add_sub_leaves(std::map<std::string, int>& add_sub_leaves, int pl_mn, int& expected_abs_tot) @@ -50,5 +54,5 @@ void SinNode::update_div_mult_leaves(std::map<std::string, double>& div_mult_lea else div_mult_leaves[key] = fact; - expected_abs_tot *= std::abs(fact); + expected_abs_tot += std::abs(fact); } diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sin.hpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sin.hpp index 7aee79ea..cb606b10 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sin.hpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sin.hpp @@ -30,6 +30,12 @@ public: allowed_op_funcs::sin(_n_samp, _feats[0]->value_ptr(offset + 2), node_value_arrs::get_value_ptr(_arr_ind, offset)); } + inline void set_test_value(int offset = -1) + { + offset = (offset == -1) ? 
rung() : offset; + allowed_op_funcs::sin(_n_test_samp, _feats[0]->test_value_ptr(offset + 2), node_value_arrs::get_test_value_ptr(_arr_ind, offset)); + } + /** * @brief return the rung of the feature */ diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sixth_power.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sixth_power.cpp index 4c85272e..a74ef6d6 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sixth_power.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sixth_power.cpp @@ -12,6 +12,8 @@ SixPowNode::SixPowNode(std::vector<node_ptr> feats, int rung, int feat_ind): set_value(); if(is_nan() || is_const()) throw InvalidFeatureException(); + + set_test_value(); } SixPowNode::SixPowNode(node_ptr feat, int rung, int feat_ind): @@ -23,6 +25,8 @@ SixPowNode::SixPowNode(node_ptr feat, int rung, int feat_ind): set_value(); if(is_nan() || is_const()) throw InvalidFeatureException(); + + set_test_value(); } void SixPowNode::update_add_sub_leaves(std::map<std::string, int>& add_sub_leaves, int pl_mn, int& expected_abs_tot) diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sixth_power.hpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sixth_power.hpp index 39c0a111..8fb575b4 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sixth_power.hpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sixth_power.hpp @@ -30,6 +30,12 @@ public: allowed_op_funcs::sixth_pow(_n_samp, _feats[0]->value_ptr(offset + 2), node_value_arrs::get_value_ptr(_arr_ind, offset)); } + inline void set_test_value(int offset = -1) + { + offset = (offset == -1) ? 
rung() : offset; + allowed_op_funcs::sixth_pow(_n_test_samp, _feats[0]->test_value_ptr(offset + 2), node_value_arrs::get_test_value_ptr(_arr_ind, offset)); + } + /** * @brief return the rung of the feature */ diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/square.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/square.cpp index 4523668d..7bfb9c6e 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/square.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/square.cpp @@ -12,6 +12,8 @@ SqNode::SqNode(std::vector<node_ptr> feats, int rung, int feat_ind): set_value(); if(is_nan() || is_const()) throw InvalidFeatureException(); + + set_test_value(); } SqNode::SqNode(node_ptr feat, int rung, int feat_ind): @@ -23,6 +25,8 @@ SqNode::SqNode(node_ptr feat, int rung, int feat_ind): set_value(); if(is_nan() || is_const()) throw InvalidFeatureException(); + + set_test_value(); } void SqNode::update_add_sub_leaves(std::map<std::string, int>& add_sub_leaves, int pl_mn, int& expected_abs_tot) diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/square.hpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/square.hpp index 5a743e40..dc5a7acc 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/square.hpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/square.hpp @@ -29,6 +29,11 @@ public: allowed_op_funcs::sq(_n_samp, _feats[0]->value_ptr(offset + 2), node_value_arrs::get_value_ptr(_arr_ind, offset)); } + inline void set_test_value(int offset = -1) + { + offset = (offset == -1) ? 
rung() : offset; + allowed_op_funcs::sq(_n_test_samp, _feats[0]->test_value_ptr(offset + 2), node_value_arrs::get_test_value_ptr(_arr_ind, offset)); + } /** * @brief return the rung of the feature */ diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/square_root.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/square_root.cpp index 50cd2108..0f30bb2b 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/square_root.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/square_root.cpp @@ -12,6 +12,8 @@ SqrtNode::SqrtNode(std::vector<node_ptr> feats, int rung, int feat_ind): set_value(); if(is_nan() || is_const()) throw InvalidFeatureException(); + + set_test_value(); } SqrtNode::SqrtNode(node_ptr feat, int rung, int feat_ind): @@ -23,6 +25,8 @@ SqrtNode::SqrtNode(node_ptr feat, int rung, int feat_ind): set_value(); if(is_nan() || is_const()) throw InvalidFeatureException(); + + set_test_value(); } void SqrtNode::update_add_sub_leaves(std::map<std::string, int>& add_sub_leaves, int pl_mn, int& expected_abs_tot) diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/square_root.hpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/square_root.hpp index 7b776964..07d2736e 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/square_root.hpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/square_root.hpp @@ -30,6 +30,12 @@ public: allowed_op_funcs::sqrt(_n_samp, _feats[0]->value_ptr(offset + 2), node_value_arrs::get_value_ptr(_arr_ind, offset)); } + inline void set_test_value(int offset = -1) + { + offset = (offset == -1) ? 
rung() : offset; + allowed_op_funcs::sqrt(_n_test_samp, _feats[0]->test_value_ptr(offset + 2), node_value_arrs::get_test_value_ptr(_arr_ind, offset)); + } + /** * @brief return the rung of the feature */ diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/subtract.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/subtract.cpp index 801d9d77..0760bc8b 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/subtract.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/subtract.cpp @@ -16,7 +16,7 @@ SubNode::SubNode(std::vector<node_ptr> feats, int rung, int feat_ind): if((add_sub_leaves.size() < 2)) throw InvalidFeatureException(); - if(std::abs(std::accumulate(add_sub_leaves.begin(), add_sub_leaves.end(), -1*expected_abs_tot, [](int tot, auto el){return tot + std::abs(el.second);})) > 0) + if(std::abs(std::accumulate(add_sub_leaves.begin(), add_sub_leaves.end(), -1*expected_abs_tot, [](int tot, auto el){return tot + std::abs(el.second);})) != 0) throw InvalidFeatureException(); int add_sub_tot_first = add_sub_leaves.begin()->second; @@ -27,6 +27,8 @@ SubNode::SubNode(std::vector<node_ptr> feats, int rung, int feat_ind): set_value(); if(is_nan() || is_const()) throw InvalidFeatureException(); + + set_test_value(); } SubNode::SubNode(node_ptr feat_1, node_ptr feat_2, int rung, int feat_ind): @@ -42,7 +44,7 @@ SubNode::SubNode(node_ptr feat_1, node_ptr feat_2, int rung, int feat_ind): if((add_sub_leaves.size() < 2)) throw InvalidFeatureException(); - if(std::abs(std::accumulate(add_sub_leaves.begin(), add_sub_leaves.end(), -1*expected_abs_tot, [](int tot, auto el){return tot + std::abs(el.second);})) > 0) + if(std::abs(std::accumulate(add_sub_leaves.begin(), add_sub_leaves.end(), -1*expected_abs_tot, [](int tot, auto el){return tot + std::abs(el.second);})) != 0) throw InvalidFeatureException(); int add_sub_tot_first = add_sub_leaves.begin()->second; @@ -53,6 +55,8 @@ 
SubNode::SubNode(node_ptr feat_1, node_ptr feat_2, int rung, int feat_ind): set_value(); if(is_nan() || is_const()) throw InvalidFeatureException(); + + set_test_value(); } void SubNode::update_add_sub_leaves(std::map<std::string, int>& add_sub_leaves, int pl_mn, int& expected_abs_tot) @@ -69,5 +73,5 @@ void SubNode::update_div_mult_leaves(std::map<std::string, double>& div_mult_lea else div_mult_leaves[key] = fact; - expected_abs_tot *= std::abs(fact); + expected_abs_tot += std::abs(fact); } diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/subtract.hpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/subtract.hpp index c645a665..2c946896 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/subtract.hpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/subtract.hpp @@ -30,6 +30,12 @@ public: allowed_op_funcs::sub(_n_samp, _feats[0]->value_ptr(offset + 2), _feats[1]->value_ptr(offset + 1), node_value_arrs::get_value_ptr(_arr_ind, offset)); } + inline void set_test_value(int offset = -1) + { + offset = (offset == -1) ? 
rung() : offset; + allowed_op_funcs::sub(_n_test_samp, _feats[0]->test_value_ptr(offset + 2), _feats[1]->test_value_ptr(offset + 1), node_value_arrs::get_test_value_ptr(_arr_ind, offset)); + } + /** * @brief return the rung of the feature */ diff --git a/src/feature_creation/node/operator_nodes/functions.hpp b/src/feature_creation/node/operator_nodes/functions.hpp index f14cd8eb..e60e753f 100644 --- a/src/feature_creation/node/operator_nodes/functions.hpp +++ b/src/feature_creation/node/operator_nodes/functions.hpp @@ -27,7 +27,7 @@ namespace allowed_op_funcs */ inline void sub(int size, double* in_0, double* in_1, double* out) { - std::transform(in_0, in_0 + size, in_1, out, std::minus<double>()); + std::transform(in_0, in_0 + size, in_1, out, [](double in_0, double in_1){return in_0 - in_1;}); } /** @@ -63,7 +63,7 @@ */ inline void div(int size, double* in_0, double* in_1, double* out) { - std::transform(in_0, in_0 + size, in_1, out, std::divides<double>()); + std::transform(in_0, in_0 + size, in_1, out, [](double in_0, double in_1){return in_0 / in_1;}); } /** diff --git a/src/feature_creation/node/value_storage/nodes_value_containers.cpp b/src/feature_creation/node/value_storage/nodes_value_containers.cpp index 1da1c91a..6ab863bb 100644 --- a/src/feature_creation/node/value_storage/nodes_value_containers.cpp +++ b/src/feature_creation/node/value_storage/nodes_value_containers.cpp @@ -4,12 +4,17 @@ int node_value_arrs::N_SELECTED; int node_value_arrs::N_SAMPLES; int node_value_arrs::N_STORE_FEATURES; int node_value_arrs::N_RUNGS_STORED; +int node_value_arrs::N_SAMPLES_TEST; std::vector<int> node_value_arrs::TEMP_STORAGE_REG; +std::vector<int> node_value_arrs::TEMP_STORAGE_TEST_REG; std::vector<double> node_value_arrs::D_MATRIX; std::vector<double> node_value_arrs::VALUES_ARR; +std::vector<double> node_value_arrs::TEST_VALUES_ARR; std::vector<double> node_value_arrs::TEMP_STORAGE_ARR; +std::vector<double> 
node_value_arrs::TEMP_STORAGE_TEST_ARR; + int node_value_arrs::get_number_new_features(std::string new_op, int n_current_features) { @@ -51,16 +56,21 @@ int node_value_arrs::get_max_number_features(std::vector<std::string> allowed_op return n_feats; } -void node_value_arrs::initialize_values_arr(int n_samples, int n_primary_feat) +void node_value_arrs::initialize_values_arr(int n_samples, int n_samples_test, int n_primary_feat) { N_SAMPLES = n_samples; + N_SAMPLES_TEST = n_samples_test; N_RUNGS_STORED = 0; N_STORE_FEATURES = n_primary_feat; VALUES_ARR = std::vector<double>(N_STORE_FEATURES * N_SAMPLES); + TEST_VALUES_ARR = std::vector<double>(N_STORE_FEATURES * N_SAMPLES_TEST); TEMP_STORAGE_ARR = std::vector<double>(3 * N_STORE_FEATURES * N_SAMPLES); TEMP_STORAGE_REG = std::vector<int>(3 * N_STORE_FEATURES, -1); + + TEMP_STORAGE_TEST_ARR = std::vector<double>(3 * N_STORE_FEATURES * N_SAMPLES_TEST); + TEMP_STORAGE_TEST_REG = std::vector<int>(3 * N_STORE_FEATURES, -1); } void node_value_arrs::resize_values_arr(int n_dims, int n_feat, bool use_temp) @@ -71,13 +81,22 @@ void node_value_arrs::resize_values_arr(int n_dims, int n_feat, bool use_temp) VALUES_ARR.resize(N_STORE_FEATURES * N_SAMPLES); VALUES_ARR.shrink_to_fit(); + TEST_VALUES_ARR.resize(N_STORE_FEATURES * N_SAMPLES_TEST); + TEST_VALUES_ARR.shrink_to_fit(); + if(use_temp) { TEMP_STORAGE_ARR.resize(3 * N_STORE_FEATURES * N_SAMPLES); TEMP_STORAGE_ARR.shrink_to_fit(); - TEMP_STORAGE_REG.resize(3 * N_STORE_FEATURES * N_SAMPLES, - 1); + TEMP_STORAGE_REG.resize(3 * N_STORE_FEATURES, - 1); TEMP_STORAGE_REG.shrink_to_fit(); + + TEMP_STORAGE_TEST_ARR.resize(3 * N_STORE_FEATURES * N_SAMPLES_TEST); + TEMP_STORAGE_TEST_ARR.shrink_to_fit(); + + TEMP_STORAGE_TEST_REG.resize(3 * N_STORE_FEATURES, - 1); + TEMP_STORAGE_TEST_REG.shrink_to_fit(); } else { @@ -94,6 +113,14 @@ double* node_value_arrs::get_value_ptr(int ind, int offset) return access_temp_storage((ind % N_STORE_FEATURES) + (offset % 3) * N_STORE_FEATURES); } 
+double* node_value_arrs::get_test_value_ptr(int ind, int offset) +{ + if(ind < N_STORE_FEATURES) + return access_test_value_arr(ind); + temp_storage_test_reg(ind, offset) = ind; + return access_temp_storage_test((ind % N_STORE_FEATURES) + (offset % 3) * N_STORE_FEATURES); +} + void node_value_arrs::initialize_d_matrix_arr() { N_SELECTED = 0; diff --git a/src/feature_creation/node/value_storage/nodes_value_containers.hpp b/src/feature_creation/node/value_storage/nodes_value_containers.hpp index 54d8c5d6..c992eaa3 100644 --- a/src/feature_creation/node/value_storage/nodes_value_containers.hpp +++ b/src/feature_creation/node/value_storage/nodes_value_containers.hpp @@ -11,13 +11,17 @@ namespace node_value_arrs { extern int N_SELECTED; //!< Number of features selected extern int N_SAMPLES; //!< Number of samples in the nodes + extern int N_SAMPLES_TEST; //!< Number of samples in the nodes extern int N_STORE_FEATURES; //!< Number of features with stored values extern int N_RUNGS_STORED; //!< Number of rungs with values stored extern std::vector<int> TEMP_STORAGE_REG; //!< Register to see which feature is stored in each slot + extern std::vector<int> TEMP_STORAGE_TEST_REG; //!< Register to see which feature is stored in each slot extern std::vector<double> D_MATRIX; //!< The descriptor matrix extern std::vector<double> VALUES_ARR; //!< Value of the stored features + extern std::vector<double> TEST_VALUES_ARR; //!< Value of the stored features test values extern std::vector<double> TEMP_STORAGE_ARR; //!< Array to temporarily store feature values + extern std::vector<double> TEMP_STORAGE_TEST_ARR; //!< Array to temporarily store feature values /** * @brief Get the maximum number of new features for each rung @@ -44,9 +48,10 @@ namespace node_value_arrs * @details Take initial parameters and construct the feature arraies * * @param n_samples number of samples per feature + * @param n_samples_test number of test samples per feature * @param n_primary_feat number of primary 
features */ - void initialize_values_arr(int n_samples, int n_primary_feat); + void initialize_values_arr(int n_samples, int n_samples_test, int n_primary_feat); /** * @brief set of the value arrays @@ -85,6 +90,16 @@ namespace node_value_arrs */ inline int& temp_storage_reg(int ind, int offset = 0){return TEMP_STORAGE_REG[(ind % N_STORE_FEATURES) + (offset % 3) * N_STORE_FEATURES];} + /** + * @brief Get a reference slot/feature test register + * + * @param ind Feature index + * @param offset Offset integer for TEMP_STORE_TEST_ARRAY + * + * @return The register element for a given feature index and offset + */ + inline int& temp_storage_test_reg(int ind, int offset = 0){return TEMP_STORAGE_TEST_REG[(ind % N_STORE_FEATURES) + (offset % 3) * N_STORE_FEATURES];} + /** * @brief Access element of the permanent storage array * @@ -94,6 +109,15 @@ namespace node_value_arrs */ inline double* access_value_arr(int feature_ind){return VALUES_ARR.data() + feature_ind*N_SAMPLES;} + /** + * @brief Access element of the test value array + * + * @param feature_ind The feature index to access + * + * @return pointer to the feature;s test data array + */ + inline double* access_test_value_arr(int feature_ind){return TEST_VALUES_ARR.data() + feature_ind*N_SAMPLES_TEST;} + /** * @brief Access element of temporary storage array * @@ -103,6 +127,15 @@ namespace node_value_arrs */ inline double* access_temp_storage(int slot){return TEMP_STORAGE_ARR.data() + slot*N_SAMPLES;} + /** + * @brief Access element of temporary storage test array + * + * @param slot The slot of the temporary storage test arrays + * + * @return pointer to the feature's temporary test storage + */ + inline double* access_temp_storage_test(int slot){return TEMP_STORAGE_TEST_ARR.data() + slot*N_SAMPLES_TEST;} + /** * @brief Access the value_ptr to a feature * @@ -113,6 +146,16 @@ namespace node_value_arrs */ double* get_value_ptr(int ind, int offset = 0); + /** + * @brief Access the test_value_ptr to a feature + * + * 
@param ind Feature index + * @param offset the offset for the storage + * + * @return The value pointer + */ + double* get_test_value_ptr(int ind, int offset = 0); + /** * @brief Get the pointer to a primary feature * diff --git a/src/feature_creation/units/Unit.cpp b/src/feature_creation/units/Unit.cpp index 253516c2..0347bfdc 100644 --- a/src/feature_creation/units/Unit.cpp +++ b/src/feature_creation/units/Unit.cpp @@ -134,6 +134,13 @@ bool Unit::equal(Unit unit_2) else if(_dct[el.first] != el.second) return false; } + for(auto& el : _dct) + { + if(unit_2.dct().count(el.first) == 0) + return false; + else if(unit_2.dct()[el.first] != el.second) + return false; + } if(unit_2.dct().size() == 0) { for(auto& el : _dct) diff --git a/src/inputs/InputParser.cpp b/src/inputs/InputParser.cpp index 435930e5..f362c51a 100644 --- a/src/inputs/InputParser.cpp +++ b/src/inputs/InputParser.cpp @@ -5,17 +5,37 @@ InputParser::InputParser(boost::property_tree::ptree IP, std::string fn, std::sh _n_sis_select(IP.get<int>("_n_sis_select")), _max_rung(IP.get<int>("max_rung")), _max_store_rung(IP.get<int>("n_rung_store", _max_rung - 1)), - _n_samp(0), + _n_samp(-1), + _n_residuals(IP.get<int>("n_residual", 1)), _filename(fn), _data_file(IP.get<std::string>("data_file", "data.csv")), _prop_key(IP.get<std::string>("property_key", "prop")), + _leave_out_inds(as_vector<int>(IP, "leave_out_inds")), _opset(as_vector<std::string>(IP, "opset")) { + std::ifstream data_stream; std::string line; data_stream.open(_data_file, std::ios::in); while (std::getline(data_stream, line)) ++_n_samp; + _n_leave_out = IP.get<int>("n_leave_out", static_cast<int>(std::round(_n_samp * IP.get<double>("leave_out_frac", 0.0)))); + if((_leave_out_inds.size() == 0) && _n_leave_out > 0) + { + _leave_out_inds = std::vector<int>(_n_leave_out); + + unsigned seed = std::chrono::system_clock::now().time_since_epoch().count(); + std::vector<int> indexes(_n_samp, 0); + for(int ii = 0; ii < _n_samp; ++ii) + indexes[ii] = ii; 
+ std::shuffle (indexes.begin(), indexes.end(), std::default_random_engine(seed)); + + std::copy_n(indexes.begin(), _n_leave_out, _leave_out_inds.begin()); + } + else if((_n_leave_out == 0) && (_leave_out_inds.size() > 0)) + _n_leave_out = _leave_out_inds.size(); + else if(_leave_out_inds.size() != _n_leave_out) + throw std::logic_error("n_leave_out is not the same size as leave_out_inds"); if(_opset.size() == 0) { @@ -81,14 +101,16 @@ void InputParser::generate_feature_space(std::shared_ptr<MPI_Interface> comm) } else { - std::cout << name_unit_split.size() << std::endl; throw std::logic_error("Invalid feature name \"" + split_line[dd] + "\" in header of file"); } } - std::vector<std::vector<double>> data(headers.size(), std::vector<double>(_n_samp - 1)); + std::vector<std::vector<double>> data(headers.size(), std::vector<double>(_n_samp - _n_leave_out)); + std::vector<std::vector<double>> test_data(headers.size(), std::vector<double>(_n_leave_out)); std::vector<std::string> bad_samples; int cur_line = 0; + int n_train_samp = 0; + int n_test_samp = 0; while (std::getline(data_stream, line)) { boost::algorithm::split(split_line, line, [](char c) {return c==',';}); @@ -97,8 +119,18 @@ void InputParser::generate_feature_space(std::shared_ptr<MPI_Interface> comm) if(split_line.size() != headers.size() + 1) bad_samples.push_back(split_line[0]); - for(int ii = 1; ii < split_line.size(); ++ii) - data[ii-1][cur_line] = std::stod(split_line[ii]); + if(std::any_of(_leave_out_inds.begin(), _leave_out_inds.end(), [&cur_line](int ind){return ind == cur_line;})) + { + for(int ii = 1; ii < split_line.size(); ++ii) + test_data[ii-1][n_test_samp] = std::stod(split_line[ii]); + ++n_test_samp; + } + else + { + for(int ii = 1; ii < split_line.size(); ++ii) + data[ii-1][n_train_samp] = std::stod(split_line[ii]); + ++n_train_samp; + } ++cur_line; } @@ -113,10 +145,7 @@ void InputParser::generate_feature_space(std::shared_ptr<MPI_Interface> comm) int _propind = 0; 
while((headers[_propind] != _prop_key) && (_propind < headers.size())) - { - std::cout << _prop_key << '\t' << headers[_propind] << std::endl; ++_propind; - } if(_propind >= headers.size()) { @@ -124,18 +153,23 @@ void InputParser::generate_feature_space(std::shared_ptr<MPI_Interface> comm) } else { - _prop = std::vector<double>(data[_propind].size(), 0.0); - std::copy_n(data[_propind].begin(), data[_propind].size(), _prop.begin()); + _prop_train = std::vector<double>(data[_propind].size(), 0.0); + _prop_test = std::vector<double>(test_data[_propind].size(), 0.0); + + std::copy_n(data[_propind].begin(), data[_propind].size(), _prop_train.begin()); + std::copy_n(test_data[_propind].begin(), test_data[_propind].size(), _prop_test.begin()); data.erase(data.begin() + _propind); + test_data.erase(test_data.begin() + _propind); headers.erase(headers.begin() + _propind); units.erase(units.begin() + _propind); } - node_value_arrs::initialize_values_arr(_prop.size(), headers.size()); + node_value_arrs::initialize_values_arr(_prop_train.size(), _prop_test.size(), headers.size()); + std::vector<node_ptr> phi_0; for(int ff = 0; ff < headers.size(); ++ff) - phi_0.push_back(std::make_shared<FeatureNode>(ff, headers[ff], data[ff], units[ff])); + phi_0.push_back(std::make_shared<FeatureNode>(ff, headers[ff], data[ff], test_data[ff], units[ff])); _feat_space = std::make_shared<FeatureSpace>(comm, phi_0, _opset, _max_rung, _n_sis_select, _max_store_rung); } diff --git a/src/inputs/InputParser.hpp b/src/inputs/InputParser.hpp index b1c6c7a0..e0ec8b10 100644 --- a/src/inputs/InputParser.hpp +++ b/src/inputs/InputParser.hpp @@ -11,6 +11,10 @@ #include <iostream> #include <fstream> +#include <algorithm> // std::shuffle +#include <random> // std::default_random_engine +#include <chrono> // std::chrono::system_clock + #include <feature_creation/feature_space/FeatureSpace.hpp> #include <feature_creation/node/value_storage/nodes_value_containers.hpp> @@ -27,14 +31,18 @@ public: int 
_max_store_rung; int _n_sis_select; int _n_samp; + int _n_residuals; + int _n_leave_out; std::string _filename; std::string _data_file; std::string _prop_key; + std::vector<int> _leave_out_inds; + std::vector<double> _prop_train; + std::vector<double> _prop_test; std::vector<std::string> _opset; - std::vector<double> _prop; InputParser(boost::property_tree::ptree IP, std::string fn, std::shared_ptr<MPI_Interface> comm); inline std::shared_ptr<FeatureSpace> feat_space(){return _feat_space;} diff --git a/src/main.cpp b/src/main.cpp index 0c6aea62..7d51ce4f 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -37,15 +37,20 @@ int main(int argc, char const *argv[]) std::cout<< "time input_parsing/Feature space generation: "<< duration << std::endl; node_value_arrs::initialize_d_matrix_arr(); - SISSORegressor sisso(IP._feat_space, IP._prop, IP._n_dim); + SISSORegressor sisso(IP._feat_space, IP._prop_train, IP._prop_test, IP._n_dim, IP._n_residuals); sisso.fit(); if(comm->rank() == 0) { - for(auto& model : sisso.models()) + for(int ii = 0; ii < sisso.models().size(); ++ii) { - std::cout << model.rmse() << std::endl; - std::cout << model << '\n' << std::endl; + std::cout << sisso.models()[ii][0].rmse() << std::endl; + std::cout << sisso.models()[ii][0] << "\n" << std::endl; + for(int jj = 0; jj < sisso.models()[ii].size(); ++jj) + { + sisso.models()[ii][jj].train_to_file("models/train_dim_" + std::to_string(ii) + "_model_" + std::to_string(jj) + ".dat"); + sisso.models()[ii][jj].test_to_file("models/test_dim_" + std::to_string(ii) + "_model_" + std::to_string(jj) + ".dat"); + } } } return 0; -- GitLab