diff --git a/src/descriptor_identifier/Model/Model.cpp b/src/descriptor_identifier/Model/Model.cpp index 6ffb64ec6928d77d772f791ed7d3cda02703aa3b..3a3e09ae0ffdbda55898a9a5a344445b1f296cd8 100644 --- a/src/descriptor_identifier/Model/Model.cpp +++ b/src/descriptor_identifier/Model/Model.cpp @@ -1,70 +1,46 @@ #include <descriptor_identifier/Model/Model.hpp> -Model::Model(std::vector<double> prop, std::vector<std::shared_ptr<Node>> feats) : - _n_samp(feats[0]->n_samp()), +Model::Model(std::vector<double> prop_train, std::vector<double> prop_test, std::vector<std::shared_ptr<Node>> feats) : + _n_samp_train(feats[0]->n_samp()), + _n_samp_test(feats[0]->n_test_samp()), _n_dim(feats.size() + 1), _feats(feats), - _coefs(new double[_n_dim]), - _prop(new double[_n_samp]), - _error(new double[_n_samp]), - _D(new double[_n_samp * _n_dim]), - _prop_est(_n_samp, 0.0) + _coefs(_n_dim), + _prop_train(prop_train), + _prop_test(prop_test), + _train_error(_n_samp_train), + _test_error(_n_samp_test), + _D_train(_n_samp_train * _n_dim), + _D_test(_n_samp_test * _n_dim), + _prop_train_est(_n_samp_train, 0.0), + _prop_test_est(_n_samp_test, 0.0) { - _prop_est.reserve(_n_samp); + _prop_train_est.reserve(_n_samp_train); + _prop_test_est.reserve(_n_samp_test); - std::copy_n(prop.begin(), _n_samp, _prop.get()); - - std::vector<double> a(_n_samp * _n_dim, 1.0); + std::vector<double> a(_n_samp_train * _n_dim, 1.0); for(int ff = 0; ff < feats.size(); ++ff) { - std::copy_n(feats[ff]->value_ptr(), _n_samp, _D.get() + ff * _n_samp); - std::copy_n(feats[ff]->value_ptr(), _n_samp, a.begin() + ff * _n_samp); + std::copy_n(feats[ff]->value_ptr(), _n_samp_train, _D_train.data() + ff * _n_samp_train); + std::copy_n(feats[ff]->test_value_ptr(), _n_samp_test, _D_test.data() + ff * _n_samp_test); + std::copy_n(feats[ff]->value_ptr(), _n_samp_train, a.data() + ff * _n_samp_train); } - std::copy_n(a.begin() + feats.size() * _n_samp, _n_samp, _D.get() + feats.size() * _n_samp); + std::copy_n(a.data() + 
feats.size() * _n_samp_train, _n_samp_train, _D_train.data() + feats.size() * _n_samp_train); + std::copy_n(a.data() + feats.size() * _n_samp_train, _n_samp_test, _D_test.data() + feats.size() * _n_samp_test); std::vector<double> s(_n_dim, 0.0); - std::vector<double> work(_n_dim * _n_samp, 0.0); + std::vector<double> work(_n_dim * _n_samp_train, 0.0); int rank = 0; int info = 0; - dgelss_(_n_samp, _n_dim, 1, a.data(), _n_samp, prop.data(), _n_samp, s.data(), 1e-13, &rank, work.data(), work.size(), &info); - std::copy_n(prop.begin(), _n_dim, _coefs.get()); - - dgemv_('N', _n_samp, _n_dim, 1.0, _D.get(), _n_samp, _coefs.get(), 1, 0.0, _prop_est.data(), 1); + dgelss_(_n_samp_train, _n_dim, 1, a.data(), _n_samp_train, prop_train.data(), _n_samp_train, s.data(), 1e-13, &rank, work.data(), work.size(), &info); + std::copy_n(prop_train.begin(), _n_dim, _coefs.data()); - std::transform(_prop_est.begin(), _prop_est.end(), _prop.get(), _error.get(), std::minus<double>()); -} + dgemv_('N', _n_samp_train, _n_dim, 1.0, _D_train.data(), _n_samp_train, _coefs.data(), 1, 0.0, _prop_train_est.data(), 1); + dgemv_('N', _n_samp_test, _n_dim, 1.0, _D_test.data(), _n_samp_test, _coefs.data(), 1, 0.0, _prop_test_est.data(), 1); -Model::Model(Model &o) : - _n_samp(o._n_samp), - _n_dim(o._n_dim), - _feats(o._feats), - _coefs(new double[_n_dim]), - _prop(new double[_n_samp]), - _error(new double[_n_samp]), - _D(new double[_n_samp * _n_dim]), - _prop_est(o._prop_est) -{ - std::copy_n(o._coefs.get(), o._n_dim, _coefs.get()); - std::copy_n(o._prop.get(), o._n_samp, _prop.get()); - std::copy_n(o._error.get(), o._n_samp, _error.get()); - std::copy_n(o._D.get(), o._n_samp * o._n_dim, _D.get()); -} - -Model::Model(Model &&o) : - _n_samp(o._n_samp), - _n_dim(o._n_dim), - _feats(o._feats), - _coefs(new double[_n_dim]), - _prop(new double[_n_samp]), - _error(new double[_n_samp]), - _D(new double[_n_samp * _n_dim]), - _prop_est(o._prop_est) -{ - std::copy_n(o._coefs.get(), o._n_dim, _coefs.get()); - 
std::copy_n(o._prop.get(), o._n_samp, _prop.get()); - std::copy_n(o._error.get(), o._n_samp, _error.get()); - std::copy_n(o._D.get(), o._n_samp * o._n_dim, _D.get()); + std::transform(_prop_train_est.begin(), _prop_train_est.end(), _prop_train.data(), _train_error.data(), std::minus<double>()); + std::transform(_prop_test_est.begin(), _prop_test_est.end(), _prop_test.data(), _test_error.data(), std::minus<double>()); } std::string Model::toString() const @@ -81,3 +57,59 @@ std::ostream& operator<< (std::ostream& outStream, const Model& model) outStream << model.toString(); return outStream; } + +void Model::train_to_file(std::string filename) +{ + boost::filesystem::path p(filename.c_str()); + boost::filesystem::create_directories(p.remove_filename()); + + std::ofstream out_file_stream = std::ofstream(); + out_file_stream.open(filename); + + out_file_stream << "# " << toString() << std::endl; + out_file_stream << "# RMSE: " << rmse() << "; Max AE( " << max_ae() << std::endl; + out_file_stream << "# coeffs:"; + for(auto& coef: _coefs) + out_file_stream << " " << std::setw(24) << std::setprecision(18) << coef << ";"; + out_file_stream << "\n# " << std::setw(23) << "Property Value," << std::setw(24) << "Property Value (EST),"; + for(int ff = 0; ff < _feats.size(); ++ff) + out_file_stream << " Feature " << ff << " Value,"; + out_file_stream << std::endl; + + for(int ss = 0; ss < _n_samp_train; ++ss) + { + out_file_stream << std::setw(24) << std::setprecision(18) << _prop_train[ss] << std::setw(24) << std::setprecision(18) << _prop_train_est[ss]; + for(int ff = 0; ff < _n_dim - 1; ++ff) + out_file_stream << std::setw(24) << std::setprecision(18) << _D_train[ss + ff * _n_samp_train]; + out_file_stream << std::endl; + } + out_file_stream.close(); +} + +void Model::test_to_file(std::string filename) +{ + boost::filesystem::path p(filename.c_str()); + boost::filesystem::create_directories(p.remove_filename()); + + std::ofstream out_file_stream = std::ofstream(); + 
out_file_stream.open(filename); + + out_file_stream << "# " << toString() << std::endl; + out_file_stream << "# RMSE: " << test_rmse() << "; Max AE: " << test_max_ae() << std::endl; + out_file_stream << "# coeffs:"; + for(auto& coef: _coefs) + out_file_stream << " " << std::setw(24) << std::setprecision(18) << coef << ";"; + out_file_stream << "\n# " << std::setw(23) << "Property Value," << std::setw(24) << "Property Value (EST),"; + for(int ff = 0; ff < _feats.size(); ++ff) + out_file_stream << " Feature " << ff << " Value,"; + out_file_stream << std::endl; + + for(int ss = 0; ss < _n_samp_test; ++ss) + { + out_file_stream << std::setw(24) << std::setprecision(18) << _prop_test[ss] << std::setw(24) << std::setprecision(18) << _prop_test_est[ss]; + for(int ff = 0; ff < _n_dim - 1; ++ff) + out_file_stream << std::setw(24) << std::setprecision(18) << _D_test[ss + ff * _n_samp_train]; + out_file_stream << std::endl; + } + out_file_stream.close(); +} diff --git a/src/descriptor_identifier/Model/Model.hpp b/src/descriptor_identifier/Model/Model.hpp index b4364916aa324aabb71ac07fd78c5b019bc32744..208664ade828964253a02837c3c764f5c1aa0d96 100644 --- a/src/descriptor_identifier/Model/Model.hpp +++ b/src/descriptor_identifier/Model/Model.hpp @@ -1,6 +1,12 @@ #ifndef MODEL #define MODEL +#include <boost/filesystem.hpp> + +#include<iomanip> +#include<fstream> +#include<iostream> + #include <feature_creation/node/Node.hpp> /** @@ -9,17 +15,22 @@ */ class Model { - int _n_samp; //!< The number of samples per feature + int _n_samp_train; //!< The number of samples per feature + int _n_samp_test; //!< The number of test samples per feature int _n_dim; //!< Dimension of the model std::vector<std::shared_ptr<Node>> _feats; //!< List of features in the model - std::unique_ptr<double[]> _coefs; //!< Coefficients for teh features - std::unique_ptr<double[]> _prop; //!< The property to be modeled - std::unique_ptr<double[]> _error; //!< The error of the model - std::unique_ptr<double[]> 
_D; //!< The Descriptor matrix + std::vector<double> _coefs; //!< Coefficients for teh features + std::vector<double> _prop_train; //!< The property to be modeled + std::vector<double> _prop_test; //!< The property to be modeled + std::vector<double> _train_error; //!< The error of the model + std::vector<double> _test_error; //!< The error of the model + std::vector<double> _D_train; //!< The Descriptor matrix + std::vector<double> _D_test; //!< The Descriptor matrix - std::vector<double> _prop_est; //!< The estimated Property + std::vector<double> _prop_train_est; //!< The estimated Property + std::vector<double> _prop_test_est; //!< The estimated Property public: /** @@ -28,21 +39,8 @@ public: * @param prop The property * @param feats The features for the model */ - Model(std::vector<double> prop, std::vector<std::shared_ptr<Node>> feats); - - /** - * @brief The copy constructor - * - * @param o The model to be copied - */ - Model(Model& o); + Model(std::vector<double> prop_train, std::vector<double> prop_test, std::vector<std::shared_ptr<Node>> feats); - /** - * @brief The Move constructor - * - * @param o The Model to be moved - */ - Model(Model&& o); /** * @brief Convert the model to a string @@ -54,27 +52,49 @@ public: /** * @brief Accessor function to _prop_est */ - inline std::vector<double>& predict(){return _prop_est;} + inline std::vector<double>& predict(){return _prop_test_est;} + + /** + * @brief Accessor function to _prop_est + */ + inline std::vector<double>& predict_train(){return _prop_train_est;} /** * @brief Copy the error into a new array * * @param res pointer to the beginning of the array */ - inline void copy_error(double* res){std::copy_n(_error.get(), _n_samp, res);} + inline void copy_error(double* res){std::copy_n(_train_error.data(), _n_samp_train, res);} /** * @brief The rmes of the model */ - inline double rmse(){return util_funcs::norm(_error.get(), _n_samp);} + inline double rmse(){return util_funcs::norm(_train_error.data(), 
_n_samp_train) / std::sqrt(static_cast<double>(_n_samp_train));} + inline double test_rmse(){return util_funcs::norm(_test_error.data(), _n_samp_test) / std::sqrt(static_cast<double>(_n_samp_test));} /** * @brief The max Absolute error of the array */ inline double max_ae() { - return *std::max_element(_error.get(), _error.get() + _n_samp, [](double d1, double d2){return std::abs(d1) < std::abs(d2);}); + return *std::max_element(_train_error.data(), _train_error.data() + _n_samp_train, [](double d1, double d2){return std::abs(d1) < std::abs(d2);}); + } + + inline double test_max_ae() + { + return *std::max_element(_test_error.data(), _test_error.data() + _n_samp_test, [](double d1, double d2){return std::abs(d1) < std::abs(d2);}); } + + + /** + * @brief Print model to a file + */ + void test_to_file(std::string filename); + + /** + * @brief Print model to a file + */ + void train_to_file(std::string filename); }; /** diff --git a/src/descriptor_identifier/SISSORegressor.cpp b/src/descriptor_identifier/SISSORegressor.cpp index f8eaac13814075e524cc45e890ef3e6c03bb696d..978dc0b6d7080df86097ab4cffbc727008f9f3ad 100644 --- a/src/descriptor_identifier/SISSORegressor.cpp +++ b/src/descriptor_identifier/SISSORegressor.cpp @@ -1,10 +1,11 @@ #include <descriptor_identifier/SISSORegressor.hpp> -SISSORegressor::SISSORegressor(std::shared_ptr<FeatureSpace> feat_space, std::vector<double> prop, int n_dim): +SISSORegressor::SISSORegressor(std::shared_ptr<FeatureSpace> feat_space, std::vector<double> prop, std::vector<double> prop_test, int n_dim, int n_residual): _feat_space(feat_space), _mpi_comm(feat_space->mpi_comm()), _n_samp(prop.size()), _n_dim(n_dim), + _n_residual(n_residual), _lwork(-1), _rank(0), _a(new double[(_n_dim + 1) * _n_samp]), @@ -14,7 +15,8 @@ SISSORegressor::SISSORegressor(std::shared_ptr<FeatureSpace> feat_space, std::ve _work(nullptr), _s(new double[n_dim + 1]), _models(), - _prop(prop) + _prop(prop), + _prop_test(prop_test) { // Initialize a, b, ones, s, 
and _error arrays @@ -77,20 +79,28 @@ void SISSORegressor::set_error(std::vector<int>& inds, double* coeffs) void SISSORegressor::fit() { - std::vector<double> residual(_n_samp); + std::vector<double> residual(_n_samp * _n_residual); std::clock_t start; double duration; start = std::clock(); + _feat_space->sis(_prop); + _mpi_comm->barrier(); duration = ( std::clock() - start ) / (double) CLOCKS_PER_SEC; if(_mpi_comm->rank() == 0) std::cout << "Time for SIS: " << duration << std::endl; start = std::clock(); - std::vector<node_ptr> min_node(1, _feat_space->phi_selected()[0]); - _models.push_back(Model(_prop, min_node)); - _models[_models.size() - 1].copy_error(residual.data()); + + std::vector<Model> models; + for(int rr = 0; rr < _n_residual; ++rr) + { + models.push_back(Model(_prop, _prop_test, {_feat_space->phi_selected()[rr]})); + models.back().copy_error(&residual[rr * _n_samp]); + } + _models.push_back(models); + _mpi_comm->barrier(); duration = ( std::clock() - start ) / (double) CLOCKS_PER_SEC; if(_mpi_comm->rank() == 0) @@ -98,20 +108,24 @@ void SISSORegressor::fit() for(int dd = 2; dd <= _n_dim; ++dd) { - start = std::clock(); + start = std::clock(); _feat_space->sis(residual); - _mpi_comm->barrier(); - duration = ( std::clock() - start ) / (double) CLOCKS_PER_SEC; + + _mpi_comm->barrier(); + duration = ( std::clock() - start ) / (double) CLOCKS_PER_SEC; if(_mpi_comm->rank() == 0) std::cout << "Time for SIS: " << duration << std::endl; + start = std::clock(); - l0_norm(residual, dd); - _mpi_comm->barrier(); + l0_norm(_prop, dd); + + _mpi_comm->barrier(); duration = ( std::clock() - start ) / (double) CLOCKS_PER_SEC; if(_mpi_comm->rank() == 0) std::cout << "Time for l0-norm: " << duration << std::endl; - _models[_models.size() - 1].copy_error(residual.data()); + for(int rr = 0; rr < _n_residual; ++rr) + _models.back()[rr].copy_error(&residual[rr * _n_samp]); } } @@ -120,9 +134,9 @@ void SISSORegressor::l0_norm(std::vector<double>& prop, int n_dim) 
std::vector<double> coefs(n_dim + 1, 0.0); std::vector<int> inds(n_dim, 0); - std::vector<int> inds_min(n_dim); + std::vector<std::vector<int>> inds_min(_n_residual); - double min_error = util_funcs::norm(prop.data(), _n_samp); + std::vector<double> min_errors(_n_residual, util_funcs::norm(_prop.data(), _n_samp)); int n = _feat_space->phi_selected().size(); @@ -152,28 +166,40 @@ void SISSORegressor::l0_norm(std::vector<double>& prop, int n_dim) least_squares(inds, coefs.data()); set_error(inds, coefs.data()); - - if(util_funcs::norm(_error.get(), _n_samp) < min_error) + double error = util_funcs::norm(_error.get(), _n_samp); + if(error < min_errors.back()) { - std::copy_n(inds.begin(), inds.size(), inds_min.begin()); - min_error = util_funcs::norm(_error.get(), _n_samp); + int rr = 0; + while((rr < _n_residual) && (error > min_errors[rr])) + ++rr; + inds_min.insert(inds_min.begin() + rr, inds); + min_errors.insert(min_errors.begin() + rr, error); + inds_min.pop_back(); + min_errors.pop_back(); } - ++rr; } while(std::next_permutation(v.begin(), v.end()) && (rr < start_end[1])); + std::vector<std::vector<double>> all_min_error; + std::vector<std::vector<std::vector<int>>> all_inds_min; - std::vector<double> all_min_error; - std::vector<std::vector<int>> all_inds_min; - - mpi::all_gather(*_mpi_comm, min_error, all_min_error); + mpi::all_gather(*_mpi_comm, min_errors, all_min_error); mpi::all_gather(*_mpi_comm, inds_min, all_inds_min); - int argmin = std::min_element(all_min_error.begin(), all_min_error.end()) - all_min_error.begin(); + min_errors = std::vector<double>(all_min_error.size() * _n_residual); + for(int me = 0; me < all_min_error.size(); ++me) + std::copy_n(all_min_error[me].begin(), all_min_error[me].size(), &min_errors[me * _n_residual]); + inds = util_funcs::argsort(min_errors); std::vector<node_ptr> min_nodes(n_dim); - for(int ii = 0; ii < n_dim; ++ii) - min_nodes[ii] = _feat_space->phi_selected()[all_inds_min[argmin][ii]]; + std::vector<Model> models; 
+ + for(int rr = 0; rr < _n_residual; ++rr) + { + for(int ii = 0; ii < n_dim; ++ii) + min_nodes[ii] = _feat_space->phi_selected()[all_inds_min[inds[rr] / _n_residual][inds[rr] % _n_residual][ii]]; + models.push_back(Model(_prop, _prop_test, min_nodes)); + } - _models.push_back(Model(_prop, min_nodes)); + _models.push_back(models); } diff --git a/src/descriptor_identifier/SISSORegressor.hpp b/src/descriptor_identifier/SISSORegressor.hpp index 8e3da96b771c6fae042ac4f98ec25d2453c66adc..ab4e7e688ee06f735cf374bd47ccf575c03c3843 100644 --- a/src/descriptor_identifier/SISSORegressor.hpp +++ b/src/descriptor_identifier/SISSORegressor.hpp @@ -17,6 +17,7 @@ protected: int _n_samp; //!< the number of samples per feature int _n_dim; //!< Number of dimensions to calculate + int _n_residual; //!< Number of residuals to pass to the next sis model int _lwork; //!< size of the work array int _rank; //!< Ranks for the least squares problem @@ -27,8 +28,9 @@ protected: std::unique_ptr<double[]> _work; //!< The work array for least squares problems std::unique_ptr<double[]> _s; //!< The S array for least squares problems - std::vector<Model> _models; //!< List of models + std::vector<std::vector<Model>> _models; //!< List of models std::vector<double> _prop; //!< Property array + std::vector<double> _prop_test; //!< Property array public: /** @@ -37,7 +39,7 @@ public: * @param prop Property to model * @param n_dim Maximum dimension of the model */ - SISSORegressor(std::shared_ptr<FeatureSpace> feat_space, std::vector<double> prop, int n_dim); + SISSORegressor(std::shared_ptr<FeatureSpace> feat_space, std::vector<double> prop, std::vector<double> prop_test, int n_dim, int n_residual); /** * @brief Get the optimal size of the working array @@ -49,7 +51,7 @@ public: /** * @brief Preform Least squares optimization -\ * + * * @param inds Feature indexes to get the model of * @param coeffs Coefficients for the model */ @@ -96,7 +98,7 @@ public: /** * @brief Acessor function for models */ - 
inline std::vector<Model>& models(){return _models;} + inline std::vector<std::vector<Model>>& models(){return _models;} /** * @brief Acessor function for n_samp @@ -108,6 +110,11 @@ public: */ inline int n_dim(){return _n_dim;} + /** + * @brief Acessor function for n_residual + */ + inline int n_residual(){return _n_residual;} + /** * @brief Acessor function for lwork */ diff --git a/src/feature_creation/feature_space/FeatureSpace.cpp b/src/feature_creation/feature_space/FeatureSpace.cpp index e9615bbd2a58e83b305b3c4086aaca56a47aff9d..a2f6f63105afdc5f66683acc118d44af79c451d2 100644 --- a/src/feature_creation/feature_space/FeatureSpace.cpp +++ b/src/feature_creation/feature_space/FeatureSpace.cpp @@ -50,8 +50,8 @@ FeatureSpace::FeatureSpace( _un_operators.push_back(allowed_op_maps::unary_operator_map[op]); } generate_feature_space(); - _scores.resize(_phi.size()); _scores.reserve(_phi.size()); + _scores.resize(_phi.size()); } void FeatureSpace::generate_feature_space() @@ -139,13 +139,15 @@ void FeatureSpace::generate_feature_space() ++feat_ind; } } - if(nn <= _n_rung_store) { bool use_temp = (nn != _max_phi) || (_max_phi > _n_rung_store); node_value_arrs::resize_values_arr(_n_rung_store, _phi.size(), use_temp); for(int ff = _start_gen[0]; ff < _phi.size(); ++ff) + { _phi[ff]->set_value(); + _phi[ff]->set_test_value(); + } } } else @@ -261,30 +263,35 @@ void FeatureSpace::generate_feature_space() { _phi[ff]->reindex(ff + n_feat_below_rank, ff); _phi[ff]->set_value(); + _phi[ff]->set_test_value(); } } } _n_feat = _phi.size(); } -void FeatureSpace::project_r(double* prop) +void FeatureSpace::project_r(double* prop, int size) { std::vector<double> scores(_phi.size(), 0.0); for(int ff = 0; ff < _phi.size(); ++ff) - _scores[ff] = -1.0 * std::abs(util_funcs::r(prop, _phi[ff]->value_ptr(), _n_samp)); + _scores[ff] = -1.0 * std::abs(util_funcs::r(&prop[0], _phi[ff]->value_ptr(), _n_samp)); + + for(int pp = 1; pp < size / _n_samp; ++pp) + { + for(int ff = 0; ff < 
_phi.size(); ++ff) + scores[ff] = -1.0 * std::abs(util_funcs::r(&prop[_n_samp*pp], _phi[ff]->value_ptr(), _n_samp)); + std::transform(scores.begin(), scores.end(), _scores.begin(), _scores.begin(), [](double s1, double s2){return std::min(s1, s2);}); + } } void FeatureSpace::sis(std::vector<double>& prop) { - // while(true) - // {} int cur_feat = node_value_arrs::N_SELECTED; node_value_arrs::resize_d_matrix_arr(_n_sis_select); _phi_selected.reserve(_phi_selected.size() + _n_sis_select); - project_r(prop.data()); - + project_r(prop.data(), prop.size()); std::vector<int> inds = util_funcs::argsort(_scores); std::vector<double> scores_selected(_n_sis_select, 0.0); @@ -305,7 +312,7 @@ void FeatureSpace::sis(std::vector<double>& prop) // Check the feature against those selected from previous SIS iterations for(int dd = 0; dd < cur_feat; ++dd) { - if(1.0 - std::abs(util_funcs::r(node_value_arrs::get_d_matrix_ptr(dd), _phi[inds[ii]]->value_ptr(), prop.size())) < 1e-13) + if(1.0 - std::abs(util_funcs::r(node_value_arrs::get_d_matrix_ptr(dd), _phi[inds[ii]]->value_ptr(), _n_samp)) < 1e-10) { is_valid = false; break; @@ -320,7 +327,7 @@ void FeatureSpace::sis(std::vector<double>& prop) if(*std::min_element(scores_comp.begin(), scores_comp.begin() + cur_feat_local) < 1e-10) { int dd = std::min_element(scores_comp.begin(), scores_comp.begin() + cur_feat_local) - scores_comp.begin(); - if(1.0 - std::abs(util_funcs::r(node_value_arrs::get_d_matrix_ptr(cur_feat + dd), _phi[inds[ii]]->value_ptr(), prop.size())) < 1e-13) + if(1.0 - std::abs(util_funcs::r(node_value_arrs::get_d_matrix_ptr(cur_feat + dd), _phi[inds[ii]]->value_ptr(), _n_samp)) < 1e-10) is_valid = false; } @@ -328,7 +335,7 @@ void FeatureSpace::sis(std::vector<double>& prop) { inds_selected[cur_feat_local] = _phi[inds[ii]]->feat_ind(); scores_selected[cur_feat_local] = _scores[inds[ii]]; - phi_selected.push_back(std::make_shared<FeatureNode>(cur_feat + cur_feat_local, _phi[inds[ii]]->expr(), _phi[inds[ii]]->value(), 
_phi[inds[ii]]->unit(), true)); + phi_selected.push_back(std::make_shared<FeatureNode>(cur_feat + cur_feat_local, _phi[inds[ii]]->expr(), _phi[inds[ii]]->value(), _phi[inds[ii]]->test_value(), _phi[inds[ii]]->unit(), true)); ++cur_feat_local; } ++ii; @@ -418,6 +425,7 @@ void FeatureSpace::sis(std::vector<double>& prop) _phi_selected.push_back(sent_phi[inds[ii]]); _phi_selected.back()->reindex(cur_feat); _phi_selected.back()->set_value(); + _phi_selected.back()->set_test_value(); ++cur_feat; } } @@ -459,7 +467,7 @@ void FeatureSpace::sis(std::vector<double>& prop) if(*std::min_element(scores_comp.begin(), scores_comp.begin() + cur_feat_local) < 1e-10) { int dd = std::min_element(scores_comp.begin(), scores_comp.begin() + cur_feat_local) - scores_comp.begin(); - if(1.0 - std::abs(util_funcs::r(node_value_arrs::get_d_matrix_ptr(cur_feat + dd), sent_phi[inds[ii]]->value().data(), prop.size())) < 1e-13) + if(1.0 - std::abs(util_funcs::r(node_value_arrs::get_d_matrix_ptr(cur_feat + dd), sent_phi[inds[ii]]->value().data(), _n_samp)) < 1e-13) is_valid = false; } @@ -468,6 +476,7 @@ void FeatureSpace::sis(std::vector<double>& prop) _phi_selected.push_back(sent_phi[inds[ii]]); _phi_selected.back()->reindex(cur_feat); _phi_selected.back()->set_value(); + _phi_selected.back()->set_test_value(); ++cur_feat_local; ++cur_feat; } diff --git a/src/feature_creation/feature_space/FeatureSpace.hpp b/src/feature_creation/feature_space/FeatureSpace.hpp index cf926e2fafb2314f88b0b40648b1987ad17deaa2..ac3c2362ba27b37da58af16120dd4530510bf834 100644 --- a/src/feature_creation/feature_space/FeatureSpace.hpp +++ b/src/feature_creation/feature_space/FeatureSpace.hpp @@ -99,7 +99,7 @@ public: * * @param prop [description] */ - void project_r(double* prop); + void project_r(double* prop, int size); /** * @brief Perform SIS on a feature set with a specified property diff --git a/src/feature_creation/node/FeatureNode.cpp b/src/feature_creation/node/FeatureNode.cpp index 
18f463df3e6fe54102a879fde5aba00771aec6a5..7d8ac1f38402eec8be77f1a0ec6a34ec07f69457 100644 --- a/src/feature_creation/node/FeatureNode.cpp +++ b/src/feature_creation/node/FeatureNode.cpp @@ -3,14 +3,16 @@ FeatureNode::FeatureNode() {} -FeatureNode::FeatureNode(int feat_ind, std::string expr, std::vector<double> value, Unit unit, bool selected) : - Node(feat_ind, value.size()), +FeatureNode::FeatureNode(int feat_ind, std::string expr, std::vector<double> value, std::vector<double> test_value, Unit unit, bool selected) : + Node(feat_ind, value.size(), test_value.size()), _selected(selected), _expr(expr), _unit(unit), - _value(value) + _value(value), + _test_value(test_value) { set_value(); + set_test_value(); } FeatureNode::~FeatureNode() diff --git a/src/feature_creation/node/FeatureNode.hpp b/src/feature_creation/node/FeatureNode.hpp index 584d75ed1205bd79e6f986af6843a4ee29fa8ae8..f6504d49fea7af6a8d408231bb0992ef5ea4e28c 100644 --- a/src/feature_creation/node/FeatureNode.hpp +++ b/src/feature_creation/node/FeatureNode.hpp @@ -30,6 +30,7 @@ class FeatureNode: public Node ar & _expr; ar & _unit; ar & _value; + ar & _test_value; } protected: @@ -37,6 +38,7 @@ protected: std::string _expr; //!< Expression of the feature Unit _unit; //!< Unit for the feature std::vector<double> _value; //!< values for the feature + std::vector<double> _test_value; //!< test values for the feature public: /** * @brief Base Constructor @@ -50,9 +52,10 @@ public: * @param feat_ind index of the feature * @param expr Expression for the feature * @param value Value of the feature for each sample + * @param value Value of the feature for each test sample * @param unit Unit of the feature */ - FeatureNode(int feat_ind, std::string expr, std::vector<double> value, Unit unit, bool selected=false); + FeatureNode(int feat_ind, std::string expr, std::vector<double> value, std::vector<double> test_value, Unit unit, bool selected=false); ~FeatureNode(); @@ -71,10 +74,21 @@ public: */ inline 
std::vector<double> value(){return _value;} + /** + * @brief Get the test values of the feature + */ + inline std::vector<double> test_value(){return _test_value;} + /** * @brief Set the value for the feature */ inline void set_value(int offset = -1){std::copy_n(_value.data(), _n_samp, value_ptr());} + + /** + * @brief Set the test value for the feature + */ + inline void set_test_value(int offset = -1){std::copy_n(_test_value.data(), _n_test_samp, test_value_ptr());} + /** * @brief Check if the feature contains NaN */ @@ -99,6 +113,12 @@ public: */ inline double* value_ptr(int offset = 0){return _selected ? node_value_arrs::get_d_matrix_ptr(_arr_ind) : node_value_arrs::get_value_ptr(_arr_ind, offset);} + /** + * @brief Accessor function to the value of the feature's test set + */ + inline double* test_value_ptr(int offset = 0){return node_value_arrs::get_test_value_ptr(_arr_ind, offset);} + + /** * @brief return the rung of the feature */ diff --git a/src/feature_creation/node/Node.cpp b/src/feature_creation/node/Node.cpp index 86a2d8bb50a4c5c43e8d7e11515e88842088ddc3..d4cd1d873193d196bba2c4f6f2e490e49762edd9 100644 --- a/src/feature_creation/node/Node.cpp +++ b/src/feature_creation/node/Node.cpp @@ -3,7 +3,8 @@ Node::Node() {} -Node::Node(int feat_ind, int n_samp) : +Node::Node(int feat_ind, int n_samp, int n_test_samp) : + _n_test_samp(n_test_samp), _n_samp(n_samp), _feat_ind(feat_ind), _arr_ind(feat_ind) diff --git a/src/feature_creation/node/Node.hpp b/src/feature_creation/node/Node.hpp index 3b19745ef2b9e44ab9d3d783475af1707a8afdb0..d3d60fadcadb1f9d42e4b2fa479586b93b611c25 100644 --- a/src/feature_creation/node/Node.hpp +++ b/src/feature_creation/node/Node.hpp @@ -31,13 +31,15 @@ class Node template <typename Archive> void serialize(Archive& ar, const unsigned int version) { + ar & _n_test_samp; ar & _n_samp; ar & _feat_ind; ar & _arr_ind; } protected: - int _n_samp; //!< Number of samples in the feature + int _n_test_samp; //!< Number of samples in the 
feature's test set + int _n_samp; //!< Number of samples in the feature's test set int _feat_ind; //!< Index of the feature int _arr_ind; //!< Index of the feature for the value arrays @@ -53,8 +55,9 @@ public: * * @param feat_ind index of the feature * @param n_samp number of samples in the node + * @param n_samp number of test samples in the node */ - Node(int feat_ind, int n_samp); + Node(int feat_ind, int n_samp, int n_test_samp); virtual ~Node(); @@ -80,6 +83,11 @@ public: */ inline int n_samp(){return _n_samp;} + /** + * @brief Acesssor function to get the number of samples in the test set + */ + inline int n_test_samp(){return _n_test_samp;} + /** * @brief Accessor function to get the feature ind */ @@ -105,6 +113,11 @@ public: */ virtual std::vector<double> value() = 0; + /** + * @brief Get the test_value of the descriptor + */ + virtual std::vector<double> test_value() = 0; + virtual std::vector<std::shared_ptr<Node>> feats() = 0; /** @@ -117,6 +130,16 @@ public: */ virtual double* value_ptr(int offset = 0) = 0; + /** + * @brief Set the test value for the feature + */ + virtual void set_test_value(int offset = -1) = 0; + + /** + * @brief Accessor function to the test value of the feature + */ + virtual double* test_value_ptr(int offset = 0) = 0; + /** * @brief Check if the feature contains NaN */ diff --git a/src/feature_creation/node/operator_nodes/OperatorNode.cpp b/src/feature_creation/node/operator_nodes/OperatorNode.cpp index 6999d46410268a63c9d67e7a5a2a505e4504da76..80b6ea1321f02bf68049e31356744fd8309c12bb 100644 --- a/src/feature_creation/node/operator_nodes/OperatorNode.cpp +++ b/src/feature_creation/node/operator_nodes/OperatorNode.cpp @@ -4,7 +4,7 @@ OperatorNode::OperatorNode() {} OperatorNode::OperatorNode(std::vector<node_ptr> feats, int rung, int feat_ind) : - Node(feat_ind, feats[0]->n_samp()), + Node(feat_ind, feats[0]->n_samp(), feats[0]->n_test_samp()), _feats(feats) {} @@ -23,6 +23,18 @@ double* OperatorNode::value_ptr(int offset) return 
node_value_arrs::get_value_ptr(_arr_ind, offset); } +double* OperatorNode::test_value_ptr(int offset) +{ + offset = (offset == -1) ? rung() : offset; + if((rung() > node_value_arrs::N_RUNGS_STORED) && (node_value_arrs::temp_storage_test_reg(_arr_ind, offset) != _arr_ind)) + { + set_test_value(offset); + node_value_arrs::temp_storage_test_reg(_arr_ind, offset) = _arr_ind; + } + + return node_value_arrs::get_test_value_ptr(_arr_ind, offset); +} + std::vector<double> OperatorNode::value() { std::vector<double> val(_n_samp, 0.0); @@ -30,4 +42,11 @@ std::vector<double> OperatorNode::value() return val; } +std::vector<double> OperatorNode::test_value() +{ + std::vector<double> val(_n_test_samp, 0.0); + std::copy_n(test_value_ptr(), _n_test_samp, val.data()); + return val; +} + BOOST_SERIALIZATION_ASSUME_ABSTRACT(OperatorNode) diff --git a/src/feature_creation/node/operator_nodes/OperatorNode.hpp b/src/feature_creation/node/operator_nodes/OperatorNode.hpp index 5c32779c928ec817e6e81417a858eb3b673fa00e..d8aaa8bba8767fdea3f2171440f461e4d02736b1 100644 --- a/src/feature_creation/node/operator_nodes/OperatorNode.hpp +++ b/src/feature_creation/node/operator_nodes/OperatorNode.hpp @@ -55,10 +55,14 @@ public: std::vector<double> value(); + std::vector<double> test_value(); + inline std::vector<node_ptr> feats(){return _feats;} virtual void set_value(int offset = -1) = 0; + virtual void set_test_value(int offset = -1) = 0; + /** * @brief Get the pointer to the feature's data * @details If the feature is not already stored in memory, then calculate the feature and return the pointer to the data @@ -69,6 +73,16 @@ public: */ double* value_ptr(int offset=-1); + /** + * @brief Get the pointer to the feature's data + * @details If the feature is not already stored in memory, then calculate the feature and return the pointer to the data + * + * @param offset the integer value to offset the location in the temporary storage array + * + * @return pointer to the feature's test values + */ 
+ double* test_value_ptr(int offset=-1); + /** * @brief Check if the feature contains NaN */ diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/absolute_difference.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/absolute_difference.cpp index 1dc3b2bce3ebaa7b48b08250f11d9b01815fec4a..ae7ee26ae67cfa0a0c30b9262d6ab2a293ea102c 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/absolute_difference.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/absolute_difference.cpp @@ -18,7 +18,7 @@ AbsDiffNode::AbsDiffNode(std::vector<node_ptr> feats, int rung, int feat_ind) : if((add_sub_leaves.size() < 2)) throw InvalidFeatureException(); - if(std::abs(std::accumulate(add_sub_leaves.begin(), add_sub_leaves.end(), -1*expected_abs_tot, [](int tot, auto el){return tot + std::abs(el.second);})) > 0) + if(std::abs(std::accumulate(add_sub_leaves.begin(), add_sub_leaves.end(), -1*expected_abs_tot, [](int tot, auto el){return tot + std::abs(el.second);})) != 0) throw InvalidFeatureException(); int add_sub_tot_first = add_sub_leaves.begin()->second; @@ -29,6 +29,8 @@ AbsDiffNode::AbsDiffNode(std::vector<node_ptr> feats, int rung, int feat_ind) : set_value(); if(is_nan() || is_const()) throw InvalidFeatureException(); + + set_test_value(); } AbsDiffNode::AbsDiffNode(node_ptr feat_1, node_ptr feat_2, int rung, int feat_ind) : @@ -44,17 +46,20 @@ AbsDiffNode::AbsDiffNode(node_ptr feat_1, node_ptr feat_2, int rung, int feat_in if((add_sub_leaves.size() < 2)) throw InvalidFeatureException(); - if(std::abs(std::accumulate(add_sub_leaves.begin(), add_sub_leaves.end(), -1*expected_abs_tot, [](int tot, auto el){return tot + std::abs(el.second);})) > 0) + if(std::abs(std::accumulate(add_sub_leaves.begin(), add_sub_leaves.end(), -1*expected_abs_tot, [](int tot, auto el){return tot + std::abs(el.second);})) != 0) throw InvalidFeatureException(); int add_sub_tot_first = add_sub_leaves.begin()->second; 
if((std::abs(add_sub_tot_first) > 1) && std::all_of(add_sub_leaves.begin(), add_sub_leaves.end(), [&add_sub_tot_first](auto el){return el.second == add_sub_tot_first;})) throw InvalidFeatureException(); + set_value(); if(is_nan() || is_const()) throw InvalidFeatureException(); - } + + set_test_value(); +} void AbsDiffNode::update_add_sub_leaves(std::map<std::string, int>& add_sub_leaves, int pl_mn, int& expected_abs_tot) { @@ -75,5 +80,5 @@ void AbsDiffNode::update_div_mult_leaves(std::map<std::string, double>& div_mult else div_mult_leaves[key] = fact; - expected_abs_tot *= std::abs(fact); + expected_abs_tot += std::abs(fact); } diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/absolute_difference.hpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/absolute_difference.hpp index 2076e44fbceb95d5dbb883a0f8893ade0f46a52d..7434f27edd2bf9b2004216e58da2ac506041f240 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/absolute_difference.hpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/absolute_difference.hpp @@ -30,6 +30,12 @@ public: allowed_op_funcs::abs_diff(_n_samp, _feats[0]->value_ptr(offset + 2), _feats[1]->value_ptr(offset + 1), node_value_arrs::get_value_ptr(_arr_ind, offset)); } + inline void set_test_value(int offset = -1) + { + offset = (offset == -1) ? 
rung() : offset; + allowed_op_funcs::abs_diff(_n_test_samp, _feats[0]->test_value_ptr(offset + 2), _feats[1]->test_value_ptr(offset + 1), node_value_arrs::get_test_value_ptr(_arr_ind, offset)); + } + /** * @brief return the rung of the feature */ diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/absolute_value.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/absolute_value.cpp index d740f8029e6f7e7458b1096dc62b10cec1e9b0a8..7c01d21a57593bbdec591d607f9d7c45573b3baf 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/absolute_value.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/absolute_value.cpp @@ -11,6 +11,8 @@ AbsNode::AbsNode(std::vector<node_ptr> feats, int rung, int feat_ind): set_value(); if(is_nan() || is_const()) throw InvalidFeatureException(); + + set_test_value(); } AbsNode::AbsNode(node_ptr feat, int rung, int feat_ind): @@ -19,6 +21,8 @@ AbsNode::AbsNode(node_ptr feat, int rung, int feat_ind): set_value(); if(is_nan() || is_const()) throw InvalidFeatureException(); + + set_test_value(); } void AbsNode::update_add_sub_leaves(std::map<std::string, int>& add_sub_leaves, int pl_mn, int& expected_abs_tot) @@ -40,5 +44,5 @@ void AbsNode::update_div_mult_leaves(std::map<std::string, double>& div_mult_lea else div_mult_leaves[key] = fact; - expected_abs_tot *= std::abs(fact); + expected_abs_tot += std::abs(fact); } diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/absolute_value.hpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/absolute_value.hpp index 94c68f4d905eec5fc4d99ac5e407007e140bf025..11cb5c7c60863ea11e960ffa0e7334a38c202bb0 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/absolute_value.hpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/absolute_value.hpp @@ -29,6 +29,12 @@ public: allowed_op_funcs::abs(_n_samp, _feats[0]->value_ptr(offset + 2), 
node_value_arrs::get_value_ptr(_arr_ind, offset)); } + inline void set_test_value(int offset = -1) + { + offset = (offset == -1) ? rung() : offset; + allowed_op_funcs::abs(_n_test_samp, _feats[0]->test_value_ptr(offset + 2), node_value_arrs::get_test_value_ptr(_arr_ind, offset)); + } + /** * @brief return the rung of the feature */ diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/add.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/add.cpp index 2922339361b4fbd385b59386d29652e38174af01..0a4742b5dc365a8f3920be53700f268fcd34164e 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/add.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/add.cpp @@ -16,7 +16,7 @@ AddNode::AddNode(std::vector<node_ptr> feats, int rung, int feat_ind): if((add_sub_leaves.size() < 2)) throw InvalidFeatureException(); - if(std::abs(std::accumulate(add_sub_leaves.begin(), add_sub_leaves.end(), -1*expected_abs_tot, [](int tot, auto el){return tot + std::abs(el.second);})) > 0) + if(std::abs(std::accumulate(add_sub_leaves.begin(), add_sub_leaves.end(), -1*expected_abs_tot, [](int tot, auto el){return tot + std::abs(el.second);})) != 0) throw InvalidFeatureException(); int add_sub_tot_first = add_sub_leaves.begin()->second; @@ -27,6 +27,8 @@ AddNode::AddNode(std::vector<node_ptr> feats, int rung, int feat_ind): set_value(); if(is_nan() || is_const()) throw InvalidFeatureException(); + + set_test_value(); } AddNode::AddNode(node_ptr feat_1, node_ptr feat_2, int rung, int feat_ind): @@ -42,7 +44,7 @@ AddNode::AddNode(node_ptr feat_1, node_ptr feat_2, int rung, int feat_ind): if((add_sub_leaves.size() < 2)) throw InvalidFeatureException(); - if(std::abs(std::accumulate(add_sub_leaves.begin(), add_sub_leaves.end(), -1*expected_abs_tot, [](int tot, auto el){return tot + std::abs(el.second);})) > 0) + if(std::abs(std::accumulate(add_sub_leaves.begin(), add_sub_leaves.end(), -1*expected_abs_tot, [](int tot, 
auto el){return tot + std::abs(el.second);})) != 0) throw InvalidFeatureException(); int add_sub_tot_first = add_sub_leaves.begin()->second; @@ -53,6 +55,8 @@ AddNode::AddNode(node_ptr feat_1, node_ptr feat_2, int rung, int feat_ind): set_value(); if(is_nan() || is_const()) throw InvalidFeatureException(); + + set_test_value(); } void AddNode::update_add_sub_leaves(std::map<std::string, int>& add_sub_leaves, int pl_mn, int& expected_abs_tot) @@ -69,5 +73,5 @@ void AddNode::update_div_mult_leaves(std::map<std::string, double>& div_mult_lea else div_mult_leaves[key] = fact; - expected_abs_tot *= std::abs(fact); + expected_abs_tot += std::abs(fact); } diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/add.hpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/add.hpp index 39644de9e3a40a20e513614cffc4dcbf5f74731e..de207aa7c99bcb29b201c976b05f3821cf9d343c 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/add.hpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/add.hpp @@ -29,6 +29,12 @@ public: allowed_op_funcs::add(_n_samp, _feats[0]->value_ptr(offset + 2), _feats[1]->value_ptr(offset + 1), node_value_arrs::get_value_ptr(_arr_ind, offset)); } + inline void set_test_value(int offset = -1) + { + offset = (offset == -1) ? 
rung() : offset; + allowed_op_funcs::add(_n_test_samp, _feats[0]->test_value_ptr(offset + 2), _feats[1]->test_value_ptr(offset + 1), node_value_arrs::get_test_value_ptr(_arr_ind, offset)); + } + /** * @brief return the rung of the feature */ diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cos.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cos.cpp index 10167f15e7185b2b53aa760bc734f909541a95c2..697094e2529677b64f7e3eb448d32c43431db713 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cos.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cos.cpp @@ -15,6 +15,8 @@ CosNode::CosNode(std::vector<node_ptr> feats, int rung, int feat_ind): set_value(); if(is_nan() || is_const()) throw InvalidFeatureException(); + + set_test_value(); } CosNode::CosNode(node_ptr feat, int rung, int feat_ind): @@ -29,6 +31,8 @@ CosNode::CosNode(node_ptr feat, int rung, int feat_ind): set_value(); if(is_nan() || is_const()) throw InvalidFeatureException(); + + set_test_value(); } void CosNode::update_add_sub_leaves(std::map<std::string, int>& add_sub_leaves, int pl_mn, int& expected_abs_tot) @@ -50,6 +54,6 @@ void CosNode::update_div_mult_leaves(std::map<std::string, double>& div_mult_lea else div_mult_leaves[key] = fact; - expected_abs_tot *= std::abs(fact); + expected_abs_tot += std::abs(fact); } diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cos.hpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cos.hpp index 54f77a233333aeb00c952c130d08067f02e8605b..97eda826f5f9bdface43dba53db42fcea8b63e01 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cos.hpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cos.hpp @@ -29,6 +29,12 @@ public: allowed_op_funcs::cos(_n_samp, _feats[0]->value_ptr(offset + 2), node_value_arrs::get_value_ptr(_arr_ind, offset)); } + inline void set_test_value(int offset = -1) + { 
+ offset = (offset == -1) ? rung() : offset; + allowed_op_funcs::cos(_n_test_samp, _feats[0]->test_value_ptr(offset + 2), node_value_arrs::get_test_value_ptr(_arr_ind, offset)); + } + /** * @brief return the rung of the feature */ diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cube.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cube.cpp index 650817c17ca1a2b244c384f4055075a171e5787d..d98cc3e332a600651b28b731bd43ed558162ef33 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cube.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cube.cpp @@ -12,6 +12,8 @@ CbNode::CbNode(std::vector<node_ptr> feats, int rung, int feat_ind): set_value(); if(is_nan() || is_const()) throw InvalidFeatureException(); + + set_test_value(); } CbNode::CbNode(node_ptr feat, int rung, int feat_ind): @@ -23,6 +25,8 @@ CbNode::CbNode(node_ptr feat, int rung, int feat_ind): set_value(); if(is_nan() || is_const()) throw InvalidFeatureException(); + + set_test_value(); } void CbNode::update_add_sub_leaves(std::map<std::string, int>& add_sub_leaves, int pl_mn, int& expected_abs_tot) diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cube.hpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cube.hpp index 7cabb1bff436e6f4a68a7d5b2d074118ec973e01..420af5caa4873eb90dedad72de6a552840e604f6 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cube.hpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cube.hpp @@ -29,6 +29,12 @@ public: allowed_op_funcs::cb(_n_samp, _feats[0]->value_ptr(offset + 2), node_value_arrs::get_value_ptr(_arr_ind, offset)); } + inline void set_test_value(int offset = -1) + { + offset = (offset == -1) ? 
rung() : offset; + allowed_op_funcs::cb(_n_test_samp, _feats[0]->test_value_ptr(offset + 2), node_value_arrs::get_test_value_ptr(_arr_ind, offset)); + } + /** * @brief return the rung of the feature */ diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cube_root.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cube_root.cpp index 8c0f7b166238d9c350cfd3e9314099018ecc24c0..b824bec6e26a80d4a6593ad292ea09393172524b 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cube_root.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cube_root.cpp @@ -12,6 +12,8 @@ CbrtNode::CbrtNode(std::vector<node_ptr> feats, int rung, int feat_ind): set_value(); if(is_nan() || is_const()) throw InvalidFeatureException(); + + set_test_value(); } CbrtNode::CbrtNode(node_ptr feat, int rung, int feat_ind): @@ -23,6 +25,8 @@ CbrtNode::CbrtNode(node_ptr feat, int rung, int feat_ind): set_value(); if(is_nan() || is_const()) throw InvalidFeatureException(); + + set_test_value(); } void CbrtNode::update_add_sub_leaves(std::map<std::string, int>& add_sub_leaves, int pl_mn, int& expected_abs_tot) diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cube_root.hpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cube_root.hpp index 2486e4cd0e54252be932b84cf3c3606a921c1fd8..b69afd46e9bd2dbb3976fa61c46f49a15b0caac4 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cube_root.hpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cube_root.hpp @@ -29,6 +29,12 @@ public: allowed_op_funcs::cbrt(_n_samp, _feats[0]->value_ptr(offset + 2), node_value_arrs::get_value_ptr(_arr_ind, offset)); } + inline void set_test_value(int offset = -1) + { + offset = (offset == -1) ? 
rung() : offset; + allowed_op_funcs::cbrt(_n_test_samp, _feats[0]->test_value_ptr(offset + 2), node_value_arrs::get_test_value_ptr(_arr_ind, offset)); + } + /** * @brief return the rung of the feature */ diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/divide.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/divide.cpp index 74961765ca798fa43198ccaac9b0de6776e94832..6aa3db9390ac6cbd5fee4f1b95041f6474888898 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/divide.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/divide.cpp @@ -19,14 +19,16 @@ DivNode::DivNode(std::vector<node_ptr> feats, int rung, int feat_ind): if(std::abs(std::accumulate(div_mult_leaves.begin(), div_mult_leaves.end(), -1.0*expected_abs_tot, [](double tot, auto el){return tot + std::abs(el.second);})) > 1e-12) throw InvalidFeatureException(); - int div_mult_tot_first = div_mult_leaves.begin()->second; + double div_mult_tot_first = div_mult_leaves.begin()->second; - if((std::abs(div_mult_tot_first) > 1) && std::all_of(div_mult_leaves.begin(), div_mult_leaves.end(), [&div_mult_tot_first](auto el){return el.second == div_mult_tot_first;})) + if((std::abs(div_mult_tot_first) != 1.0) && std::all_of(div_mult_leaves.begin(), div_mult_leaves.end(), [&div_mult_tot_first](auto el){return el.second == div_mult_tot_first;})) throw InvalidFeatureException(); set_value(); if(is_nan() || is_const()) throw InvalidFeatureException(); + + set_test_value(); } DivNode::DivNode(node_ptr feat_1, node_ptr feat_2, int rung, int feat_ind): @@ -45,14 +47,16 @@ DivNode::DivNode(node_ptr feat_1, node_ptr feat_2, int rung, int feat_ind): if(std::abs(std::accumulate(div_mult_leaves.begin(), div_mult_leaves.end(), -1.0*expected_abs_tot, [](double tot, auto el){return tot + std::abs(el.second);})) > 1e-12) throw InvalidFeatureException(); - int div_mult_tot_first = div_mult_leaves.begin()->second; + double div_mult_tot_first = 
div_mult_leaves.begin()->second; - if((std::abs(div_mult_tot_first) > 1) && std::all_of(div_mult_leaves.begin(), div_mult_leaves.end(), [&div_mult_tot_first](auto el){return el.second == div_mult_tot_first;})) + if((std::abs(div_mult_tot_first) != 1.0) && std::all_of(div_mult_leaves.begin(), div_mult_leaves.end(), [&div_mult_tot_first](auto el){return el.second == div_mult_tot_first;})) throw InvalidFeatureException(); set_value(); if(is_nan() || is_const()) throw InvalidFeatureException(); + + set_test_value(); } void DivNode::update_add_sub_leaves(std::map<std::string, int>& add_sub_leaves, int pl_mn, int& expected_abs_tot) diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/divide.hpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/divide.hpp index 073c9dc93ed5b9f39fb577fb7fd4e0566fa75a29..7b06b84e83b1a47a9dcb1c26968ae654375c9128 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/divide.hpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/divide.hpp @@ -29,6 +29,12 @@ public: allowed_op_funcs::div(_n_samp, _feats[0]->value_ptr(offset + 2), _feats[1]->value_ptr(offset + 1), node_value_arrs::get_value_ptr(_arr_ind, offset)); } + inline void set_test_value(int offset = -1) + { + offset = (offset == -1) ? 
rung() : offset; + allowed_op_funcs::div(_n_test_samp, _feats[0]->test_value_ptr(offset + 2), _feats[1]->test_value_ptr(offset + 1), node_value_arrs::get_test_value_ptr(_arr_ind, offset)); + } + /** * @brief return the rung of the feature */ diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/exponential.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/exponential.cpp index 8e81ea5fbe2d529a1c9011b61cace2a526cafd29..c12d2397ac1c84168f6e49ebc091e8fa57bf4dd2 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/exponential.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/exponential.cpp @@ -15,6 +15,8 @@ ExpNode::ExpNode(std::vector<node_ptr> feats, int rung, int feat_ind): set_value(); if(is_nan() || is_const()) throw InvalidFeatureException(); + + set_test_value(); } ExpNode::ExpNode(node_ptr feat, int rung, int feat_ind): @@ -29,6 +31,8 @@ ExpNode::ExpNode(node_ptr feat, int rung, int feat_ind): set_value(); if(is_nan() || is_const()) throw InvalidFeatureException(); + + set_test_value(); } void ExpNode::update_add_sub_leaves(std::map<std::string, int>& add_sub_leaves, int pl_mn, int& expected_abs_tot) @@ -50,5 +54,5 @@ void ExpNode::update_div_mult_leaves(std::map<std::string, double>& div_mult_lea else div_mult_leaves[key] = fact; - expected_abs_tot *= std::abs(fact); + expected_abs_tot += std::abs(fact); } diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/exponential.hpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/exponential.hpp index 7413f64934b939a4ec93c5db46264a3a973cae60..4903e366c55490298b06b102bb8af7c4c5525153 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/exponential.hpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/exponential.hpp @@ -29,6 +29,12 @@ public: allowed_op_funcs::exp(_n_samp, _feats[0]->value_ptr(offset + 2), 
node_value_arrs::get_value_ptr(_arr_ind, offset)); } + inline void set_test_value(int offset = -1) + { + offset = (offset == -1) ? rung() : offset; + allowed_op_funcs::exp(_n_test_samp, _feats[0]->test_value_ptr(offset + 2), node_value_arrs::get_test_value_ptr(_arr_ind, offset)); + } + /** * @brief return the rung of the feature */ diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/inverse.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/inverse.cpp index bdccb2d19b629e0c7aaae9c8ca8e61766d9bb2e9..b8f0e456dfd30f19b71f61c4e027f62acfea645b 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/inverse.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/inverse.cpp @@ -12,6 +12,8 @@ InvNode::InvNode(std::vector<node_ptr> feats, int rung, int feat_ind): set_value(); if(is_nan() || is_const()) throw InvalidFeatureException(); + + set_test_value(); } InvNode::InvNode(node_ptr feat, int rung, int feat_ind): @@ -23,6 +25,8 @@ InvNode::InvNode(node_ptr feat, int rung, int feat_ind): set_value(); if(is_nan() || is_const()) throw InvalidFeatureException(); + + set_test_value(); } void InvNode::update_add_sub_leaves(std::map<std::string, int>& add_sub_leaves, int pl_mn, int& expected_abs_tot) diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/inverse.hpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/inverse.hpp index 17749864124fe09347a8435b51734b1b31ec3510..8147863dce3db3d49666dc13a229e815916c87e6 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/inverse.hpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/inverse.hpp @@ -30,6 +30,12 @@ public: allowed_op_funcs::inv(_n_samp, _feats[0]->value_ptr(offset + 2), node_value_arrs::get_value_ptr(_arr_ind, offset)); } + inline void set_test_value(int offset = -1) + { + offset = (offset == -1) ? 
rung() : offset; + allowed_op_funcs::inv(_n_test_samp, _feats[0]->test_value_ptr(offset + 2), node_value_arrs::get_test_value_ptr(_arr_ind, offset)); + } + /** * @brief return the rung of the feature */ diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/log.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/log.cpp index cab91e516d6d138cd42be1b5faea50c0bb759fa7..d0af28396c424cf0870f67e980095f583431b249 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/log.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/log.cpp @@ -15,6 +15,8 @@ LogNode::LogNode(std::vector<node_ptr> feats, int rung, int feat_ind): set_value(); if(is_nan() || is_const()) throw InvalidFeatureException(); + + set_test_value(); } LogNode::LogNode(node_ptr feat, int rung, int feat_ind): @@ -29,6 +31,8 @@ LogNode::LogNode(node_ptr feat, int rung, int feat_ind): set_value(); if(is_nan() || is_const()) throw InvalidFeatureException(); + + set_test_value(); } void LogNode::update_add_sub_leaves(std::map<std::string, int>& add_sub_leaves, int pl_mn, int& expected_abs_tot) @@ -50,5 +54,5 @@ void LogNode::update_div_mult_leaves(std::map<std::string, double>& div_mult_lea else div_mult_leaves[key] = fact; - expected_abs_tot *= std::abs(fact); + expected_abs_tot += std::abs(fact); } diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/log.hpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/log.hpp index 56395dd6bc270eae02adad5d8753eba7cf395926..864b60c7c69368c877d831086aa82a397e1cc594 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/log.hpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/log.hpp @@ -29,6 +29,12 @@ public: allowed_op_funcs::log(_n_samp, _feats[0]->value_ptr(offset + 2), node_value_arrs::get_value_ptr(_arr_ind, offset)); } + inline void set_test_value(int offset = -1) + { + offset = (offset == -1) ? 
rung() : offset; + allowed_op_funcs::log(_n_test_samp, _feats[0]->test_value_ptr(offset + 2), node_value_arrs::get_test_value_ptr(_arr_ind, offset)); + } + /** * @brief return the rung of the feature */ diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/multiply.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/multiply.cpp index c92d37efeac8808e5329524b45ff09c535427ea7..bc0e13faf34eeb8c41a33ac67627e47232afc98f 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/multiply.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/multiply.cpp @@ -16,14 +16,16 @@ MultNode::MultNode(std::vector<node_ptr> feats, int rung, int feat_ind): if(std::abs(std::accumulate(div_mult_leaves.begin(), div_mult_leaves.end(), -1.0*expected_abs_tot, [](double tot, auto el){return tot + std::abs(el.second);})) > 1e-12) throw InvalidFeatureException(); - int div_mult_tot_first = div_mult_leaves.begin()->second; + double div_mult_tot_first = div_mult_leaves.begin()->second; - if((std::abs(div_mult_tot_first) > 1) && std::all_of(div_mult_leaves.begin(), div_mult_leaves.end(), [&div_mult_tot_first](auto el){return el.second == div_mult_tot_first;})) + if((std::abs(div_mult_tot_first) != 1.0) && std::all_of(div_mult_leaves.begin(), div_mult_leaves.end(), [&div_mult_tot_first](auto el){return el.second == div_mult_tot_first;})) throw InvalidFeatureException(); set_value(); if(is_nan() || is_const()) throw InvalidFeatureException(); + + set_test_value(); } MultNode::MultNode(node_ptr feat_1, node_ptr feat_2, int rung, int feat_ind): @@ -39,14 +41,16 @@ MultNode::MultNode(node_ptr feat_1, node_ptr feat_2, int rung, int feat_ind): if(std::abs(std::accumulate(div_mult_leaves.begin(), div_mult_leaves.end(), -1.0*expected_abs_tot, [](double tot, auto el){return tot + std::abs(el.second);})) > 1e-12) throw InvalidFeatureException(); - int div_mult_tot_first = div_mult_leaves.begin()->second; + double 
div_mult_tot_first = div_mult_leaves.begin()->second; - if((std::abs(div_mult_tot_first) > 1) && std::all_of(div_mult_leaves.begin(), div_mult_leaves.end(), [&div_mult_tot_first](auto el){return el.second == div_mult_tot_first;})) + if((std::abs(div_mult_tot_first) - 1.0 > 1e-12) && std::all_of(div_mult_leaves.begin(), div_mult_leaves.end(), [&div_mult_tot_first](auto el){return el.second == div_mult_tot_first;})) throw InvalidFeatureException(); set_value(); if(is_nan() || is_const()) throw InvalidFeatureException(); + + set_test_value(); } void MultNode::update_add_sub_leaves(std::map<std::string, int>& add_sub_leaves, int pl_mn, int& expected_abs_tot) diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/multiply.hpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/multiply.hpp index 55fbd06df85b95f1814119886188f6f1f20fe63d..f78a0ed7679013503edbeb990d66a229eeaf0a4f 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/multiply.hpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/multiply.hpp @@ -30,6 +30,12 @@ public: allowed_op_funcs::mult(_n_samp, _feats[0]->value_ptr(offset + 2), _feats[1]->value_ptr(offset + 1), node_value_arrs::get_value_ptr(_arr_ind, offset)); } + inline void set_test_value(int offset = -1) + { + offset = (offset == -1) ? 
rung() : offset; + allowed_op_funcs::mult(_n_test_samp, _feats[0]->test_value_ptr(offset + 2), _feats[1]->test_value_ptr(offset + 1), node_value_arrs::get_test_value_ptr(_arr_ind, offset)); + } + /** * @brief return the rung of the feature */ diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/negative_exponential.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/negative_exponential.cpp index 4776b3964cfc52c7a0349b98d56b77fcf9432075..b4761ebeb08aa15ba57806183ac502eb3ece8473 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/negative_exponential.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/negative_exponential.cpp @@ -52,5 +52,5 @@ void NegExpNode::update_div_mult_leaves(std::map<std::string, double>& div_mult_ else div_mult_leaves[key] = -1.0 * fact; - expected_abs_tot *= std::abs(fact); + expected_abs_tot += std::abs(fact); } diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/negative_exponential.hpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/negative_exponential.hpp index 78c07cb664e479b8a8870f296357d47ea3cd85d8..c7dde942319d4bc0970fba8be818350923216d71 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/negative_exponential.hpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/negative_exponential.hpp @@ -30,6 +30,12 @@ public: allowed_op_funcs::neg_exp(_n_samp, _feats[0]->value_ptr(offset + 2), node_value_arrs::get_value_ptr(_arr_ind, offset)); }; + inline void set_test_value(int offset = -1) + { + offset = (offset == -1) ? 
rung() : offset; + allowed_op_funcs::neg_exp(_n_test_samp, _feats[0]->test_value_ptr(offset + 2), node_value_arrs::get_test_value_ptr(_arr_ind, offset)); + }; + /** * @brief return the rung of the feature */ diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sin.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sin.cpp index 5aafb0533fcfe10ebe80b8ca8f4b6bc6de5ca3bf..3ab1f7e5571dacf1361ad0d172a93c395949a6ab 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sin.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sin.cpp @@ -15,6 +15,8 @@ SinNode::SinNode(std::vector<node_ptr> feats, int rung, int feat_ind): set_value(); if(is_nan() || is_const()) throw InvalidFeatureException(); + + set_test_value(); } SinNode::SinNode(node_ptr feat, int rung, int feat_ind): @@ -29,6 +31,8 @@ SinNode::SinNode(node_ptr feat, int rung, int feat_ind): set_value(); if(is_nan() || is_const()) throw InvalidFeatureException(); + + set_test_value(); } void SinNode::update_add_sub_leaves(std::map<std::string, int>& add_sub_leaves, int pl_mn, int& expected_abs_tot) @@ -50,5 +54,5 @@ void SinNode::update_div_mult_leaves(std::map<std::string, double>& div_mult_lea else div_mult_leaves[key] = fact; - expected_abs_tot *= std::abs(fact); + expected_abs_tot += std::abs(fact); } diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sin.hpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sin.hpp index 7aee79ea653bd80de18988c7aaedb1a0c06b6820..cb606b1032ef75c91e94ef2e87d84fe0b291d0cf 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sin.hpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sin.hpp @@ -30,6 +30,12 @@ public: allowed_op_funcs::sin(_n_samp, _feats[0]->value_ptr(offset + 2), node_value_arrs::get_value_ptr(_arr_ind, offset)); } + inline void set_test_value(int offset = -1) + { + offset = (offset == -1) ? 
rung() : offset; + allowed_op_funcs::sin(_n_test_samp, _feats[0]->test_value_ptr(offset + 2), node_value_arrs::get_test_value_ptr(_arr_ind, offset)); + } + /** * @brief return the rung of the feature */ diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sixth_power.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sixth_power.cpp index 4c85272e3951687d9fa53bb696eb651c31268825..a74ef6d6e41de4accc5d087ec3d59717268b1c30 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sixth_power.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sixth_power.cpp @@ -12,6 +12,8 @@ SixPowNode::SixPowNode(std::vector<node_ptr> feats, int rung, int feat_ind): set_value(); if(is_nan() || is_const()) throw InvalidFeatureException(); + + set_test_value(); } SixPowNode::SixPowNode(node_ptr feat, int rung, int feat_ind): @@ -23,6 +25,8 @@ SixPowNode::SixPowNode(node_ptr feat, int rung, int feat_ind): set_value(); if(is_nan() || is_const()) throw InvalidFeatureException(); + + set_test_value(); } void SixPowNode::update_add_sub_leaves(std::map<std::string, int>& add_sub_leaves, int pl_mn, int& expected_abs_tot) diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sixth_power.hpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sixth_power.hpp index 39c0a111293dc2339b29003676c64e33d838c92e..8fb575b4b8ffa585324e1db21fd3b3ff92f93a0e 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sixth_power.hpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sixth_power.hpp @@ -30,6 +30,12 @@ public: allowed_op_funcs::sixth_pow(_n_samp, _feats[0]->value_ptr(offset + 2), node_value_arrs::get_value_ptr(_arr_ind, offset)); } + inline void set_test_value(int offset = -1) + { + offset = (offset == -1) ? 
rung() : offset; + allowed_op_funcs::sixth_pow(_n_test_samp, _feats[0]->test_value_ptr(offset + 2), node_value_arrs::get_test_value_ptr(_arr_ind, offset)); + } + /** * @brief return the rung of the feature */ diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/square.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/square.cpp index 4523668d484cfc662e6abf7685f486d0b7ebc335..7bfb9c6ef0e47b2e24209a0a4f1b4d1a6da93a1d 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/square.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/square.cpp @@ -12,6 +12,8 @@ SqNode::SqNode(std::vector<node_ptr> feats, int rung, int feat_ind): set_value(); if(is_nan() || is_const()) throw InvalidFeatureException(); + + set_test_value(); } SqNode::SqNode(node_ptr feat, int rung, int feat_ind): @@ -23,6 +25,8 @@ SqNode::SqNode(node_ptr feat, int rung, int feat_ind): set_value(); if(is_nan() || is_const()) throw InvalidFeatureException(); + + set_test_value(); } void SqNode::update_add_sub_leaves(std::map<std::string, int>& add_sub_leaves, int pl_mn, int& expected_abs_tot) diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/square.hpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/square.hpp index 5a743e400e12e352beacac78d9ed40796bc311cd..dc5a7acc532e47998f70292ced14dffc6d1f7b7d 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/square.hpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/square.hpp @@ -29,6 +29,11 @@ public: allowed_op_funcs::sq(_n_samp, _feats[0]->value_ptr(offset + 2), node_value_arrs::get_value_ptr(_arr_ind, offset)); } + inline void set_test_value(int offset = -1) + { + offset = (offset == -1) ? 
rung() : offset; + allowed_op_funcs::sq(_n_test_samp, _feats[0]->test_value_ptr(offset + 2), node_value_arrs::get_test_value_ptr(_arr_ind, offset)); + } /** * @brief return the rung of the feature */ diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/square_root.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/square_root.cpp index 50cd210864ae046dd1a728e043d0f522ff7f7745..0f30bb2b1065fd81d2bdfeb86c32068dce7aa8be 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/square_root.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/square_root.cpp @@ -12,6 +12,8 @@ SqrtNode::SqrtNode(std::vector<node_ptr> feats, int rung, int feat_ind): set_value(); if(is_nan() || is_const()) throw InvalidFeatureException(); + + set_test_value(); } SqrtNode::SqrtNode(node_ptr feat, int rung, int feat_ind): @@ -23,6 +25,8 @@ SqrtNode::SqrtNode(node_ptr feat, int rung, int feat_ind): set_value(); if(is_nan() || is_const()) throw InvalidFeatureException(); + + set_test_value(); } void SqrtNode::update_add_sub_leaves(std::map<std::string, int>& add_sub_leaves, int pl_mn, int& expected_abs_tot) diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/square_root.hpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/square_root.hpp index 7b7769646185329354e16a642e568f74b5a37fa8..07d2736ef2e544d4136236f588ae1ee616530e6f 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/square_root.hpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/square_root.hpp @@ -30,6 +30,12 @@ public: allowed_op_funcs::sqrt(_n_samp, _feats[0]->value_ptr(offset + 2), node_value_arrs::get_value_ptr(_arr_ind, offset)); } + inline void set_test_value(int offset = -1) + { + offset = (offset == -1) ? 
rung() : offset; + allowed_op_funcs::sqrt(_n_test_samp, _feats[0]->test_value_ptr(offset + 2), node_value_arrs::get_test_value_ptr(_arr_ind, offset)); + } + /** * @brief return the rung of the feature */ diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/subtract.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/subtract.cpp index 801d9d77f0b4e67b2123fa694e49520772888880..0760bc8bd947be1bc09a796c125ed100a8799941 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/subtract.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/subtract.cpp @@ -16,7 +16,7 @@ SubNode::SubNode(std::vector<node_ptr> feats, int rung, int feat_ind): if((add_sub_leaves.size() < 2)) throw InvalidFeatureException(); - if(std::abs(std::accumulate(add_sub_leaves.begin(), add_sub_leaves.end(), -1*expected_abs_tot, [](int tot, auto el){return tot + std::abs(el.second);})) > 0) + if(std::abs(std::accumulate(add_sub_leaves.begin(), add_sub_leaves.end(), -1*expected_abs_tot, [](int tot, auto el){return tot + std::abs(el.second);})) != 0) throw InvalidFeatureException(); int add_sub_tot_first = add_sub_leaves.begin()->second; @@ -27,6 +27,8 @@ SubNode::SubNode(std::vector<node_ptr> feats, int rung, int feat_ind): set_value(); if(is_nan() || is_const()) throw InvalidFeatureException(); + + set_test_value(); } SubNode::SubNode(node_ptr feat_1, node_ptr feat_2, int rung, int feat_ind): @@ -42,7 +44,7 @@ SubNode::SubNode(node_ptr feat_1, node_ptr feat_2, int rung, int feat_ind): if((add_sub_leaves.size() < 2)) throw InvalidFeatureException(); - if(std::abs(std::accumulate(add_sub_leaves.begin(), add_sub_leaves.end(), -1*expected_abs_tot, [](int tot, auto el){return tot + std::abs(el.second);})) > 0) + if(std::abs(std::accumulate(add_sub_leaves.begin(), add_sub_leaves.end(), -1*expected_abs_tot, [](int tot, auto el){return tot + std::abs(el.second);})) != 0) throw InvalidFeatureException(); int add_sub_tot_first = 
add_sub_leaves.begin()->second; @@ -53,6 +55,8 @@ SubNode::SubNode(node_ptr feat_1, node_ptr feat_2, int rung, int feat_ind): set_value(); if(is_nan() || is_const()) throw InvalidFeatureException(); + + set_test_value(); } void SubNode::update_add_sub_leaves(std::map<std::string, int>& add_sub_leaves, int pl_mn, int& expected_abs_tot) @@ -69,5 +73,5 @@ void SubNode::update_div_mult_leaves(std::map<std::string, double>& div_mult_lea else div_mult_leaves[key] = fact; - expected_abs_tot *= std::abs(fact); + expected_abs_tot += std::abs(fact); } diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/subtract.hpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/subtract.hpp index c645a665ccdc2747a9e75bb7889b8caa49efc15b..2c946896b612fa62ba92fc4bbaa3eee425cb1621 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/subtract.hpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/subtract.hpp @@ -30,6 +30,12 @@ public: allowed_op_funcs::sub(_n_samp, _feats[0]->value_ptr(offset + 2), _feats[1]->value_ptr(offset + 1), node_value_arrs::get_value_ptr(_arr_ind, offset)); } + inline void set_test_value(int offset = -1) + { + offset = (offset == -1) ? 
rung() : offset; + allowed_op_funcs::sub(_n_test_samp, _feats[0]->test_value_ptr(offset + 2), _feats[1]->test_value_ptr(offset + 1), node_value_arrs::get_test_value_ptr(_arr_ind, offset)); + } + /** * @brief return the rung of the feature */ diff --git a/src/feature_creation/node/operator_nodes/functions.hpp b/src/feature_creation/node/operator_nodes/functions.hpp index f14cd8eba09c042683baed998febf6f2ef269c27..e60e753f173b9fd1afd8fc1e3ffd78dce923354a 100644 --- a/src/feature_creation/node/operator_nodes/functions.hpp +++ b/src/feature_creation/node/operator_nodes/functions.hpp @@ -27,7 +27,7 @@ namespace allowed_op_funcs */ inline void sub(int size, double* in_0, double* in_1, double* out) { - std::transform(in_0, in_0 + size, in_1, out, std::minus<double>()); + std::transform(in_0, in_0 + size, in_1, out, [](double in_0, double in_1){return in_0 - in_1;}); } /** @@ -63,7 +63,7 @@ namespace allowed_op_funcs */ inline void div(int size, double* in_0, double* in_1, double* out) { - std::transform(in_0, in_0 + size, in_1, out, std::divides<double>()); + std::transform(in_0, in_0 + size, in_1, out, [](double in_0, double in_1){return in_0 / in_1;}); } /** diff --git a/src/feature_creation/node/value_storage/nodes_value_containers.cpp b/src/feature_creation/node/value_storage/nodes_value_containers.cpp index 1da1c91a2d8005cd1a45ac8b69f92f50bec2fd6b..6ab863bb47aa6426209f411efcee37206ce2abaa 100644 --- a/src/feature_creation/node/value_storage/nodes_value_containers.cpp +++ b/src/feature_creation/node/value_storage/nodes_value_containers.cpp @@ -4,12 +4,17 @@ int node_value_arrs::N_SELECTED; int node_value_arrs::N_SAMPLES; int node_value_arrs::N_STORE_FEATURES; int node_value_arrs::N_RUNGS_STORED; +int node_value_arrs::N_SAMPLES_TEST; std::vector<int> node_value_arrs::TEMP_STORAGE_REG; +std::vector<int> node_value_arrs::TEMP_STORAGE_TEST_REG; std::vector<double> node_value_arrs::D_MATRIX; std::vector<double> node_value_arrs::VALUES_ARR; +std::vector<double> 
node_value_arrs::TEST_VALUES_ARR; std::vector<double> node_value_arrs::TEMP_STORAGE_ARR; +std::vector<double> node_value_arrs::TEMP_STORAGE_TEST_ARR; + int node_value_arrs::get_number_new_features(std::string new_op, int n_current_features) { @@ -51,16 +56,21 @@ int node_value_arrs::get_max_number_features(std::vector<std::string> allowed_op return n_feats; } -void node_value_arrs::initialize_values_arr(int n_samples, int n_primary_feat) +void node_value_arrs::initialize_values_arr(int n_samples, int n_samples_test, int n_primary_feat) { N_SAMPLES = n_samples; + N_SAMPLES_TEST = n_samples_test; N_RUNGS_STORED = 0; N_STORE_FEATURES = n_primary_feat; VALUES_ARR = std::vector<double>(N_STORE_FEATURES * N_SAMPLES); + TEST_VALUES_ARR = std::vector<double>(N_STORE_FEATURES * N_SAMPLES_TEST); TEMP_STORAGE_ARR = std::vector<double>(3 * N_STORE_FEATURES * N_SAMPLES); TEMP_STORAGE_REG = std::vector<int>(3 * N_STORE_FEATURES, -1); + + TEMP_STORAGE_TEST_ARR = std::vector<double>(3 * N_STORE_FEATURES * N_SAMPLES_TEST); + TEMP_STORAGE_TEST_REG = std::vector<int>(3 * N_STORE_FEATURES, -1); } void node_value_arrs::resize_values_arr(int n_dims, int n_feat, bool use_temp) @@ -71,13 +81,22 @@ void node_value_arrs::resize_values_arr(int n_dims, int n_feat, bool use_temp) VALUES_ARR.resize(N_STORE_FEATURES * N_SAMPLES); VALUES_ARR.shrink_to_fit(); + TEST_VALUES_ARR.resize(N_STORE_FEATURES * N_SAMPLES_TEST); + TEST_VALUES_ARR.shrink_to_fit(); + if(use_temp) { TEMP_STORAGE_ARR.resize(3 * N_STORE_FEATURES * N_SAMPLES); TEMP_STORAGE_ARR.shrink_to_fit(); - TEMP_STORAGE_REG.resize(3 * N_STORE_FEATURES * N_SAMPLES, - 1); + TEMP_STORAGE_REG.resize(3 * N_STORE_FEATURES, - 1); TEMP_STORAGE_REG.shrink_to_fit(); + + TEMP_STORAGE_TEST_ARR.resize(3 * N_STORE_FEATURES * N_SAMPLES_TEST); + TEMP_STORAGE_TEST_ARR.shrink_to_fit(); + + TEMP_STORAGE_TEST_REG.resize(3 * N_STORE_FEATURES, - 1); + TEMP_STORAGE_TEST_REG.shrink_to_fit(); } else { @@ -94,6 +113,14 @@ double* node_value_arrs::get_value_ptr(int 
ind, int offset) return access_temp_storage((ind % N_STORE_FEATURES) + (offset % 3) * N_STORE_FEATURES); } +double* node_value_arrs::get_test_value_ptr(int ind, int offset) +{ + if(ind < N_STORE_FEATURES) + return access_test_value_arr(ind); + temp_storage_test_reg(ind, offset) = ind; + return access_temp_storage_test((ind % N_STORE_FEATURES) + (offset % 3) * N_STORE_FEATURES); +} + void node_value_arrs::initialize_d_matrix_arr() { N_SELECTED = 0; diff --git a/src/feature_creation/node/value_storage/nodes_value_containers.hpp b/src/feature_creation/node/value_storage/nodes_value_containers.hpp index 54d8c5d6af561c7d1703f8b14f8c36abf86d5e57..c992eaa3cc366202314d05fe0b0f8abed61f07eb 100644 --- a/src/feature_creation/node/value_storage/nodes_value_containers.hpp +++ b/src/feature_creation/node/value_storage/nodes_value_containers.hpp @@ -11,13 +11,17 @@ namespace node_value_arrs { extern int N_SELECTED; //!< Number of features selected extern int N_SAMPLES; //!< Number of samples in the nodes + extern int N_SAMPLES_TEST; //!< Number of samples in the nodes extern int N_STORE_FEATURES; //!< Number of features with stored values extern int N_RUNGS_STORED; //!< Number of rungs with values stored extern std::vector<int> TEMP_STORAGE_REG; //!< Register to see which feature is stored in each slot + extern std::vector<int> TEMP_STORAGE_TEST_REG; //!< Register to see which feature is stored in each slot extern std::vector<double> D_MATRIX; //!< The descriptor matrix extern std::vector<double> VALUES_ARR; //!< Value of the stored features + extern std::vector<double> TEST_VALUES_ARR; //!< Value of the stored features test values extern std::vector<double> TEMP_STORAGE_ARR; //!< Array to temporarily store feature values + extern std::vector<double> TEMP_STORAGE_TEST_ARR; //!< Array to temporarily store feature values /** * @brief Get the maximum number of new features for each rung @@ -44,9 +48,10 @@ namespace node_value_arrs * @details Take initial parameters and construct the 
feature arraies * * @param n_samples number of samples per feature + * @param n_samples_test number of test samples per feature * @param n_primary_feat number of primary features */ - void initialize_values_arr(int n_samples, int n_primary_feat); + void initialize_values_arr(int n_samples, int n_samples_test, int n_primary_feat); /** * @brief set of the value arrays @@ -85,6 +90,16 @@ namespace node_value_arrs */ inline int& temp_storage_reg(int ind, int offset = 0){return TEMP_STORAGE_REG[(ind % N_STORE_FEATURES) + (offset % 3) * N_STORE_FEATURES];} + /** + * @brief Get a reference slot/feature test register + * + * @param ind Feature index + * @param offset Offset integer for TEMP_STORE_TEST_ARRAY + * + * @return The register element for a given feature index and offset + */ + inline int& temp_storage_test_reg(int ind, int offset = 0){return TEMP_STORAGE_TEST_REG[(ind % N_STORE_FEATURES) + (offset % 3) * N_STORE_FEATURES];} + /** * @brief Access element of the permanent storage array * @@ -94,6 +109,15 @@ namespace node_value_arrs */ inline double* access_value_arr(int feature_ind){return VALUES_ARR.data() + feature_ind*N_SAMPLES;} + /** + * @brief Access element of the test value array + * + * @param feature_ind The feature index to access + * + * @return pointer to the feature;s test data array + */ + inline double* access_test_value_arr(int feature_ind){return TEST_VALUES_ARR.data() + feature_ind*N_SAMPLES_TEST;} + /** * @brief Access element of temporary storage array * @@ -103,6 +127,15 @@ namespace node_value_arrs */ inline double* access_temp_storage(int slot){return TEMP_STORAGE_ARR.data() + slot*N_SAMPLES;} + /** + * @brief Access element of temporary storage test array + * + * @param slot The slot of the temporary storage test arrays + * + * @return pointer to the feature's temporary test storage + */ + inline double* access_temp_storage_test(int slot){return TEMP_STORAGE_TEST_ARR.data() + slot*N_SAMPLES_TEST;} + /** * @brief Access the value_ptr to a 
feature * @@ -113,6 +146,16 @@ namespace node_value_arrs */ double* get_value_ptr(int ind, int offset = 0); + /** + * @brief Access the test_value_ptr to a feature + * + * @param ind Feature index + * @param offset the offset for the storage + * + * @return The value pointer + */ + double* get_test_value_ptr(int ind, int offset = 0); + /** * @brief Get the pointer to a primary feature * diff --git a/src/feature_creation/units/Unit.cpp b/src/feature_creation/units/Unit.cpp index 253516c2d51981de9803a3b06d9f5677c33b2c11..0347bfdcd2c7c38d9a818c165ad052184dc45025 100644 --- a/src/feature_creation/units/Unit.cpp +++ b/src/feature_creation/units/Unit.cpp @@ -134,6 +134,13 @@ bool Unit::equal(Unit unit_2) else if(_dct[el.first] != el.second) return false; } + for(auto& el : _dct) + { + if(unit_2.dct().count(el.first) == 0) + return false; + else if(unit_2.dct()[el.first] != el.second) + return false; + } if(unit_2.dct().size() == 0) { for(auto& el : _dct) diff --git a/src/inputs/InputParser.cpp b/src/inputs/InputParser.cpp index 435930e59caa803610dd818c6f4b28d7128cdafa..f362c51a233d267e407a1cf36fe2b8b9f052ed34 100644 --- a/src/inputs/InputParser.cpp +++ b/src/inputs/InputParser.cpp @@ -5,17 +5,37 @@ InputParser::InputParser(boost::property_tree::ptree IP, std::string fn, std::sh _n_sis_select(IP.get<int>("_n_sis_select")), _max_rung(IP.get<int>("max_rung")), _max_store_rung(IP.get<int>("n_rung_store", _max_rung - 1)), - _n_samp(0), + _n_samp(-1), + _n_residuals(IP.get<int>("n_residual", 1)), _filename(fn), _data_file(IP.get<std::string>("data_file", "data.csv")), _prop_key(IP.get<std::string>("property_key", "prop")), + _leave_out_inds(as_vector<int>(IP, "leave_out_inds")), _opset(as_vector<std::string>(IP, "opset")) { + std::ifstream data_stream; std::string line; data_stream.open(_data_file, std::ios::in); while (std::getline(data_stream, line)) ++_n_samp; + _n_leave_out = IP.get<int>("n_leave_out", static_cast<int>(std::round(_n_samp * IP.get<double>("leave_out_frac", 
0.0)))); + if((_leave_out_inds.size() == 0) && _n_leave_out > 0) + { + _leave_out_inds = std::vector<int>(_n_leave_out); + + unsigned seed = std::chrono::system_clock::now().time_since_epoch().count(); + std::vector<int> indexes(_n_samp, 0); + for(int ii = 0; ii < _n_samp; ++ii) + indexes[ii] = ii; + std::shuffle (indexes.begin(), indexes.end(), std::default_random_engine(seed)); + + std::copy_n(indexes.begin(), _n_leave_out, _leave_out_inds.begin()); + } + else if((_n_leave_out == 0) && (_leave_out_inds.size() > 0)) + _n_leave_out = _leave_out_inds.size(); + else if(_leave_out_inds.size() != _n_leave_out) + throw std::logic_error("n_leave_out is not the same size as leave_out_inds"); if(_opset.size() == 0) { @@ -81,14 +101,16 @@ void InputParser::generate_feature_space(std::shared_ptr<MPI_Interface> comm) } else { - std::cout << name_unit_split.size() << std::endl; throw std::logic_error("Invalid feature name \"" + split_line[dd] + "\" in header of file"); } } - std::vector<std::vector<double>> data(headers.size(), std::vector<double>(_n_samp - 1)); + std::vector<std::vector<double>> data(headers.size(), std::vector<double>(_n_samp - _n_leave_out)); + std::vector<std::vector<double>> test_data(headers.size(), std::vector<double>(_n_leave_out)); std::vector<std::string> bad_samples; int cur_line = 0; + int n_train_samp = 0; + int n_test_samp = 0; while (std::getline(data_stream, line)) { boost::algorithm::split(split_line, line, [](char c) {return c==',';}); @@ -97,8 +119,18 @@ void InputParser::generate_feature_space(std::shared_ptr<MPI_Interface> comm) if(split_line.size() != headers.size() + 1) bad_samples.push_back(split_line[0]); - for(int ii = 1; ii < split_line.size(); ++ii) - data[ii-1][cur_line] = std::stod(split_line[ii]); + if(std::any_of(_leave_out_inds.begin(), _leave_out_inds.end(), [&cur_line](int ind){return ind == cur_line;})) + { + for(int ii = 1; ii < split_line.size(); ++ii) + test_data[ii-1][n_test_samp] = std::stod(split_line[ii]); + 
++n_test_samp; + } + else + { + for(int ii = 1; ii < split_line.size(); ++ii) + data[ii-1][n_train_samp] = std::stod(split_line[ii]); + ++n_train_samp; + } ++cur_line; } @@ -113,10 +145,7 @@ void InputParser::generate_feature_space(std::shared_ptr<MPI_Interface> comm) int _propind = 0; while((headers[_propind] != _prop_key) && (_propind < headers.size())) - { - std::cout << _prop_key << '\t' << headers[_propind] << std::endl; ++_propind; - } if(_propind >= headers.size()) { @@ -124,18 +153,23 @@ void InputParser::generate_feature_space(std::shared_ptr<MPI_Interface> comm) } else { - _prop = std::vector<double>(data[_propind].size(), 0.0); - std::copy_n(data[_propind].begin(), data[_propind].size(), _prop.begin()); + _prop_train = std::vector<double>(data[_propind].size(), 0.0); + _prop_test = std::vector<double>(test_data[_propind].size(), 0.0); + + std::copy_n(data[_propind].begin(), data[_propind].size(), _prop_train.begin()); + std::copy_n(test_data[_propind].begin(), test_data[_propind].size(), _prop_test.begin()); data.erase(data.begin() + _propind); + test_data.erase(test_data.begin() + _propind); headers.erase(headers.begin() + _propind); units.erase(units.begin() + _propind); } - node_value_arrs::initialize_values_arr(_prop.size(), headers.size()); + node_value_arrs::initialize_values_arr(_prop_train.size(), _prop_test.size(), headers.size()); + std::vector<node_ptr> phi_0; for(int ff = 0; ff < headers.size(); ++ff) - phi_0.push_back(std::make_shared<FeatureNode>(ff, headers[ff], data[ff], units[ff])); + phi_0.push_back(std::make_shared<FeatureNode>(ff, headers[ff], data[ff], test_data[ff], units[ff])); _feat_space = std::make_shared<FeatureSpace>(comm, phi_0, _opset, _max_rung, _n_sis_select, _max_store_rung); } diff --git a/src/inputs/InputParser.hpp b/src/inputs/InputParser.hpp index b1c6c7a0de82fe953322d6686b32c4a4d1d1591d..e0ec8b105bf454bd176827934a2cba13c8b7a04a 100644 --- a/src/inputs/InputParser.hpp +++ b/src/inputs/InputParser.hpp @@ -11,6 +11,10 
@@ #include <iostream> #include <fstream> +#include <algorithm> // std::shuffle +#include <random> // std::default_random_engine +#include <chrono> // std::chrono::system_clock + #include <feature_creation/feature_space/FeatureSpace.hpp> #include <feature_creation/node/value_storage/nodes_value_containers.hpp> @@ -27,14 +31,18 @@ public: int _max_store_rung; int _n_sis_select; int _n_samp; + int _n_residuals; + int _n_leave_out; std::string _filename; std::string _data_file; std::string _prop_key; + std::vector<int> _leave_out_inds; + std::vector<double> _prop_train; + std::vector<double> _prop_test; std::vector<std::string> _opset; - std::vector<double> _prop; InputParser(boost::property_tree::ptree IP, std::string fn, std::shared_ptr<MPI_Interface> comm); inline std::shared_ptr<FeatureSpace> feat_space(){return _feat_space;} diff --git a/src/main.cpp b/src/main.cpp index 0c6aea62984503c5eeacfff3486b057acce779af..7d51ce4f415ee2eb33a503b6de8689c535724d51 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -37,15 +37,20 @@ int main(int argc, char const *argv[]) std::cout<< "time input_parsing/Feature space generation: "<< duration << std::endl; node_value_arrs::initialize_d_matrix_arr(); - SISSORegressor sisso(IP._feat_space, IP._prop, IP._n_dim); + SISSORegressor sisso(IP._feat_space, IP._prop_train, IP._prop_test, IP._n_dim, IP._n_residuals); sisso.fit(); if(comm->rank() == 0) { - for(auto& model : sisso.models()) + for(int ii = 0; ii < sisso.models().size(); ++ii) { - std::cout << model.rmse() << std::endl; - std::cout << model << '\n' << std::endl; + std::cout << sisso.models()[ii][0].rmse() << std::endl; + std::cout << sisso.models()[ii][0] << "\n" << std::endl; + for(int jj = 0; jj < sisso.models()[ii].size(); ++jj) + { + sisso.models()[ii][jj].train_to_file("models/train_dim_" + std::to_string(ii) + "_model_" + std::to_string(jj) + ".dat"); + sisso.models()[ii][jj].test_to_file("models/test_dim_" + std::to_string(ii) + "_model_" + std::to_string(jj) + ".dat"); + 
} } } return 0;