diff --git a/src/classification/LPWrapper.cpp b/src/classification/LPWrapper.cpp
index bea4407c9bd7692caab21a3ff8e9757cfd158f4f..e46d327ca5f72301a2d49a6056de82c59efbbba0 100644
--- a/src/classification/LPWrapper.cpp
+++ b/src/classification/LPWrapper.cpp
@@ -71,7 +71,9 @@ LPWrapper::LPWrapper(const LPWrapper& o) :
     _n_overlap_test(o._n_overlap_test),
     _n_pts_check(o._n_pts_check),
     _n_pts_check_test(o._n_pts_check_test)
-{}
+{
+    setup_constraints();
+}
 
 LPWrapper::LPWrapper(LPWrapper&& o) :
     _elements(o._elements),
@@ -99,7 +101,9 @@ LPWrapper::LPWrapper(LPWrapper&& o) :
     _n_overlap_test(o._n_overlap_test),
     _n_pts_check(o._n_pts_check),
     _n_pts_check_test(o._n_pts_check_test)
-{}
+{
+    setup_constraints();
+}
 
 LPWrapper& LPWrapper::operator= (const LPWrapper& o)
 {
@@ -129,6 +133,8 @@ LPWrapper& LPWrapper::operator= (const LPWrapper& o)
     _n_pts_check = o._n_pts_check;
     _n_pts_check_test = o._n_pts_check_test;
 
+    setup_constraints();
+
     return *this;
 }
 
@@ -160,6 +166,8 @@ LPWrapper& LPWrapper::operator= (LPWrapper&& o)
     _n_pts_check = std::move(o._n_pts_check);
     _n_pts_check_test = std::move(o._n_pts_check_test);
 
+    setup_constraints();
+
    return *this;
 }
 
diff --git a/src/feature_creation/feature_space/FeatureSpace.cpp b/src/feature_creation/feature_space/FeatureSpace.cpp
index bec9ac7b3fce024452fbb6396b64ecfde7f80003..491c58850d5cf4eacf0d458c2d44943a40b96650 100644
--- a/src/feature_creation/feature_space/FeatureSpace.cpp
+++ b/src/feature_creation/feature_space/FeatureSpace.cpp
@@ -183,7 +183,6 @@ void FeatureSpace::initialize_fs()
     }
 
     initialize_fs_output_files();
-    project_funcs::set_project_fxn(_project_type, _task_sizes.size(), _project, _project_no_omp);
     comp_feats::set_is_valid_fxn(_project_type, _cross_cor_max, _n_samp, _is_valid, _is_valid_feat_list);
 
     set_op_lists();
@@ -970,21 +969,22 @@ void FeatureSpace::generate_feature_space()
     _n_feat = _phi.size();
 }
 
-void FeatureSpace::project_generated(const double* prop, const int size, std::vector<node_ptr>& phi_sel, std::vector<double>& scores_sel)
+void FeatureSpace::project_generated(std::shared_ptr<LossFunction> loss, std::vector<node_ptr>& phi_sel, std::vector<double>& scores_sel)
 {
     std::vector<double> scores_sel_all(node_value_arrs::N_SELECTED);
     if(node_value_arrs::N_SELECTED > _n_sis_select)
     {
         scores_sel_all.resize(_phi_selected.size());
-        _project(prop, scores_sel_all.data(), _phi_selected, _task_sizes, size / _n_samp);
+        loss->project(_phi_selected, scores_sel_all.data());
     }
     std::copy_n(scores_sel.data(), scores_sel.size(), &scores_sel_all[node_value_arrs::N_SELECTED - _n_sis_select]);
 
     int worst_score_ind = std::max_element(scores_sel.begin(), scores_sel.end()) - scores_sel.begin();
     double worst_score = scores_sel[worst_score_ind];
 
-    #pragma omp parallel firstprivate(worst_score, worst_score_ind, prop, size, scores_sel_all)
+    #pragma omp parallel firstprivate(worst_score, worst_score_ind, loss, scores_sel_all)
     {
+        loss = loss_function_util::copy(loss);
         std::vector<node_ptr> phi_sel_private(phi_sel);
         std::vector<double> scores_sel_private(scores_sel);
         int index_base = _phi.size() + _n_sis_select * (omp_get_thread_num() + _mpi_comm->size());
@@ -994,8 +994,7 @@ void FeatureSpace::project_generated(const double* prop, const int size, std::ve
         std::shared_ptr<NLOptimizer> reparam_optimizer;
         if(_reparam_residual)
         {
-            std::vector<double> prop_vec(size, 0.0);
-            std::copy_n(prop, size, prop_vec.data());
+            std::vector<double> prop_vec(loss->prop_project());
             reparam_optimizer = nlopt_wrapper::get_optimizer(_project_type, _task_sizes, prop_vec, _max_phi, _max_param_depth);
         }
         else
@@ -1037,7 +1036,7 @@ void FeatureSpace::project_generated(const double* prop, const int size, std::ve
                 node_value_arrs::clear_temp_reg_thread();
                 std::vector<double> scores(generated_phi.size());
-                _project_no_omp(prop, scores.data(), generated_phi, _task_sizes, size / _n_samp);
+                loss->project_no_omp(generated_phi, scores.data());
 
                 std::vector<int> inds = util_funcs::argsort<double>(scores);
@@ -1127,6 +1126,20 @@ void FeatureSpace::project_generated(const double* prop, const int size, std::ve
 }
 
 void FeatureSpace::sis(const std::vector<double>& prop)
+{
+    sis(
+        loss_function_util::get_loss_function(
+            _project_type,
+            prop,
+            {},
+            _task_sizes,
+            std::vector<int>(_task_sizes.size(), 0),
+            false
+        )
+    );
+}
+
+void FeatureSpace::sis(const std::shared_ptr<LossFunction> loss)
 {
     // Reparameterize for the residuals
 #ifdef PARAMETERIZE
@@ -1136,7 +1149,7 @@ void FeatureSpace::sis(const std::vector<double>& prop)
         _phi.resize(_n_feat);
         _phi_reparam.resize(0);
         _start_gen_reparam.resize(0);
-        generate_reparam_feature_set(prop);
+        generate_reparam_feature_set(loss->prop_project());
         _phi.insert(_phi.end(), _phi_reparam.begin(), _phi_reparam.end());
         _scores.resize(_phi.size());
     }
@@ -1163,7 +1176,7 @@ void FeatureSpace::sis(const std::vector<double>& prop)
     // Get projection scores
     double start_time = omp_get_wtime();
-    _project(prop.data(), _scores.data(), _phi, _task_sizes, prop.size() / _n_samp);
+    loss->project(_phi, _scores.data());
 
     _mpi_comm->barrier();
     if(_mpi_comm->rank() == 0)
@@ -1187,7 +1200,7 @@ void FeatureSpace::sis(const std::vector<double>& prop)
     std::vector<double> scores_sel_all(node_value_arrs::N_SELECTED, 0.0);
     if(node_value_arrs::N_SELECTED > _n_sis_select)
     {
-        _project(prop.data(), scores_sel_all.data(), _phi_selected, _task_sizes, prop.size() / _n_samp);
+        loss->project(_phi_selected, scores_sel_all.data());
     }
 
     // Get the best features currently generated on this rank
@@ -1234,7 +1247,7 @@ void FeatureSpace::sis(const std::vector<double>& prop)
     phi_sel.resize(cur_feat_local);
     scores_sel.resize(cur_feat_local);
-    project_generated(prop.data(), prop.size(), phi_sel, scores_sel);
+    project_generated(loss, phi_sel, scores_sel);
 
     _mpi_comm->barrier();
     if(_mpi_comm->rank() == 0)
diff --git a/src/feature_creation/feature_space/FeatureSpace.hpp b/src/feature_creation/feature_space/FeatureSpace.hpp
index fdf3652355177786438589a15fb760bf8644e7a2..7a7cd26db1b408e589fb99c27aa6bc94566e204e 100644
--- a/src/feature_creation/feature_space/FeatureSpace.hpp
+++ b/src/feature_creation/feature_space/FeatureSpace.hpp
@@ -17,13 +17,17 @@
 #include <iomanip>
 #include <utility>
 
-#include "mpi_interface/MPI_Interface.hpp"
-#include "mpi_interface/MPI_ops.hpp"
-#include "mpi_interface/serialize_tuple.h"
 #include "feature_creation/node/ModelNode.hpp"
 #include "feature_creation/node/operator_nodes/allowed_ops.hpp"
 #include "feature_creation/node/utils.hpp"
 #include "feature_creation/node/value_storage/nodes_value_containers.hpp"
+
+#include "loss_function/utils.hpp"
+
+#include "mpi_interface/MPI_Interface.hpp"
+#include "mpi_interface/MPI_ops.hpp"
+#include "mpi_interface/serialize_tuple.h"
+
 #include "utils/compare_features.hpp"
 #include "utils/project.hpp"
@@ -69,8 +73,6 @@ class FeatureSpace
     const std::string _feature_space_file; //!< File to store information about the selected features
     const std::string _feature_space_summary_file; //!< File to store information about the selected features
 
-    std::function<void(const double*, double*, const std::vector<node_ptr>&, const std::vector<int>&, const int)> _project; //!< Function used to calculate the scores for SIS
-    std::function<void(const double*, double*, const std::vector<node_ptr>&, const std::vector<int>&, const int)> _project_no_omp; //!< Function used to calculate the scores for SIS without changing omp environment
     std::function<bool(const double*, const int, const double, const std::vector<double>&, const double, const int, const int)> _is_valid; //!< Function used to calculate the scores for SIS
     std::function<bool(const double*, const int, const double, const std::vector<node_ptr>&, const std::vector<double>&, const double)> _is_valid_feat_list; //!< Function used to calculate the scores for SIS without changing omp environment
 
@@ -350,7 +352,7 @@ public:
      * @param phi_selected The features that would be selected from the previous rungs
      * @param scores_selected The projection scores of the features that would be selected from the previous rungs
      */
-    void project_generated(const double* prop, const int size, std::vector<node_ptr>& phi_selected, std::vector<double>& scores_selected);
+    void project_generated(std::shared_ptr<LossFunction> loss, std::vector<node_ptr>& phi_selected, std::vector<double>& scores_selected);
 
     /**
      * @brief Perform SIS on a feature set with a specified property
@@ -360,6 +362,14 @@ public:
      */
     void sis(const std::vector<double>& prop);
 
+    /**
+     * @brief Perform SIS on a feature set with a specified loss function
+     * @details Perform sure-independence screening with either the correct property or the error
+     *
+     * @param loss The LossFunction to project over
+     */
+    void sis(std::shared_ptr<LossFunction> loss);
+
     // DocString: feat_space_feat_in_phi
     /**
     * @brief Is a feature in this process' _phi?
diff --git a/src/loss_function/LossFunction.cpp b/src/loss_function/LossFunction.cpp
index f11793ee65779119e44d9bd772353f059917ccde..977e3783651692966cce76719bdb9bf6e7cb21b5 100644
--- a/src/loss_function/LossFunction.cpp
+++ b/src/loss_function/LossFunction.cpp
@@ -4,7 +4,8 @@ LossFunction::LossFunction(
     std::vector<double> prop_train,
     std::vector<double> prop_test,
     std::vector<int> task_sizes_train,
-    std::vector<int> task_sizes_test
+    std::vector<int> task_sizes_test,
+    bool fix_intercept
 ) :
     _projection_prop(prop_train),
     _prop_train(prop_train),
@@ -13,12 +14,34 @@ LossFunction::LossFunction(
     _error_test(prop_test.size(), 0.0),
     _task_sizes_train(task_sizes_train),
     _task_sizes_test(task_sizes_test),
-    _n_project_prop(1),
     _n_samp(std::accumulate(task_sizes_train.begin(), task_sizes_train.end(), 0)),
     _n_samp_test(std::accumulate(task_sizes_test.begin(), task_sizes_test.end(), 0)),
-    _n_task(task_sizes_train.size())
+    _n_task(task_sizes_train.size()),
+    _n_project_prop(prop_train.size() / _n_samp),
+    _n_feat(1),
+    _n_dim(1 + (!fix_intercept)),
+    _fix_intercept(fix_intercept)
 {}
 
+LossFunction::LossFunction(std::shared_ptr<LossFunction> o) :
+    _projection_prop(o->prop_project()),
+    _prop_train(o->prop()),
+    _prop_test(o->prop_test()),
+    _error_train(_prop_train.size(), 0.0),
+    _error_test(_prop_test.size(), 0.0),
+    _task_sizes_train(o->task_sizes_train()),
+    _task_sizes_test(o->task_sizes_test()),
+    _n_samp(std::accumulate(_task_sizes_train.begin(), _task_sizes_train.end(), 0)),
+    _n_samp_test(std::accumulate(_task_sizes_test.begin(), _task_sizes_test.end(), 0)),
+    _n_task(_task_sizes_train.size()),
+    _n_project_prop(_projection_prop.size() / _n_samp),
+    _n_feat(o->n_feat()),
+    _n_dim(o->n_dim()),
+    _fix_intercept(o->fix_intercept())
+{
+    _model_last_gen = o->model_last_gen();
+}
+
 void LossFunction::project(const std::vector<node_ptr>& feats, double* scores)
 {
     prepare_project();
diff --git a/src/loss_function/LossFunction.hpp b/src/loss_function/LossFunction.hpp
index 065aabd6bf9cb2e4a8727e0f4da1468b45196adb..f24c5542180d3e02fde229422507a7337c22852e 100644
--- a/src/loss_function/LossFunction.hpp
+++ b/src/loss_function/LossFunction.hpp
@@ -24,10 +24,13 @@ protected:
     std::vector<int> _task_sizes_train; //!< Number of training samples per task
     std::vector<int> _task_sizes_test; //!< Number of testing samples per task
 
-    int _n_project_prop; //!< Number of properties to project over
     int _n_samp; //!< Number of samples in the training set
     int _n_samp_test; //!< Number of samples in the test set
     int _n_task; //!< Number of tasks
+    int _n_project_prop; //!< Number of properties to project over
+    int _n_feat; //!< Number of features in the model
+    int _n_dim; //!< Number of dimensions of the least squares problem (_n_feat plus the bias term)
+    bool _fix_intercept; //!< True if the bias term is fixed at 0
 
 public:
     /**
@@ -42,9 +45,12 @@ public:
         std::vector<double> prop_train,
         std::vector<double> prop_test,
         std::vector<int> task_sizes_train,
-        std::vector<int> task_sizes_test
+        std::vector<int> task_sizes_test,
+        bool fix_intercept = false
     );
 
+    LossFunction(std::shared_ptr<LossFunction> o);
+
     /**
      * @brief Copy Constructor
      *
@@ -128,6 +134,24 @@ public:
     {
         _model_last_gen = model_list;
     }
+
+    virtual LOSS_TYPE type() const = 0; //!< The LOSS_TYPE of the loss function
+
+    inline std::vector<int> task_sizes_train() const {return _task_sizes_train;}
+    inline std::vector<int> task_sizes_test() const {return _task_sizes_test;}
+
+    inline const std::vector<double>& prop() const {return _prop_train;}
+    inline const std::vector<double>& prop_test() const {return _prop_test;}
+    inline const std::vector<double>& prop_project() const {return _projection_prop;}
+
+    inline const double* prop_pointer() const {return _prop_train.data();}
+    inline const double* prop_test_pointer() const {return _prop_test.data();}
+    inline const double* prop_project_pointer() const {return _projection_prop.data();}
+
+    inline std::vector<std::shared_ptr<Model>> model_last_gen() const {return _model_last_gen;}
+    inline bool fix_intercept() const {return _fix_intercept;}
+    inline int n_feat() const {return _n_feat;}
+    inline int n_dim() const {return _n_dim;}
 };
 
 #endif
diff --git a/src/loss_function/LossFunctionConvexHull.cpp b/src/loss_function/LossFunctionConvexHull.cpp
index fb801dcb78916578442b24c3b0e0c86eead37331..b954ac0095df14e392809f41b6d55d3ff98a5a80 100644
--- a/src/loss_function/LossFunctionConvexHull.cpp
+++ b/src/loss_function/LossFunctionConvexHull.cpp
@@ -7,10 +7,11 @@ LossFunctionConvexHull::LossFunctionConvexHull(
     std::vector<int> task_sizes_test
 ) :
     LossFunction(prop_train, prop_test, task_sizes_train, task_sizes_test),
-    _n_class(0),
-    _n_feat(1),
-    _n_dim(1)
+    _width(1e-5),
+    _n_class(0)
 {
+    --_n_dim;
+
     for(auto& pt : prop_test)
     {
         if(std::none_of(prop_train.begin(), prop_train.end(), [&pt](double pp){return std::abs(pp - pt) < 1e-10;}))
@@ -68,6 +69,14 @@ LossFunctionConvexHull::LossFunctionConvexHull(
     std::copy_n(prop_test.begin(), prop_test.size(), _prop_test.begin());
 }
 
+LossFunctionConvexHull::LossFunctionConvexHull(std::shared_ptr<LossFunction> o) :
+    LossFunction(o),
+    _width(1e-5)
+{
+    _n_class = vector_utils::unique<double>(_prop_train).size();
+    --_n_dim;
+}
+
 void LossFunctionConvexHull::prepare_project()
 {
     std::vector<double> temp_prop;
diff --git a/src/loss_function/LossFunctionConvexHull.hpp b/src/loss_function/LossFunctionConvexHull.hpp
index 4990bcd94f69a75e4d75fd2d8ad45260b0a86f86..c4b3665b86f493527077aad4324e6773ea98ad05 100644
--- a/src/loss_function/LossFunctionConvexHull.hpp
+++ b/src/loss_function/LossFunctionConvexHull.hpp
@@ -24,10 +24,8 @@ protected:
     std::map<int, int> _sample_inds_to_sorted_dmat_inds; //!< map from input sample inds to the SORTED_D_MATRIX_INDS
     std::vector<double> _scores;
 
-    static const double _width = 1e-5;
+    const double _width;
     int _n_class;
-    int _n_feat;
-    int _n_dim;
 
 public:
 
@@ -46,6 +44,8 @@ public:
         std::vector<int> task_sizes_test
     );
 
+    LossFunctionConvexHull(std::shared_ptr<LossFunction> o);
+
     /**
      * @brief Copy Constructor
      *
@@ -116,6 +116,9 @@ public:
      * @param model_list [description]
      */
     void set_model_list(const std::vector<std::shared_ptr<Model>>& model_list);
+
+    inline LOSS_TYPE type() const {return LOSS_TYPE::CONVEX_HULL;}
+
 };
 
 #endif
diff --git a/src/loss_function/LossFunctionLogPearsonRMSE.cpp b/src/loss_function/LossFunctionLogPearsonRMSE.cpp
index 090446444ce817152412f4528976611f1b232b8b..5c69ad94767e26d9ad138e81d56d91dd8e162d59 100644
--- a/src/loss_function/LossFunctionLogPearsonRMSE.cpp
+++ b/src/loss_function/LossFunctionLogPearsonRMSE.cpp
@@ -7,25 +7,17 @@ LossFunctionLogPearsonRMSE::LossFunctionLogPearsonRMSE(
     std::vector<int> task_sizes_test,
     bool fix_intercept
 ) :
-    LossFunctionPearsonRMSE(prop_train, prop_test, task_sizes_train, task_sizes_test),
+    LossFunctionPearsonRMSE(prop_train, prop_test, task_sizes_train, task_sizes_test, fix_intercept),
     _log_feat(_n_samp, 0.0)
 {
-    std::transform(
-        _prop_train.begin(),
-        _prop_train.end(),
-        _prop_train.begin(),
-        [](double val){return std::log10(val);}
-    );
     std::copy_n(_prop_train.begin(), _prop_train.size(), _projection_prop.begin());
-
-    std::transform(
-        _prop_test.begin(),
-        _prop_test.end(),
-        _prop_test.begin(),
-        [](double val){return std::log10(val);}
-    );
 }
 
+LossFunctionLogPearsonRMSE::LossFunctionLogPearsonRMSE(std::shared_ptr<LossFunction> o) :
+    LossFunctionPearsonRMSE(o),
+    _log_feat(_n_samp, 0.0)
+{}
+
 void LossFunctionLogPearsonRMSE::set_model_list(const std::vector<std::shared_ptr<Model>>& model_list)
 {
     _model_last_gen = model_list;
@@ -53,7 +45,6 @@ void LossFunctionLogPearsonRMSE::set_temp_prop(double* temp_prop)
 
 double LossFunctionLogPearsonRMSE::project(const node_ptr& feat)
 {
-    std::copy_n(_projection_prop_sum.data(), _projection_prop_sum.size(), _c.data());
     std::fill_n(_scores.begin(), _scores.size(), 0.0);
 
     double* val_ptr = feat->value_ptr(-1, true);
@@ -64,7 +55,9 @@ double LossFunctionLogPearsonRMSE::project(const node_ptr& feat)
         [](double val){return std::log10(val);}
     );
 
-    return calc_max_pearson(_log_feat.data());
+    double r2 = calc_max_pearson(_log_feat.data());
+    // Guard against non-finite scores from log10 of non-positive feature values
+    return std::isfinite(r2) ? r2 : 0.0;
 }
 
 void LossFunctionLogPearsonRMSE::set_a(const std::vector<int>& inds, int taskind, int start)
diff --git a/src/loss_function/LossFunctionLogPearsonRMSE.hpp b/src/loss_function/LossFunctionLogPearsonRMSE.hpp
index d08bfe436d80453e17eecc2fca802a81cf5540da..4ae036d83058cd11179b66c8eccc4dc7bdca6887 100644
--- a/src/loss_function/LossFunctionLogPearsonRMSE.hpp
+++ b/src/loss_function/LossFunctionLogPearsonRMSE.hpp
@@ -33,6 +33,8 @@ public:
         bool fix_intercept = false
     );
 
+    LossFunctionLogPearsonRMSE(std::shared_ptr<LossFunction> o);
+
     /**
      * @brief Copy Constructor
      *
@@ -97,6 +99,8 @@ public:
      * @param model_list [description]
      */
     void set_model_list(const std::vector<std::shared_ptr<Model>>& model_list);
+
+    inline LOSS_TYPE type() const {return LOSS_TYPE::LOG_PEARSON_RMSE;}
 };
 
 #endif
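A note on the finite-score guard in `LossFunctionLogPearsonRMSE::project` above: a candidate feature can contain non-positive values, so `std::log10` can yield NaN, and a NaN correlation would otherwise poison the max-element selection. The guard must be a ternary rather than a multiplication by `std::isfinite(r2)`, because NaN times zero is still NaN. A small standalone demonstration (plain C++, not repo code):

```cpp
#include <cassert>
#include <cmath>

int main()
{
    double r2 = std::log10(-1.0); // NaN: the "feature" had a negative value

    // Ternary guard: non-finite scores collapse to 0.0 and are never selected.
    double guarded = std::isfinite(r2) ? r2 : 0.0;
    assert(guarded == 0.0);

    // Multiplying by the bool does NOT work: NaN * 0.0 is still NaN.
    double leaked = r2 * static_cast<double>(std::isfinite(r2));
    assert(std::isnan(leaked));

    return 0;
}
```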
diff --git a/src/loss_function/LossFunctionPearsonRMSE.cpp b/src/loss_function/LossFunctionPearsonRMSE.cpp
index 434af6dfe2e16e30e6a9de72535400fc2050c805..a1647e31ff227017890f492716c05b9cebca8606 100644
--- a/src/loss_function/LossFunctionPearsonRMSE.cpp
+++ b/src/loss_function/LossFunctionPearsonRMSE.cpp
@@ -7,15 +7,21 @@ LossFunctionPearsonRMSE::LossFunctionPearsonRMSE(
     std::vector<int> task_sizes_test,
     bool fix_intercept
 ) :
-    LossFunction(prop_train, prop_test, task_sizes_train, task_sizes_test),
-    _a((1 + (!fix_intercept)) * _n_samp, 1.0),
+    LossFunction(prop_train, prop_test, task_sizes_train, task_sizes_test, fix_intercept),
+    _a(_n_dim * _n_samp, 1.0),
     _b(prop_train),
-    _c(1, 0.0),
-    _projection_prop_sum(_n_task, 0.0),
-    _coefs(1 + (!fix_intercept), 0.0),
-    _lwork(0),
-    _n_feat(1),
-    _fix_intercept(fix_intercept)
+    _c(_n_task, 0.0),
+    _coefs(_n_task * _n_dim, 0.0),
+    _lwork(0)
+{}
+
+LossFunctionPearsonRMSE::LossFunctionPearsonRMSE(std::shared_ptr<LossFunction> o) :
+    LossFunction(o),
+    _a(_n_dim * _n_samp, 1.0),
+    _b(_prop_train),
+    _c(_n_task, 0.0),
+    _coefs(_n_task * _n_dim, 0.0),
+    _lwork(0)
 {}
 
 void LossFunctionPearsonRMSE::set_model_list(const std::vector<std::shared_ptr<Model>>& model_list)
@@ -73,37 +79,36 @@ void LossFunctionPearsonRMSE::prepare_project()
         set_temp_prop(temp_prop.data());
     }
 
-    _projection_prop_sum.resize(_n_project_prop * _n_task);
     _scores.resize(_n_project_prop);
     _c.resize(_n_project_prop * _n_task, 0.0);
-
     for(int pp = 0; pp < _n_project_prop; ++pp)
     {
         int start = 0;
         for(int tt = 0; tt < _n_task; ++tt)
         {
-            double prop_mean = util_funcs::mean(_projection_prop.data() + start, _task_sizes_train[tt]);
-            double prop_std = util_funcs::stand_dev(_projection_prop.data() + start, _task_sizes_train[tt], prop_mean);
-            std::transform(
-                temp_prop.begin() + _n_project_prop * _n_samp + start,
-                temp_prop.begin() + _n_project_prop * _n_samp + start + _task_sizes_train[tt],
-                _projection_prop.begin() + _n_project_prop * start + tt * _task_sizes_train[tt],
-                [prop_mean, prop_std](double p){return (p - prop_mean) / prop_std;}
-            );
+            double prop_mean = util_funcs::mean(
+                temp_prop.data() + pp * _n_samp + start,
+                _task_sizes_train[tt]
+            );
+
+            double prop_std = util_funcs::stand_dev(
+                temp_prop.data() + pp * _n_samp + start,
+                _task_sizes_train[tt]
+            );
+
+            // Standardize and store in the layout calc_max_pearson expects:
+            // sample-major with the property index fastest
+            for(int ss = 0; ss < _task_sizes_train[tt]; ++ss)
+            {
+                _projection_prop[(start + ss) * _n_project_prop + pp] = (temp_prop[pp * _n_samp + start + ss] - prop_mean) / prop_std;
+            }
-            _projection_prop_sum[tt * _n_project_prop + pp] = std::accumulate(
-                _projection_prop.begin() + _n_project_prop * start + tt * _task_sizes_train[tt],
-                _projection_prop.begin() + _n_project_prop * start + (tt + 1) * _task_sizes_train[tt],
-                0.0
-            );
             start += _task_sizes_train[tt];
         }
     }
 }
 
 double LossFunctionPearsonRMSE::project(const node_ptr& feat)
 {
-    std::copy_n(_projection_prop_sum.data(), _projection_prop_sum.size(), _c.data());
     std::fill_n(_scores.begin(), _scores.size(), 0.0);
-
     return calc_max_pearson(feat->value_ptr(-1, true));
 }
 
@@ -112,24 +117,22 @@ double LossFunctionPearsonRMSE::calc_max_pearson(double* feat_val_ptr)
     int start = 0;
     for(int tt = 0; tt < _task_sizes_train.size(); ++tt)
     {
-        double feat_mean = util_funcs::mean(feat_val_ptr + start, _task_sizes_train[tt]);
-        double feat_std = util_funcs::stand_dev(feat_val_ptr + start, _task_sizes_train[tt], feat_mean);
+        double feat_std = util_funcs::stand_dev(feat_val_ptr + start, _task_sizes_train[tt]);
 
         dgemm_(
             'N',
             'N',
             _n_project_prop,
             1,
             _task_sizes_train[tt],
-            1.0 / feat_std,
+            1.0 / feat_std / static_cast<double>(_task_sizes_train[tt]),
             &_projection_prop[start * _n_project_prop],
             _n_project_prop,
             feat_val_ptr + start,
             _task_sizes_train[tt],
-            -1.0 * feat_mean / feat_std,
+            0.0,
             &_c[tt * _n_project_prop],
             _n_project_prop
         );
-        start += _task_sizes_train[tt];
 
         std::transform(
             _scores.begin(),
             _scores.end(),
             _c.begin() + tt * _n_project_prop,
             _scores.begin(),
             [](double score, double cc){return score + cc * cc;}
         );
+
+        start += _task_sizes_train[tt];
     }
-    return *std::max_element(_scores.begin(), _scores.end()) / static_cast<double>(_n_project_prop);
+    return -*std::max_element(_scores.begin(), _scores.end()) / static_cast<double>(_n_task); // negated so better features get lower scores
 }
 
 double LossFunctionPearsonRMSE::operator()(const std::vector<int>& inds)
diff --git a/src/loss_function/LossFunctionPearsonRMSE.hpp b/src/loss_function/LossFunctionPearsonRMSE.hpp
index 4c3ea1834a68ea9d67c60c2f05acd49a34c732eb..7c1789b95451341eae03b1e2f8315e6cd63dc3d3 100644
--- a/src/loss_function/LossFunctionPearsonRMSE.hpp
+++ b/src/loss_function/LossFunctionPearsonRMSE.hpp
@@ -14,21 +14,14 @@ class LossFunctionPearsonRMSE: public LossFunction
 {
 protected:
-    std::vector<double> _projection_prop; //!< The property vector used for projection
-
     std::vector<double> _a; //!< The A matrix used for calculating the least squares solution of the problem
-    std::vector<double> _b; //!< The property vector used to for solving the least squares solution
-    std::vector<double> _c; //!< Vector used to temporarily store _projection_prop_sum
-    std::vector<double> _projection_prop_sum; //!< The sum of all elements for each projection_prop property
-    std::vector<double> _scores; //!< Vector used to temporarily store _projection_prop_sum
+    std::vector<double> _b; //!< The property vector used for solving the least squares solution
+    std::vector<double> _c; //!< Vector used to temporarily store the Pearson correlations for each task/property pair
+    std::vector<double> _scores; //!< Vector used to temporarily store the projection score of each property
     std::vector<double> _work; //!< Work vector for dgels
     std::vector<double> _coefs; //!< Coefficients for the least squares solution
 
     int _lwork; //!< Ideal size of the work size
-    int _n_feat; //!< Number of features
-    int _n_project_prop; //!< Number of properties to project over
-    int _n_dim; //!< Number of dimensions for the least squares problem
-    bool _fix_intercept; //!< True if the intercept is fixed
 
 public:
 
@@ -48,6 +41,8 @@ public:
         bool fix_intercept = false
     );
 
+    LossFunctionPearsonRMSE(std::shared_ptr<LossFunction> o);
+
     /**
      * @brief Copy Constructor
      *
@@ -169,6 +164,8 @@ public:
      * @param model_list [description]
      */
     virtual void set_model_list(const std::vector<std::shared_ptr<Model>>& model_list);
+
+    inline LOSS_TYPE type() const {return LOSS_TYPE::PEARSON_RMSE;}
 };
 
 #endif
diff --git a/src/loss_function/utils.cpp b/src/loss_function/utils.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..bbbf65e39bbfa50a504ad8361ae7a4fbe5709dae
--- /dev/null
+++ b/src/loss_function/utils.cpp
@@ -0,0 +1,61 @@
+#include "loss_function/utils.hpp"
+std::shared_ptr<LossFunction> loss_function_util::get_loss_function(
+    std::string type,
+    std::vector<double> prop_train,
+    std::vector<double> prop_test,
+    std::vector<int> task_sizes_train,
+    std::vector<int> task_sizes_test,
+    bool fix_intercept
+)
+{
+    if(type == "regression")
+    {
+        return std::make_shared<LossFunctionPearsonRMSE>(
+            prop_train,
+            prop_test,
+            task_sizes_train,
+            task_sizes_test,
+            fix_intercept
+        );
+    }
+    else if(type == "log_regression")
+    {
+        return std::make_shared<LossFunctionLogPearsonRMSE>(
+            prop_train,
+            prop_test,
+            task_sizes_train,
+            task_sizes_test,
+            fix_intercept
+        );
+    }
+    else if(type == "classification")
+    {
+        return std::make_shared<LossFunctionConvexHull>(
+            prop_train,
+            prop_test,
+            task_sizes_train,
+            task_sizes_test
+        );
+    }
+
+    throw std::logic_error("LossFunction type was not defined.");
+    return nullptr;
+}
+
+std::shared_ptr<LossFunction> loss_function_util::copy(std::shared_ptr<LossFunction> loss)
+{
+    if(loss->type() == LOSS_TYPE::CONVEX_HULL)
+    {
+        return std::make_shared<LossFunctionConvexHull>(loss);
+    }
+    else if(loss->type() == LOSS_TYPE::PEARSON_RMSE)
+    {
+        return std::make_shared<LossFunctionPearsonRMSE>(loss);
+    }
+    else if(loss->type() == LOSS_TYPE::LOG_PEARSON_RMSE)
+    {
+        return std::make_shared<LossFunctionLogPearsonRMSE>(loss);
+    }
+    throw std::logic_error("LossFunction type was not defined in the copy function.");
+    return nullptr;
+}
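`loss_function_util::copy` above exists because `FeatureSpace::project_generated` hands every OpenMP thread its own loss object: copying the `shared_ptr` alone would let threads race on the shared scratch buffers (`_c`, `_scores`, `_log_feat`), and copying through the base class would silently degrade a log-Pearson loss into a plain Pearson one. The `type()` dispatch keeps the dynamic type intact. A hedged sketch of the invariants the factory pair is meant to uphold (`check_copy` is illustrative, not repo code):

```cpp
#include <cassert>
#include <memory>

#include "loss_function/utils.hpp"

// Sketch: the deep copy must preserve the dynamic type and hand back an
// independent object, so per-thread projections cannot interfere.
void check_copy(std::shared_ptr<LossFunction> loss)
{
    std::shared_ptr<LossFunction> per_thread = loss_function_util::copy(loss);

    assert(per_thread->type() == loss->type());                 // same dynamic type
    assert(per_thread.get() != loss.get());                     // distinct object, no shared buffers
    assert(per_thread->prop_project() == loss->prop_project()); // identical projection data
}
```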
diff --git a/src/loss_function/utils.hpp b/src/loss_function/utils.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..dc8683306aef7abca7b3505f17fb7e2fd76b19da
--- /dev/null
+++ b/src/loss_function/utils.hpp
@@ -0,0 +1,31 @@
+/** @file loss_function/utils.hpp
+ * @brief Utilities for generating and copying LossFunction objects
+ *
+ * @author Thomas A. R. Purcell (tpurcell)
+ * @bug No known bugs.
+ */
+
+#ifndef LOSS_FUNCTION_UTILS
+#define LOSS_FUNCTION_UTILS
+
+#include "loss_function/LossFunctionConvexHull.hpp"
+#include "loss_function/LossFunctionLogPearsonRMSE.hpp"
+#include "loss_function/LossFunctionPearsonRMSE.hpp"
+
+namespace loss_function_util
+{
+
+std::shared_ptr<LossFunction> get_loss_function(
+    std::string type,
+    std::vector<double> prop_train,
+    std::vector<double> prop_test,
+    std::vector<int> task_sizes_train,
+    std::vector<int> task_sizes_test,
+    bool fix_intercept = false
+);
+
+std::shared_ptr<LossFunction> copy(std::shared_ptr<LossFunction> loss);
+
+}
+
+#endif
diff --git a/src/python/feature_creation/FeatureSpace.cpp b/src/python/feature_creation/FeatureSpace.cpp
index a739122394a3e34da53a3510f0f4b02225e5a786..c31f8f7b08b05b06f2c66c19c26ccaf4d05596b2 100644
--- a/src/python/feature_creation/FeatureSpace.cpp
+++ b/src/python/feature_creation/FeatureSpace.cpp
@@ -202,7 +202,6 @@ FeatureSpace::FeatureSpace(
     _max_param_depth(-1),
     _reparam_residual(false)
 {
-    project_funcs::set_project_fxn(project_type, _task_sizes.size(), _project, _project_no_omp);
     comp_feats::set_is_valid_fxn(project_type, _cross_cor_max, _n_samp, _is_valid, _is_valid_feat_list);
     mpi_reduce_op::set_op(_project_type, _cross_cor_max, _n_sis_select);
 
@@ -318,7 +317,6 @@ FeatureSpace::FeatureSpace(
         _end_no_params.push_back(
             std::count_if(feat_n_params.begin(), feat_n_params.end(), [](int n_param){return n_param == 0;})
         );
-        std::cout << "out" << std::endl;
     }
 #endif
     _scores.resize(_n_feat);
@@ -352,7 +350,6 @@ FeatureSpace::FeatureSpace(
     _max_param_depth(-1),
     _reparam_residual(false)
 {
-    project_funcs::set_project_fxn(project_type, _task_sizes.size(), _project, _project_no_omp);
     comp_feats::set_is_valid_fxn(project_type, _cross_cor_max, _n_samp, _is_valid, _is_valid_feat_list);
     mpi_reduce_op::set_op(_project_type, _cross_cor_max, _n_sis_select);
 
diff --git a/src/utils/enum.hpp b/src/utils/enum.hpp
index 629ebfc0ef400b5aeed0775e8b56337b7c272a2e..cc24940a55d0889718b199a3d8a2193068653712 100644
--- a/src/utils/enum.hpp
+++ b/src/utils/enum.hpp
@@ -61,4 +61,11 @@
         REG,
         LOG_REG
     };
+
+    enum class LOSS_TYPE
+    {
+        CONVEX_HULL,
+        PEARSON_RMSE,
+        LOG_PEARSON_RMSE
+    };
 #endif
diff --git a/src/utils/mkl_interface.hpp b/src/utils/mkl_interface.hpp
index 6382c7eff7c7662d28d613faa65c531b98f43ae4..0ce0dcb79422aeeb2e10e697847aba37f1b6b44b 100644
--- a/src/utils/mkl_interface.hpp
+++ b/src/utils/mkl_interface.hpp
@@ -850,4 +850,4 @@ namespace
 
 }
 
-#endif
\ No newline at end of file
+#endif
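Finally, the threading pattern that motivates most of this patch, extracted from `FeatureSpace::project_generated` into a standalone sketch (assumptions: `project_in_parallel`, `phi_batches`, and `scores` are illustrative names; `loss_function_util::copy` and `LossFunction::project_no_omp` are the APIs added here):

```cpp
#include <memory>
#include <vector>

#include <omp.h>

#include "loss_function/utils.hpp"

// Sketch: the loss handle enters the parallel region firstprivate, and each
// thread immediately swaps it for its own deep, type-preserving copy, so the
// loss object's internal scratch buffers are never shared between threads.
void project_in_parallel(
    std::shared_ptr<LossFunction> loss,
    const std::vector<std::vector<node_ptr>>& phi_batches,
    std::vector<std::vector<double>>& scores
)
{
    #pragma omp parallel firstprivate(loss)
    {
        loss = loss_function_util::copy(loss);

        #pragma omp for schedule(dynamic)
        for(int bb = 0; bb < static_cast<int>(phi_batches.size()); ++bb)
        {
            scores[bb].resize(phi_batches[bb].size());
            loss->project_no_omp(phi_batches[bb], scores[bb].data());
        }
    }
}
```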