diff --git a/src/feature_creation/feature_space/FeatureSpace.cpp b/src/feature_creation/feature_space/FeatureSpace.cpp index 601759f73b447b2b8372b766ee21415e8ed2dc7d..9a7f9c33f3a0e8a8d93fe6ee8c20ebe4a562794f 100644 --- a/src/feature_creation/feature_space/FeatureSpace.cpp +++ b/src/feature_creation/feature_space/FeatureSpace.cpp @@ -58,6 +58,8 @@ FeatureSpace::FeatureSpace( ): _phi(phi_0), _phi_0(phi_0), + _end_no_params(1, phi_0.size()), + _start_gen_reparam(1, 0), _allowed_param_ops(allowed_param_ops), _allowed_ops(allowed_ops), _prop(prop), @@ -232,7 +234,7 @@ void FeatureSpace::initialize_fs_output_files() const } #ifdef PARAMETERIZE -void FeatureSpace::generate_new_feats( +void FeatureSpace::generate_param_feats( std::vector<node_ptr>::iterator& feat, std::vector<node_ptr>& feat_set, unsigned long int& feat_ind, @@ -241,31 +243,44 @@ void FeatureSpace::generate_new_feats( const double u_bound ) { - unsigned long int phi_ind = feat - _phi.begin(); - feat_set.reserve(feat_set.size() + _un_operators.size() + phi_ind * (_com_bin_operators.size() + 2 * _bin_operators.size())); + feat_set.reserve(feat_set.size() + _un_param_operators.size() + phi_ind * (_com_bin_param_operators.size() + 2 * _bin_param_operators.size())); - for(auto& op : _un_operators) + for(auto& op : _un_param_operators) { - op(feat_set, *feat, feat_ind, l_bound, u_bound); + op(feat_set, *feat, feat_ind, l_bound, u_bound, optimizer); } - for(auto& op : _com_bin_operators) + for(auto& op : _com_bin_param_operators) { - for(auto feat_2 = _phi.begin(); feat_2 < feat; ++feat_2) + for(auto feat_2 = _phi.begin(); feat_2 != feat; ++feat_2) { - op(feat_set, *feat, *feat_2, feat_ind, l_bound, u_bound); + op(feat_set, *feat, *feat_2, feat_ind, l_bound, u_bound, optimizer); } } - - for(auto& op : _bin_operators) + for(auto& op : _bin_param_operators) { - for(auto feat_2 = _phi.begin(); feat_2 < feat; ++feat_2) + for(auto feat_2 = _phi.begin(); feat_2 != feat; ++feat_2) { - op(feat_set, *feat, *feat_2, feat_ind, l_bound, u_bound); - op(feat_set, *feat_2, *feat, feat_ind, l_bound, u_bound); + op(feat_set, *feat, *feat_2, feat_ind, l_bound, u_bound, optimizer); + op(feat_set, *feat_2, *feat, feat_ind, l_bound, u_bound, optimizer); } } +} + +void FeatureSpace::generate_reparam_feats( + std::vector<node_ptr>::iterator& feat, + std::vector<node_ptr>& feat_set, + unsigned long int& feat_ind, + std::shared_ptr<NLOptimizer> optimizer, + const double l_bound, + const double u_bound +) +{ + int cur_rung = (*feat)->rung(); + unsigned long int max_n_feat = _end_no_params[cur_rung] + _phi_reparam.size(); + feat_set.reserve(feat_set.size() + _un_operators.size() + max_n_feat * (_com_bin_operators.size() + 2 * _bin_operators.size())); + for(auto& op : _un_param_operators) { op(feat_set, *feat, feat_ind, l_bound, u_bound, optimizer); @@ -273,22 +288,229 @@ void FeatureSpace::generate_new_feats( for(auto& op : _com_bin_param_operators) { - for(auto feat_2 = _phi.begin(); feat_2 != feat; ++feat_2) + for(int rr = 0; rr <= cur_rung; ++rr) + { + for(auto feat_2 = _phi.begin() + _start_gen[rr]; (feat_2 != feat) || (feat_2 != _phi.begin() + _end_no_params[rr]); ++feat_2) + { + op(feat_set, *feat, *feat_2, feat_ind, l_bound, u_bound, optimizer); + } + } + for(auto feat_2 = _phi_reparam.begin(); (feat_2 != feat) || (feat_2 != _phi_reparam.end()); ++feat_2) { op(feat_set, *feat, *feat_2, feat_ind, l_bound, u_bound, optimizer); } } for(auto& op : _bin_param_operators) { - for(auto feat_2 = _phi.begin(); feat_2 != feat; ++feat_2) + for(int rr = 0; rr <= cur_rung; ++rr) + { + for(auto feat_2 = _phi.begin() + _start_gen[rr]; (feat_2 != feat) || (feat_2 != _phi.begin() + _end_no_params[rr]); ++feat_2) + { + op(feat_set, *feat, *feat_2, feat_ind, l_bound, u_bound, optimizer); + op(feat_set, *feat_2, *feat, feat_ind, l_bound, u_bound, optimizer); + } + } + + for(auto feat_2 = _phi_reparam.begin(); (feat_2 != feat) || (feat_2 != _phi_reparam.end()); ++feat_2) { op(feat_set, *feat, *feat_2, feat_ind, l_bound, u_bound, optimizer); op(feat_set, *feat_2, *feat, feat_ind, l_bound, u_bound, optimizer); } } } -#else -void FeatureSpace::generate_new_feats( + +void FeatureSpace::generate_reparam_feature_set(const std::vector<double>& prop) +{ + double u_bound = 1e50; + double l_bound = 1e-50; + std::vector<int> inds; + + for(int nn = 1; nn <= _max_phi - _n_rung_generate; ++nn) + { + node_value_arrs::clear_temp_reg(); + if(nn == _max_phi) + { + u_bound = _u_bound; + l_bound = _l_bound; + } + std::vector<node_ptr> next_phi; + _n_feat = _phi.size(); + + unsigned long int feat_ind = _phi.size() + _phi_reparam.size(); + + node_value_arrs::clear_temp_reg(); + double start = omp_get_wtime(); + #pragma omp parallel firstprivate(feat_ind, l_bound, u_bound) + { + std::vector<node_ptr> next_phi_private; + std::shared_ptr<NLOptimizer> optimizer_param = nlopt_wrapper::get_optimizer(_project_type, _task_sizes, _prop, _max_phi, _max_param_depth); + std::shared_ptr<NLOptimizer> optimizer_reparam = nlopt_wrapper::get_optimizer(_project_type, _task_sizes, prop, _max_phi, _max_param_depth); + + #pragma omp for schedule(dynamic) + for(auto feat_1 = _phi_reparam.begin() + _start_gen_reparam.back() + _mpi_comm->rank(); feat_1 < _phi_reparam.end(); feat_1 += _mpi_comm->size()) + { + generate_non_param_feats(feat_1, next_phi_private, feat_ind, l_bound, u_bound); + generate_param_feats(feat_1, next_phi_private, feat_ind, optimizer_param, l_bound, u_bound); + } + + #pragma omp for schedule(dynamic) + for(auto feat_1 = _phi.begin() + _start_gen.back() + _mpi_comm->rank(); feat_1 < _phi.end(); feat_1 += _mpi_comm->size()) + { + generate_reparam_feats(feat_1, next_phi_private, feat_ind, optimizer_reparam, l_bound, u_bound); + } + + #pragma omp critical + next_phi.insert(next_phi.end(), next_phi_private.begin(), next_phi_private.end()); + } + _start_gen_reparam.push_back(_phi_reparam.size()); + node_value_arrs::clear_temp_reg(); + if(nn < _max_phi) + { + int new_phi_size; + int phi_size_start = _phi.size(); + if(_mpi_comm->rank() == 0) + { + std::vector<std::vector<node_ptr>> next_phi_gathered; + mpi::gather(*_mpi_comm, next_phi, next_phi_gathered, 0); + feat_ind = _phi.size(); + for(auto& next_phi_vec : next_phi_gathered) + { + _phi.insert(_phi.end(), next_phi_vec.begin(), next_phi_vec.end()); + } + new_phi_size = _phi.size(); + + // Sort the features to ensure consistent feature spaces for all MPI/OpenMP configurations + std::sort( + _phi.begin() + _start_gen.back(), + _phi.end(), + [feat_ind](node_ptr n1, node_ptr n2){return n1->sort_score(feat_ind) < n2->sort_score(feat_ind);} + ); + + // Reindex sorted features + std::for_each( + _phi.begin() + _start_gen.back(), + _phi.end(), + [&feat_ind](node_ptr n){n->reindex(feat_ind); ++feat_ind;} + ); + + mpi::broadcast(*_mpi_comm, new_phi_size, 0); + + for(int bb = 0; bb <= (new_phi_size - phi_size_start) / 10000; ++bb) + { + mpi::broadcast(*_mpi_comm, &_phi[phi_size_start + bb * 10000], std::min(10000, new_phi_size - phi_size_start - bb * 10000), 0); + } + } + else + { + mpi::gather(*_mpi_comm, next_phi, 0); + mpi::broadcast(*_mpi_comm, new_phi_size, 0); + _phi.resize(new_phi_size); + + for(int bb = 0; bb <= (new_phi_size - phi_size_start) / 10000; ++bb) + { + mpi::broadcast(*_mpi_comm, &_phi[phi_size_start + bb * 10000], std::min(10000, new_phi_size - phi_size_start - bb * 10000), 0); + } + } + + if(phi_size_start == new_phi_size) + { + throw std::logic_error("No features created during this rung (" + std::to_string(nn) + ")"); + } + + node_value_arrs::clear_temp_reg(); + if(nn < _max_phi) + { + // Remove identical features + std::vector<double> scores(_phi_reparam.size()); + _mpi_comm->barrier(); + project_funcs::project_r(_prop.data(), scores.data(), _phi_reparam, _task_sizes, 1); + scores.erase(scores.begin(), scores.begin() + _start_gen_reparam.back()); + inds = util_funcs::argsort<double>(scores); + + std::vector<int> del_inds; + + _mpi_comm->barrier(); + node_value_arrs::clear_temp_reg(); + for(int sc = 0; sc < _scores.size() - 1; ++sc) + { + #ifdef PARAMETERIZE + if(_phi_reparam[inds[sc] + _start_gen_reparam.back()]->n_params() > 0) + { + continue; + } + #endif + + if(_scores[inds[sc]] > -1e-10) + { + double base_val = std::abs( + util_funcs::r( + _phi_reparam[_start_gen_reparam.back() + inds[sc]]->value_ptr(), + _phi_reparam[_start_gen_reparam.back() + inds[sc]]->value_ptr(), + _n_samp + ) + ); + for(int sc2 = sc + 1; sc2 < _scores.size(); ++sc2) + { + double comp = std::abs( + base_val - std::abs( + util_funcs::r( + _phi_reparam[_start_gen_reparam.back() + inds[sc]]->value_ptr(), + _phi_reparam[_start_gen_reparam.back() + inds[sc2]]->value_ptr(0, true), + _n_samp + ) + ) + ); + if(comp < 1e-10) + { + del_inds.push_back(-1 * (inds[sc] + _start_gen_reparam.back())); + break; + } + } + } + else if(_scores[inds[sc + 1]] - _scores[inds[sc]] < 1e-10) + { + double base_val = std::abs( + util_funcs::r( + _phi_reparam[_start_gen_reparam.back() + inds[sc]]->value_ptr(), + _phi_reparam[_start_gen_reparam.back() + inds[sc]]->value_ptr(), + _n_samp + ) + ); + double comp = std::abs( + base_val - std::abs( + util_funcs::r( + _phi_reparam[_start_gen_reparam.back() + inds[sc]]->value_ptr(), + _phi_reparam[_start_gen_reparam.back() + inds[sc + 1]]->value_ptr(0, true), + _n_samp + ) + ) + ); + if(comp < 1e-10) + { + del_inds.push_back(-1 * (inds[sc] + _start_gen.back())); + } + } + } + + inds = util_funcs::argsort<int>(del_inds); + for(int ii = 0; ii < inds.size(); ++ii) + { + _phi_reparam.erase(_phi_reparam.begin() - del_inds[inds[ii]]); + } + + // Reindex + for(int ff = _start_gen.back(); ff < _phi_reparam.size(); ++ff) + { + _phi_reparam[ff]->reindex(ff); + } + } + } + } + _n_feat = _phi.size(); +} +#endif + +void FeatureSpace::generate_non_param_feats( std::vector<node_ptr>::iterator& feat, std::vector<node_ptr>& feat_set, unsigned long int& feat_ind, @@ -321,7 +543,6 @@ void FeatureSpace::generate_new_feats( } } } -#endif void FeatureSpace::generate_feature_space() { @@ -353,7 +574,8 @@ void FeatureSpace::generate_feature_space() #pragma omp for schedule(dynamic) for(auto feat_1 = _phi.begin() + _start_gen.back() + _mpi_comm->rank(); feat_1 < _phi.end(); feat_1 += _mpi_comm->size()) { - generate_new_feats(feat_1, next_phi_private, feat_ind, optimizer, l_bound, u_bound); + generate_non_param_feats(feat_1, next_phi_private, feat_ind, l_bound, u_bound); + generate_param_feats(feat_1, next_phi_private, feat_ind, optimizer, l_bound, u_bound); } #pragma omp critical @@ -366,7 +588,7 @@ void FeatureSpace::generate_feature_space() #pragma omp for schedule(dynamic) for(auto feat_1 = _phi.begin() + _start_gen.back() + _mpi_comm->rank(); feat_1 < _phi.end(); feat_1 += _mpi_comm->size()) { - generate_new_feats(feat_1, next_phi_private, feat_ind, l_bound, u_bound); + generate_non_param_feats(feat_1, next_phi_private, feat_ind, l_bound, u_bound); } #pragma omp critical @@ -509,6 +731,29 @@ void FeatureSpace::generate_feature_space() _phi.erase(_phi.begin() - del_inds[inds[ii]]); } + // Reorder features based on the number of parameters they have (none goes first) + std::vector<int> feat_n_params(_phi.size() - _start_gen.back()); + std::transform( + _phi.begin() + _start_gen.back(), + _phi.end(), + feat_n_params.begin(), + [](node_ptr feat){return feat->n_params();} + ); + inds = util_funcs::argsort<int>(feat_n_params); + next_phi.resize(feat_n_params.size()); + std::copy_n(_phi.begin() + _start_gen.back(), feat_n_params.size(), next_phi.begin()); + std::transform( + inds.begin(), + inds.end(), + _phi.begin() + _start_gen.back(), + [&next_phi](int ind){return next_phi[ind];} + ); + + // Set how many features have no parameters + _end_no_params.push_back( + std::count_if(feat_n_params.begin(), feat_n_params.end(), [](int n_param){return n_param == 0;}) + ); + // Reindex for(int ff = _start_gen.back(); ff < _phi.size(); ++ff) { @@ -700,16 +945,17 @@ void FeatureSpace::project_generated(const double* prop, const int size, std::ve int index_base = _phi.size() + _n_sis_select * (omp_get_thread_num() + _mpi_comm->size()); #ifdef PARAMETERIZE - std::shared_ptr<NLOptimizer> optimizer; + std::shared_ptr<NLOptimizer> optimizer = nlopt_wrapper::get_optimizer(_project_type, _task_sizes, _prop, _max_phi, _max_param_depth); + std::shared_ptr<NLOptimizer> reparam_optimizer; if(_reparam_residual) { std::vector<double> prop_vec(size, 0.0); std::copy_n(prop, size, prop_vec.data()); - optimizer = nlopt_wrapper::get_optimizer(_project_type, _task_sizes, prop_vec, _max_phi, _max_param_depth); + reparam_optimizer = nlopt_wrapper::get_optimizer(_project_type, _task_sizes, prop_vec, _max_phi, _max_param_depth); } else { - optimizer = nlopt_wrapper::get_optimizer(_project_type, _task_sizes, _prop, _max_phi, _max_param_depth); + reparam_optimizer = nullptr; } #endif @@ -724,9 +970,14 @@ void FeatureSpace::project_generated(const double* prop, const int size, std::ve bool is_sel = (*feat)->selected(); (*feat)->set_selected(false); #ifdef PARAMETERIZE - generate_new_feats(feat, generated_phi, feat_ind, optimizer, _l_bound, _u_bound); + generate_non_param_feats(feat, generated_phi, feat_ind, _l_bound, _u_bound); + generate_param_feats(feat, generated_phi, feat_ind, optimizer, _l_bound, _u_bound); + if(reparam_optimizer) + { + generate_reparam_feats(feat, generated_phi, feat_ind, reparam_optimizer, _l_bound, _u_bound); + } #else - generate_new_feats(feat, generated_phi, feat_ind, _l_bound, _u_bound); + generate_non_param_feats(feat, generated_phi, feat_ind, _l_bound, _u_bound); #endif (*feat)->set_selected(is_sel); @@ -833,47 +1084,8 @@ void FeatureSpace::sis(const std::vector<double>& prop) if(_reparam_residual && (_phi_selected.size() > 0)) { double start_time = omp_get_wtime(); - - // Make a hard copy of the previously selected features - node_ptr copy; - for(int ff = _phi_selected.size() - _n_sis_select; ff < _phi_selected.size(); ++ff) - { - copy = _phi_selected[ff]->hard_copy(); - _phi_selected[ff]->set_selected(false); - _phi_selected[ff]->set_d_mat_ind(-1); - _phi_selected[ff] = copy; - } - - // Resize values array for reparameterization - node_value_arrs::resize_values_arr(0, _start_gen[1]); - // Reparameterize based on residuals - #pragma omp parallel - { - std::shared_ptr<NLOptimizer> optimizer = nlopt_wrapper::get_optimizer(_project_type, _task_sizes, prop, _max_phi, _max_param_depth); - #pragma omp for schedule(dynamic) - for(int ff = _start_gen[1]; ff < _phi.size(); ++ff) - { - if(_phi[ff]->n_params() > 0) - { - _phi[ff]->get_parameters(optimizer); - } - } - } - - // Reset the stored feature values - int max_store = (_n_rung_store == _max_phi) ? _phi.size() : _start_gen[_n_rung_store + 1]; - node_value_arrs::resize_values_arr(_n_rung_store, max_store); - for(int ff = _start_gen[1]; ff < max_store; ++ff) - { - _phi[ff]->set_value(); - _phi[ff]->test_value(); - } - - _mpi_comm->barrier(); - if(_mpi_comm->rank() == 0) - { - std::cout << "The time for reparameterization: " << omp_get_wtime() - start_time << " s" << std::endl; - } + _phi_reparam.resize(0); + generate_reparam_feature_set(prop); } #endif // Create output directories if needed diff --git a/src/feature_creation/feature_space/FeatureSpace.hpp b/src/feature_creation/feature_space/FeatureSpace.hpp index 2c0e99021e46471a4a6cacafe44f554898fe13da..36677682b76bd46c72a7a6b57369bc417c1fcd73 100644 --- a/src/feature_creation/feature_space/FeatureSpace.hpp +++ b/src/feature_creation/feature_space/FeatureSpace.hpp @@ -45,6 +45,10 @@ class FeatureSpace const std::vector<node_ptr> _phi_0; //!< initial feature space #ifdef PARAMETERIZE + std::vector<node_ptr> _phi_reparam; //!< The list of nodes used for reparameterization + std::vector<int> _end_no_params; //!< The list of indexes of each rung where parameterized nodes start + std::vector<int> _start_gen_reparam; //!< The list of indexes of each rung where parameterized nodes start + std::vector<un_param_op_node_gen> _un_param_operators; //!< list of all parameterized unary operators with free parameters std::vector<bin_param_op_node_gen> _com_bin_param_operators; //!< list of all parameterized commutable binary operators with free parameters std::vector<bin_param_op_node_gen> _bin_param_operators; //!< list of all parameterized binary operators with free parameters @@ -268,9 +272,28 @@ public: */ inline int n_rung_generate() const {return _n_rung_generate;} + /** + * @brief Generate a new set of non-parameterized features from a single feature + * @details Take in the feature and perform all valid algebraic operations on it. + * + * @param feat The feature to spawn new features from + * @param feat_set The feature set to pull features from for combinations + * @param feat_ind starting index for the next feature generated + * @param optimizer The object used to optimize the parameterized features + * @param l_bound lower bound for the absolute value of the feature + * @param u_bound upper bound for the abosulte value of the feature + */ + void generate_non_param_feats( + std::vector<node_ptr>::iterator& feat, + std::vector<node_ptr>& feat_set, + unsigned long int& feat_ind, + const double l_bound=1e-50, + const double u_bound=1e50 + ); + #ifdef PARAMETERIZE /** - * @brief Generate a new set of features from a single feature + * @brief Generate a new set of parameterized features from a single feature * @details Take in the feature and perform all valid algebraic operations on it. * * @param feat The feature to spawn new features from @@ -280,7 +303,7 @@ public: * @param l_bound lower bound for the absolute value of the feature * @param u_bound upper bound for the abosulte value of the feature */ - void generate_new_feats( + void generate_param_feats( std::vector<node_ptr>::iterator& feat, std::vector<node_ptr>& feat_set, unsigned long int& feat_ind, @@ -288,24 +311,32 @@ public: const double l_bound=1e-50, const double u_bound=1e50 ); - #else + /** - * @brief Generate a new set of features from a single feature - * @details Take in the feature and perform all valid algebraic operations on it. + * @brief Generate a new set of parameterized features for the residuals * * @param feat The feature to spawn new features from * @param feat_set The feature set to pull features from for combinations * @param feat_ind starting index for the next feature generated + * @param optimizer The object used to optimize the parameterized features * @param l_bound lower bound for the absolute value of the feature * @param u_bound upper bound for the abosulte value of the feature */ - void generate_new_feats( + void generate_reparam_feats( std::vector<node_ptr>::iterator& feat, std::vector<node_ptr>& feat_set, unsigned long int& feat_ind, + std::shared_ptr<NLOptimizer> optimizer, const double l_bound=1e-50, const double u_bound=1e50 ); + + /** + * @brief Generate reparameterized feature set + * + * @param prop The property to optimize against + */ + void generate_reparam_feature_set(const std::vector<double>& prop); #endif /**