From be2ed7a83cc9834fc0a54a35c2adebc26a1e1377 Mon Sep 17 00:00:00 2001 From: Thomas <purcell@fhi-berlin.mpg.de> Date: Wed, 7 Apr 2021 10:21:48 +0200 Subject: [PATCH] Add bug fixes from sissopp generate_node functions now use is_const of the new features --- .../feature_space/FeatureSpace.cpp | 54 +++++++++++-------- src/feature_creation/node/FeatureNode.cpp | 15 ++++++ src/feature_creation/node/FeatureNode.hpp | 6 +-- .../node/operator_nodes/OperatorNode.hpp | 27 ++++++++-- .../abs/absolute_value.cpp | 9 ++-- .../abs_diff/absolute_difference.cpp | 9 ++-- .../allowed_operator_nodes/add/add.cpp | 12 ++--- .../allowed_operator_nodes/cb/cube.cpp | 14 ++--- .../allowed_operator_nodes/cbrt/cube_root.cpp | 15 +++--- .../allowed_operator_nodes/cos/cos.cpp | 9 ++-- .../allowed_operator_nodes/div/divide.cpp | 10 ++-- .../exp/exponential.cpp | 17 +++--- .../allowed_operator_nodes/inv/inverse.cpp | 16 +++--- .../allowed_operator_nodes/log/log.cpp | 14 ++--- .../allowed_operator_nodes/mult/multiply.cpp | 10 ++-- .../neg_exp/negative_exponential.cpp | 14 ++--- .../allowed_operator_nodes/sin/sin.cpp | 12 ++--- .../allowed_operator_nodes/sp/sixth_power.cpp | 9 ++-- .../allowed_operator_nodes/sq/square.cpp | 9 ++-- .../sqrt/square_root.cpp | 9 ++-- .../allowed_operator_nodes/sub/subtract.cpp | 11 ++-- .../value_storage/nodes_value_containers.cpp | 45 ++++++++++++++-- .../value_storage/nodes_value_containers.hpp | 50 +++++++++++++++-- src/python/bindings_docstring_keyed.cpp | 4 +- 24 files changed, 259 insertions(+), 141 deletions(-) diff --git a/src/feature_creation/feature_space/FeatureSpace.cpp b/src/feature_creation/feature_space/FeatureSpace.cpp index ccd72d7c..86b41bbd 100644 --- a/src/feature_creation/feature_space/FeatureSpace.cpp +++ b/src/feature_creation/feature_space/FeatureSpace.cpp @@ -67,6 +67,17 @@ void FeatureSpace::initialize_fs(std::vector<double> prop, std::string project_t else if(_max_phi - _n_rung_generate < _n_rung_store) throw std::logic_error("Requesting to 
store more rungs than what can be pre-generated."); + node_value_arrs::set_task_sz_train(_task_sizes); + int n_max_ops = 0; + for(int rr = 0; rr < _max_phi - _n_rung_store; ++rr) + n_max_ops += std::pow(2, rr); + if((n_max_ops > _phi_0.size()) && (_n_rung_store == 0)) + { + std::cerr << "WARNING: Setting _n_rung_store to 1 to prevent possible overwrite issues" << std::endl; + ++_n_rung_store; + _n_rung_generate -= (_n_rung_generate == 1) && (_n_rung_store + _n_rung_generate > _max_phi); + } + initialize_fs_output_files(); project_funcs::set_project_fxn(project_type, _task_sizes.size(), _project, _project_no_omp); comp_feats::set_is_valid_fxn(project_type, _cross_cor_max, _n_samp, _is_valid, _is_valid_feat_list); @@ -431,11 +442,10 @@ void FeatureSpace::project_generated(double* prop, int size, std::vector<node_pt std::vector<node_ptr> phi_sel_private(phi_sel); std::vector<double> scores_sel_private(scores_sel); - #pragma omp for schedule(dynamic) for(auto feat = _phi.begin() + _start_gen.back() + _mpi_comm->rank(); feat < _phi.end(); feat += _mpi_comm->size()) { - unsigned long int feat_ind = _phi.size() + _n_sis_select * omp_get_num_threads(); + unsigned long int feat_ind = node_value_arrs::N_STORE_FEATURES + _n_sis_select * (omp_get_num_threads() + _mpi_comm->size()); node_value_arrs::clear_temp_reg_thread(); std::vector<node_ptr> generated_phi; @@ -452,9 +462,7 @@ void FeatureSpace::project_generated(double* prop, int size, std::vector<node_pt int ii = 0; while((ii < inds.size()) && (scores[inds[ii]] < -1.0)) - { ++ii; - } while((ii < inds.size()) && ((scores[inds[ii]] < worst_score) || (phi_sel_private.size() < _n_sis_select))) { @@ -463,16 +471,15 @@ void FeatureSpace::project_generated(double* prop, int size, std::vector<node_pt { if(scores_sel_private.size() == _n_sis_select) { - phi_sel_private[worst_score_ind]->set_selected(false); - phi_sel_private[worst_score_ind]->set_d_mat_ind(-1); - - generated_phi[inds[ii]]->reindex(_phi.size() + worst_score_ind + 
_n_sis_select * omp_get_thread_num()); + generated_phi[inds[ii]]->reindex(node_value_arrs::N_STORE_FEATURES + worst_score_ind + _n_sis_select * (omp_get_thread_num() + _mpi_comm->size())); + generated_phi[inds[ii]]->set_value(); phi_sel_private[worst_score_ind] = generated_phi[inds[ii]]; scores_sel_private[worst_score_ind] = cur_score; } else { - generated_phi[inds[ii]]->reindex(_phi.size() + scores_sel_private.size() + _n_sis_select * omp_get_thread_num()); + generated_phi[inds[ii]]->reindex(node_value_arrs::N_STORE_FEATURES + scores_sel_private.size() + _n_sis_select * (omp_get_thread_num() + _mpi_comm->size())); + generated_phi[inds[ii]]->set_value(); phi_sel_private.push_back(generated_phi[inds[ii]]); scores_sel_private.push_back(cur_score); } @@ -489,14 +496,19 @@ void FeatureSpace::project_generated(double* prop, int size, std::vector<node_pt { if(((phi_sel.size() < _n_sis_select) || (scores_sel_private[sc] < scores_sel[worst_score_ind])) && _is_valid_feat_list(phi_sel_private[sc]->value_ptr(0), _n_samp, _cross_cor_max, phi_sel, scores_sel, scores_sel_private[sc])) { + if(phi_sel.size() == _n_sis_select) { scores_sel[worst_score_ind] = scores_sel_private[sc]; + phi_sel_private[sc]->reindex(node_value_arrs::N_STORE_FEATURES + worst_score_ind + _n_sis_select * _mpi_comm->rank()); + phi_sel_private[sc]->set_value(); phi_sel[worst_score_ind] = phi_sel_private[sc]; } else { scores_sel.push_back(scores_sel_private[sc]); + phi_sel_private[sc]->reindex(node_value_arrs::N_STORE_FEATURES + phi_sel.size() + _n_sis_select * _mpi_comm->rank()); + phi_sel_private[sc]->set_value(); phi_sel.push_back(phi_sel_private[sc]); } worst_score_ind = std::max_element(scores_sel.begin(), scores_sel.end()) - scores_sel.begin(); @@ -569,29 +581,26 @@ void FeatureSpace::sis(std::vector<double>& prop) std::cout << "Time to get best features on rank : " << omp_get_wtime() - start << " s" << std::endl; start = omp_get_wtime(); + + for(auto& feat : phi_sel) + { + feat->set_selected(false); + 
feat->set_d_mat_ind(-1); + } + if(_n_rung_generate > 0) { phi_sel.resize(cur_feat_local); scores_sel.resize(cur_feat_local); + node_ptr test_feat = phi_sel[0]; + project_generated(prop.data(), prop.size(), phi_sel, scores_sel); - node_value_arrs::clear_temp_reg(); - node_value_arrs::clear_temp_test_reg(); - for(auto& feat : _phi) - { - feat->set_selected(false); - feat->set_d_mat_ind(-1); - } _mpi_comm->barrier(); if(_mpi_comm->rank() == 0) std::cout << "Projection time for features generated on the fly: " << omp_get_wtime() - start << " s" << std::endl; } - for(auto& feat : phi_sel) - { - feat->set_selected(false); - feat->set_d_mat_ind(-1); - } std::fill_n(&scores_sel_all[cur_feat], _n_sis_select, 0.0); // If we are only on one process then phi_sel are the selected features start = omp_get_wtime(); @@ -602,14 +611,12 @@ void FeatureSpace::sis(std::vector<double>& prop) if(_mpi_comm->rank() == 0) { - std::cout << "setup" << std::endl; std::vector<double> sent_scores(_n_sis_select * _mpi_comm->size(), std::numeric_limits<double>::infinity()); std::vector<node_ptr> sent_phi(_n_sis_select * _mpi_comm->size()); std::copy_n(scores_sel.begin(), _n_sis_select, sent_scores.begin()); std::copy_n(phi_sel.begin(), _n_sis_select, sent_phi.begin()); - std::cout << "recv" << std::endl; for(int rr = 1; rr < _mpi_comm->size(); ++rr) { _mpi_comm->recv(rr, _mpi_comm->cantorTagGen(rr, 0, 2, 0), &sent_scores[rr * _n_sis_select], _n_sis_select); @@ -631,6 +638,7 @@ void FeatureSpace::sis(std::vector<double>& prop) if((sent_scores[inds[ii]] != std::numeric_limits<double>::infinity()) && _is_valid(sent_phi[inds[ii]]->value_ptr(), _n_samp, _cross_cor_max, scores_sel_all, sent_scores[inds[ii]], cur_feat + cur_feat_local, cur_feat)) { + sent_phi[inds[ii]]->set_value(); out_file_stream << std::setw(14) <<std::left << cur_feat + cur_feat_local << sent_phi[inds[ii]]->postfix_expr() << std::endl; sum_file_stream << std::setw(14) <<std::left << cur_feat + cur_feat_local << std::setw(24) << 
std::setprecision(18) << std::left << -1 * sent_scores[inds[ii]] << sent_phi[inds[ii]]->expr() << std::endl; diff --git a/src/feature_creation/node/FeatureNode.cpp b/src/feature_creation/node/FeatureNode.cpp index af95eca7..7224f844 100644 --- a/src/feature_creation/node/FeatureNode.cpp +++ b/src/feature_creation/node/FeatureNode.cpp @@ -20,6 +20,21 @@ FeatureNode::FeatureNode(unsigned long int feat_ind, std::string expr, std::vect FeatureNode::~FeatureNode() {} +bool FeatureNode::is_const() +{ + bool is_c = false; + int pos = 0; + + double* val_ptr = value_ptr(); + for(auto& sz : node_value_arrs::TASK_SZ_TRAIN) + { + double mean = util_funcs::mean(val_ptr + pos, sz); + is_c = is_c || std::all_of(val_ptr + pos, val_ptr + pos + sz, [&mean](double d){return std::abs(d - mean) < 1e-12;}); + pos += sz; + } + return is_c; +} + void FeatureNode::update_add_sub_leaves(std::map<std::string, int>& add_sub_leaves, int pl_mn, int& expected_abs_tot) { if(add_sub_leaves.count(_expr) > 0) diff --git a/src/feature_creation/node/FeatureNode.hpp b/src/feature_creation/node/FeatureNode.hpp index edb15caa..e672e211 100644 --- a/src/feature_creation/node/FeatureNode.hpp +++ b/src/feature_creation/node/FeatureNode.hpp @@ -187,11 +187,7 @@ public: /** * @brief Check if feature is constant */ - inline bool is_const() - { - double mean = util_funcs::mean(value_ptr(), _n_samp); - return std::all_of(value_ptr(), value_ptr() + _n_samp, [&mean](double d){return std::abs(d - mean) < 1e-12;}); - } + bool is_const(); /** * @brief Returns the type of node this is diff --git a/src/feature_creation/node/operator_nodes/OperatorNode.hpp b/src/feature_creation/node/operator_nodes/OperatorNode.hpp index 5384d0d5..7aaf5c26 100644 --- a/src/feature_creation/node/operator_nodes/OperatorNode.hpp +++ b/src/feature_creation/node/operator_nodes/OperatorNode.hpp @@ -168,7 +168,11 @@ public: offset = (offset == -1) ? 
rung() : offset; if((rung() > node_value_arrs::N_RUNGS_STORED) && (node_value_arrs::temp_storage_reg(_arr_ind, offset) != _feat_ind)) + { + _arr_ind = node_value_arrs::N_STORE_FEATURES + node_value_arrs::NEXT_IND; + node_value_arrs::NEXT_IND = (node_value_arrs::NEXT_IND + 1) % node_value_arrs::N_STORE_FEATURES; set_value(offset); + } return node_value_arrs::get_value_ptr(_arr_ind, _feat_ind, offset); } @@ -185,7 +189,11 @@ public: { offset = (offset == -1) ? rung() : offset; if((rung() > node_value_arrs::N_RUNGS_STORED) && (node_value_arrs::temp_storage_test_reg(_arr_ind, offset) != _feat_ind)) + { + _arr_ind = node_value_arrs::N_STORE_FEATURES + node_value_arrs::NEXT_IND; + node_value_arrs::NEXT_IND = (node_value_arrs::NEXT_IND + 1) % node_value_arrs::N_STORE_FEATURES; set_test_value(offset); + } return node_value_arrs::get_test_value_ptr(_arr_ind, _feat_ind, offset); } @@ -194,15 +202,28 @@ public: /** * @brief Check if the feature contains NaN */ - inline bool is_nan(){return std::any_of(value_ptr(), value_ptr() + _n_samp, [](double d){return !std::isfinite(d);});} + inline bool is_nan() + { + double* val_ptr = value_ptr(); + return std::any_of(val_ptr, val_ptr + _n_samp, [](double d){return !std::isfinite(d);}); + } // DocString: op_node_is_const /** * @brief Check if feature is constant */ - inline bool is_const() + bool is_const() { - return util_funcs::stand_dev(value_ptr(), _n_samp) < 1.0e-13; + double* val_ptr = value_ptr(); + + bool is_c = false;//util_funcs::stand_dev(val_ptr, _n_samp) < 1.0e-13; + int pos = 0; + for(auto& sz : node_value_arrs::TASK_SZ_TRAIN) + { + is_c = is_c || (util_funcs::stand_dev(val_ptr + pos, sz) < 1.0e-13); + pos += sz; + } + return is_c; } // DocString: op_node_rung diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/abs/absolute_value.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/abs/absolute_value.cpp index a898ab74..dc5f96b0 100644 --- 
a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/abs/absolute_value.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/abs/absolute_value.cpp @@ -10,14 +10,13 @@ void generateAbsNode(std::vector<node_ptr>& feat_list, node_ptr feat, unsigned l if(*std::min_element(val_ptr, val_ptr + feat->n_samp()) > 0.0) return; - int offset = feat->rung() + 1; - val_ptr = node_value_arrs::get_value_ptr(feat_ind, feat_ind, offset); - allowed_op_funcs::abs(feat->n_samp(), feat->value_ptr(offset + 2), val_ptr); + node_ptr new_feat = std::make_shared<AbsNode>(feat, feat_ind); + val_ptr = new_feat->value_ptr(); - if((util_funcs::stand_dev(val_ptr, feat->n_samp()) < 1.0e-13) || std::any_of(val_ptr, val_ptr + feat->n_samp(), [&u_bound](double d){return !std::isfinite(d) || (std::abs(d) > u_bound);}) || (util_funcs::max_abs_val<double>(val_ptr, feat->n_samp()) < l_bound)) + if(new_feat->is_const() || std::any_of(val_ptr, val_ptr + new_feat->n_samp(), [&u_bound](double d){return !std::isfinite(d) || (std::abs(d) > u_bound);}) || (util_funcs::max_abs_val<double>(val_ptr, new_feat->n_samp()) < l_bound)) return; - feat_list.push_back(std::make_shared<AbsNode>(feat, feat_ind)); + feat_list.push_back(new_feat); } AbsNode::AbsNode() diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/abs_diff/absolute_difference.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/abs_diff/absolute_difference.cpp index 705b91d7..91f8054f 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/abs_diff/absolute_difference.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/abs_diff/absolute_difference.cpp @@ -19,14 +19,13 @@ void generateAbsDiffNode(std::vector<node_ptr>& feat_list, node_ptr feat_1, node if((std::abs(add_sub_tot_first) > 1) && std::all_of(add_sub_leaves.begin(), add_sub_leaves.end(), [&add_sub_tot_first](auto el){return std::abs(el.second) == add_sub_tot_first;})) return; - 
int offset = std::max(feat_1->rung(), feat_2->rung()) + 1; - double* val_ptr = node_value_arrs::get_value_ptr(feat_ind, feat_ind, offset); - allowed_op_funcs::abs_diff(feat_1->n_samp(), feat_1->value_ptr(offset + 2), feat_2->value_ptr(offset + 1), val_ptr); + node_ptr new_feat = std::make_shared<AbsDiffNode>(feat_1, feat_2, feat_ind); + double* val_ptr = new_feat->value_ptr(); - if((util_funcs::stand_dev(val_ptr, feat_1->n_samp()) < 1.0e-13) || std::any_of(val_ptr, val_ptr + feat_1->n_samp(), [&u_bound](double d){return !std::isfinite(d) || (std::abs(d) > u_bound);}) || (util_funcs::max_abs_val<double>(val_ptr, feat_1->n_samp()) < l_bound)) + if(new_feat->is_const() || std::any_of(val_ptr, val_ptr + new_feat->n_samp(), [&u_bound](double d){return !std::isfinite(d) || (std::abs(d) > u_bound);}) || (util_funcs::max_abs_val<double>(val_ptr, new_feat->n_samp()) < l_bound)) return; - feat_list.push_back(std::make_shared<AbsDiffNode>(feat_1, feat_2, feat_ind)); + feat_list.push_back(new_feat); } AbsDiffNode::AbsDiffNode() diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/add/add.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/add/add.cpp index 3d381cc6..e885764f 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/add/add.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/add/add.cpp @@ -18,14 +18,12 @@ void generateAddNode(std::vector<node_ptr>& feat_list, node_ptr feat_1, node_ptr if((std::abs(add_sub_tot_first) > 1) && std::all_of(add_sub_leaves.begin(), add_sub_leaves.end(), [&add_sub_tot_first](auto el){return std::abs(el.second) == add_sub_tot_first;})) return; - int offset = feat_1->rung() + 1; - double* val_ptr = node_value_arrs::get_value_ptr(feat_ind, feat_ind, offset); - allowed_op_funcs::add(feat_1->n_samp(), feat_1->value_ptr(offset + 2), feat_2->value_ptr(offset + 1), val_ptr); + node_ptr new_feat = std::make_shared<AddNode>(feat_1, feat_2, feat_ind); + double* 
val_ptr = new_feat->value_ptr(); + if(new_feat->is_const() || std::any_of(val_ptr, val_ptr + new_feat->n_samp(), [&u_bound](double d){return !std::isfinite(d) || (std::abs(d) > u_bound);}) || (util_funcs::max_abs_val<double>(val_ptr, new_feat->n_samp()) < l_bound)) + return; - if((util_funcs::stand_dev(val_ptr, feat_1->n_samp()) < 1.0e-13) || std::any_of(val_ptr, val_ptr + feat_1->n_samp(), [&u_bound](double d){return !std::isfinite(d) || (std::abs(d) > u_bound);}) || (util_funcs::max_abs_val<double>(val_ptr, feat_1->n_samp()) < l_bound)) - return; - - feat_list.push_back(std::make_shared<AddNode>(feat_1, feat_2, feat_ind)); + feat_list.push_back(new_feat); } AddNode::AddNode() diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cb/cube.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cb/cube.cpp index a77300d4..2db055bc 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cb/cube.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cb/cube.cpp @@ -6,14 +6,14 @@ void generateCbNode(std::vector<node_ptr>& feat_list, node_ptr feat, unsigned lo if((feat->type() == NODE_TYPE::CBRT) || (feat->type() == NODE_TYPE::SQ) || (feat->type() == NODE_TYPE::INV)) return; - int offset = feat->rung() + 1; - double* val_ptr = node_value_arrs::get_value_ptr(feat_ind, feat_ind, offset); - allowed_op_funcs::cb(feat->n_samp(), feat->value_ptr(offset + 2), val_ptr); + node_ptr new_feat = std::make_shared<CbNode>(feat, feat_ind); + double* val_ptr = new_feat->value_ptr(); - if((util_funcs::stand_dev(val_ptr, feat->n_samp()) < 1.0e-13) || std::any_of(val_ptr, val_ptr + feat->n_samp(), [&u_bound](double d){return !std::isfinite(d) || (std::abs(d) > u_bound);}) || (util_funcs::max_abs_val<double>(val_ptr, feat->n_samp()) < l_bound)) + // No is_const check since cube function can only be constant if feat is constant + if(std::any_of(val_ptr, val_ptr + new_feat->n_samp(), [&u_bound](double d){return 
!std::isfinite(d) || (std::abs(d) > u_bound);}) || (util_funcs::max_abs_val<double>(val_ptr, new_feat->n_samp()) < l_bound)) return; - feat_list.push_back(std::make_shared<CbNode>(feat, feat_ind)); + feat_list.push_back(new_feat); } CbNode::CbNode() @@ -30,7 +30,9 @@ CbNode::CbNode(node_ptr feat, unsigned long int feat_ind, double l_bound, double throw InvalidFeatureException(); set_value(); - if(is_nan() || is_const() || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) > u_bound) || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) < l_bound)) + + // No is_const check since cube function can only be constant if feat is constant + if(is_nan() || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) > u_bound) || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) < l_bound)) throw InvalidFeatureException(); } diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cbrt/cube_root.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cbrt/cube_root.cpp index fdb3020c..bb999ff8 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cbrt/cube_root.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cbrt/cube_root.cpp @@ -6,14 +6,14 @@ void generateCbrtNode(std::vector<node_ptr>& feat_list, node_ptr feat, unsigned if((feat->type() == NODE_TYPE::CB) || (feat->type() == NODE_TYPE::SQ) || (feat->type() == NODE_TYPE::SIX_POW) || (feat->type() == NODE_TYPE::INV)) return; - int offset = feat->rung() + 1; - double* val_ptr = node_value_arrs::get_value_ptr(feat_ind, feat_ind, offset); - allowed_op_funcs::cbrt(feat->n_samp(), feat->value_ptr(offset + 2), val_ptr); + node_ptr new_feat = std::make_shared<CbrtNode>(feat, feat_ind); + double* val_ptr = new_feat->value_ptr(); - if((util_funcs::stand_dev(val_ptr, feat->n_samp()) < 1.0e-13) || std::any_of(val_ptr, val_ptr + feat->n_samp(), [&u_bound](double d){return !std::isfinite(d) || (std::abs(d) > u_bound);}) || 
(util_funcs::max_abs_val<double>(val_ptr, feat->n_samp()) < l_bound)) + // No is_const check since cube function can only be constant if feat is constant + if(std::any_of(val_ptr, val_ptr + new_feat->n_samp(), [&u_bound](double d){return !std::isfinite(d) || (std::abs(d) > u_bound);}) || (util_funcs::max_abs_val<double>(val_ptr, new_feat->n_samp()) < l_bound)) return; - feat_list.push_back(std::make_shared<CbrtNode>(feat, feat_ind)); + feat_list.push_back(new_feat); } CbrtNode::CbrtNode() @@ -30,9 +30,10 @@ CbrtNode::CbrtNode(node_ptr feat, unsigned long int feat_ind, double l_bound, do throw InvalidFeatureException(); set_value(); - if(is_nan() || is_const() || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) > u_bound) || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) < l_bound)) - throw InvalidFeatureException(); + // No is_const check since cube function can only be constant if feat is constant + if(is_nan() || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) > u_bound) || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) < l_bound)) + throw InvalidFeatureException(); } void CbrtNode::update_add_sub_leaves(std::map<std::string, int>& add_sub_leaves, int pl_mn, int& expected_abs_tot) diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cos/cos.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cos/cos.cpp index 05c76497..3ea3cf7a 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cos/cos.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cos/cos.cpp @@ -6,14 +6,13 @@ void generateCosNode(std::vector<node_ptr>& feat_list, node_ptr feat, unsigned l if(feat->unit() != Unit() || (feat->type() == NODE_TYPE::SIN) || (feat->type() == NODE_TYPE::COS)) return; - int offset = feat->rung() + 1; - double* val_ptr = node_value_arrs::get_value_ptr(feat_ind, feat_ind, offset); - allowed_op_funcs::cos(feat->n_samp(), feat->value_ptr(offset + 2), val_ptr); + 
node_ptr new_feat = std::make_shared<CosNode>(feat, feat_ind); + double* val_ptr = new_feat->value_ptr(); - if((util_funcs::stand_dev(val_ptr, feat->n_samp()) < 1.0e-13) || std::any_of(val_ptr, val_ptr + feat->n_samp(), [&u_bound](double d){return !std::isfinite(d) || (std::abs(d) > u_bound);}) || (util_funcs::max_abs_val<double>(val_ptr, feat->n_samp()) < l_bound)) + if(new_feat->is_const() || std::any_of(val_ptr, val_ptr + new_feat->n_samp(), [&u_bound](double d){return !std::isfinite(d) || (std::abs(d) > u_bound);}) || (util_funcs::max_abs_val<double>(val_ptr, new_feat->n_samp()) < l_bound)) return; - feat_list.push_back(std::make_shared<CosNode>(feat, feat_ind)); + feat_list.push_back(new_feat); } CosNode::CosNode() diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/div/divide.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/div/divide.cpp index c109f338..6ca45a43 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/div/divide.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/div/divide.cpp @@ -19,14 +19,12 @@ void generateDivNode(std::vector<node_ptr>& feat_list, node_ptr feat_1, node_ptr if((std::abs(div_mult_tot_first) != 1.0) && std::all_of(div_mult_leaves.begin(), div_mult_leaves.end(), [&div_mult_tot_first](auto el){return el.second == div_mult_tot_first;})) return; - int offset = std::max(feat_1->rung(), feat_2->rung()) + 1; - double* val_ptr = node_value_arrs::get_value_ptr(feat_ind, feat_ind, offset); - allowed_op_funcs::div(feat_1->n_samp(), feat_1->value_ptr(offset + 2), feat_2->value_ptr(offset + 1), val_ptr); - - if((util_funcs::stand_dev(val_ptr, feat_1->n_samp()) < 1.0e-13) || std::any_of(val_ptr, val_ptr + feat_1->n_samp(), [&u_bound](double d){return !std::isfinite(d) || (std::abs(d) > u_bound);}) || (util_funcs::max_abs_val<double>(val_ptr, feat_1->n_samp()) < l_bound)) + node_ptr new_feat = std::make_shared<DivNode>(feat_1, feat_2, feat_ind); + 
double* val_ptr = new_feat->value_ptr(); + if(new_feat->is_const() || std::any_of(val_ptr, val_ptr + new_feat->n_samp(), [&u_bound](double d){return !std::isfinite(d) || (std::abs(d) > u_bound);}) || (util_funcs::max_abs_val<double>(val_ptr, new_feat->n_samp()) < l_bound)) return; - feat_list.push_back(std::make_shared<DivNode>(feat_1, feat_2, feat_ind)); + feat_list.push_back(new_feat); } DivNode::DivNode() diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/exp/exponential.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/exp/exponential.cpp index 81fa3608..6c1c01ea 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/exp/exponential.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/exp/exponential.cpp @@ -6,14 +6,13 @@ void generateExpNode(std::vector<node_ptr>& feat_list, node_ptr feat, unsigned l if((feat->unit() != Unit()) || (feat->type() == NODE_TYPE::NEG_EXP) || (feat->type() == NODE_TYPE::EXP) || (feat->type() == NODE_TYPE::ADD) || (feat->type() == NODE_TYPE::SUB) || (feat->type() == NODE_TYPE::LOG)) return; - int offset = feat->rung() + 1; - double* val_ptr = node_value_arrs::get_value_ptr(feat_ind, feat_ind, offset); - allowed_op_funcs::exp(feat->n_samp(), feat->value_ptr(offset + 2), val_ptr); + node_ptr new_feat = std::make_shared<ExpNode>(feat, feat_ind); + double* val_ptr = new_feat->value_ptr(); - if((util_funcs::stand_dev(val_ptr, feat->n_samp()) < 1.0e-13) || std::any_of(val_ptr, val_ptr + feat->n_samp(), [&u_bound](double d){return !std::isfinite(d) || (std::abs(d) > u_bound);}) || (util_funcs::max_abs_val<double>(val_ptr, feat->n_samp()) < l_bound)) - return; - - feat_list.push_back(std::make_shared<ExpNode>(feat, feat_ind)); + // No is_const check since cube function can only be constant if feat is constant + if(std::any_of(val_ptr, val_ptr + new_feat->n_samp(), [&u_bound](double d){return !std::isfinite(d) || (std::abs(d) > u_bound);}) || 
(util_funcs::max_abs_val<double>(val_ptr, new_feat->n_samp()) < l_bound)) + return; + feat_list.push_back(new_feat); } ExpNode::ExpNode() @@ -33,7 +32,9 @@ ExpNode::ExpNode(node_ptr feat, unsigned long int feat_ind, double l_bound, doub throw InvalidFeatureException(); set_value(); - if(is_nan() || is_const() || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) > u_bound) || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) < l_bound)) + + // No is_const check since cube function can only be constant if feat is constant + if(is_nan() || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) > u_bound) || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) < l_bound)) throw InvalidFeatureException(); } diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/inv/inverse.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/inv/inverse.cpp index f4d9a020..1177493e 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/inv/inverse.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/inv/inverse.cpp @@ -6,14 +6,14 @@ void generateInvNode(std::vector<node_ptr>& feat_list, node_ptr feat, unsigned l if((feat->type() == NODE_TYPE::DIV) || (feat->type() == NODE_TYPE::EXP) || (feat->type() == NODE_TYPE::NEG_EXP) || (feat->type() == NODE_TYPE::INV)) return; - int offset = feat->rung() + 1; - double* val_ptr = node_value_arrs::get_value_ptr(feat_ind, feat_ind, offset); - allowed_op_funcs::inv(feat->n_samp(), feat->value_ptr(offset + 2), val_ptr); + node_ptr new_feat = std::make_shared<InvNode>(feat, feat_ind); + double* val_ptr = new_feat->value_ptr(); - if((util_funcs::stand_dev(val_ptr, feat->n_samp()) < 1.0e-13) || std::any_of(val_ptr, val_ptr + feat->n_samp(), [&u_bound](double d){return !std::isfinite(d) || (std::abs(d) > u_bound);}) || (util_funcs::max_abs_val<double>(val_ptr, feat->n_samp()) < l_bound)) - return; + // No is_const check since cube function can only be constant if 
feat is constant + if(std::any_of(val_ptr, val_ptr + new_feat->n_samp(), [&u_bound](double d){return !std::isfinite(d) || (std::abs(d) > u_bound);}) || (util_funcs::max_abs_val<double>(val_ptr, new_feat->n_samp()) < l_bound)) + return; - feat_list.push_back(std::make_shared<InvNode>(feat, feat_ind)); + feat_list.push_back(new_feat); } InvNode::InvNode() @@ -30,7 +30,9 @@ InvNode::InvNode(node_ptr feat, unsigned long int feat_ind, double l_bound, doub throw InvalidFeatureException(); set_value(); - if(is_nan() || is_const() || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) > u_bound) || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) < l_bound)) + + // No is_const check since cube function can only be constant if feat is constant + if(is_nan() || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) > u_bound) || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) < l_bound)) throw InvalidFeatureException(); } diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/log/log.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/log/log.cpp index fdeb581c..997a7d85 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/log/log.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/log/log.cpp @@ -6,14 +6,14 @@ void generateLogNode(std::vector<node_ptr>& feat_list, node_ptr feat, unsigned l if(feat->unit() != Unit() || (feat->type() == NODE_TYPE::NEG_EXP) || (feat->type() == NODE_TYPE::EXP) || (feat->type() == NODE_TYPE::DIV) || (feat->type() == NODE_TYPE::INV) || (feat->type() == NODE_TYPE::MULT) || (feat->type() == NODE_TYPE::LOG) || (feat->type() == NODE_TYPE::SIX_POW) || (feat->type() == NODE_TYPE::CB) || (feat->type() == NODE_TYPE::SQ) || (feat->type() == NODE_TYPE::CBRT) || (feat->type() == NODE_TYPE::SQRT)) return; - int offset = feat->rung() + 1; - double* val_ptr = node_value_arrs::get_value_ptr(feat_ind, feat_ind, offset); - allowed_op_funcs::log(feat->n_samp(), 
feat->value_ptr(offset + 2), val_ptr); + node_ptr new_feat = std::make_shared<LogNode>(feat, feat_ind); + double* val_ptr = new_feat->value_ptr(); - if((util_funcs::stand_dev(val_ptr, feat->n_samp()) < 1.0e-13) || std::any_of(val_ptr, val_ptr + feat->n_samp(), [&u_bound](double d){return !std::isfinite(d) || (std::abs(d) > u_bound);}) || (util_funcs::max_abs_val<double>(val_ptr, feat->n_samp()) < l_bound)) + // No is_const check since cube function can only be constant if feat is constant + if(std::any_of(val_ptr, val_ptr + new_feat->n_samp(), [&u_bound](double d){return !std::isfinite(d) || (std::abs(d) > u_bound);}) || (util_funcs::max_abs_val<double>(val_ptr, new_feat->n_samp()) < l_bound)) return; - feat_list.push_back(std::make_shared<LogNode>(feat, feat_ind)); + feat_list.push_back(new_feat); } LogNode::LogNode() @@ -33,7 +33,9 @@ LogNode::LogNode(node_ptr feat, unsigned long int feat_ind, double l_bound, doub throw InvalidFeatureException(); set_value(); - if(is_nan() || is_const() || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) > u_bound) || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) < l_bound)) + + // No is_const check since cube function can only be constant if feat is constant + if(is_nan() || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) > u_bound) || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) < l_bound)) throw InvalidFeatureException(); } diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/mult/multiply.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/mult/multiply.cpp index ed603250..3e78a6fc 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/mult/multiply.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/mult/multiply.cpp @@ -19,14 +19,12 @@ void generateMultNode(std::vector<node_ptr>& feat_list, node_ptr feat_1, node_pt if((std::abs(div_mult_tot_first) - 1.0 > 1e-12) && std::all_of(div_mult_leaves.begin(), 
div_mult_leaves.end(), [&div_mult_tot_first](auto el){return std::abs(el.second) == div_mult_tot_first;})) return; - int offset = std::max(feat_1->rung(), feat_2->rung()) + 1; - double* val_ptr = node_value_arrs::get_value_ptr(feat_ind, feat_ind, offset); - allowed_op_funcs::mult(feat_1->n_samp(), feat_1->value_ptr(offset + 2), feat_2->value_ptr(offset + 1), val_ptr); - - if((util_funcs::stand_dev(val_ptr, feat_1->n_samp()) < 1.0e-13) || std::any_of(val_ptr, val_ptr + feat_1->n_samp(), [&u_bound](double d){return !std::isfinite(d) || (std::abs(d) > u_bound);}) || (util_funcs::max_abs_val<double>(val_ptr, feat_1->n_samp()) < l_bound)) + node_ptr new_feat = std::make_shared<MultNode>(feat_1, feat_2, feat_ind); + double* val_ptr = new_feat->value_ptr(); + if(new_feat->is_const() || std::any_of(val_ptr, val_ptr + new_feat->n_samp(), [&u_bound](double d){return !std::isfinite(d) || (std::abs(d) > u_bound);}) || (util_funcs::max_abs_val<double>(val_ptr, new_feat->n_samp()) < l_bound)) return; - feat_list.push_back(std::make_shared<MultNode>(feat_1, feat_2, feat_ind)); + feat_list.push_back(new_feat); } MultNode::MultNode() diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/neg_exp/negative_exponential.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/neg_exp/negative_exponential.cpp index 55614a39..cdaebd91 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/neg_exp/negative_exponential.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/neg_exp/negative_exponential.cpp @@ -6,14 +6,14 @@ void generateNegExpNode(std::vector<node_ptr>& feat_list, node_ptr feat, unsigne if(feat->unit() != Unit() || (feat->type() == NODE_TYPE::NEG_EXP) || (feat->type() == NODE_TYPE::EXP) || (feat->type() == NODE_TYPE::ADD) || (feat->type() == NODE_TYPE::SUB) || (feat->type() == NODE_TYPE::LOG)) return; - int offset = feat->rung() + 1; - double* val_ptr = node_value_arrs::get_value_ptr(feat_ind, 
feat_ind, offset); - allowed_op_funcs::neg_exp(feat->n_samp(), feat->value_ptr(offset + 2), val_ptr); + node_ptr new_feat = std::make_shared<NegExpNode>(feat, feat_ind); + double* val_ptr = new_feat->value_ptr(); - if((util_funcs::stand_dev(val_ptr, feat->n_samp()) < 1.0e-13) || std::any_of(val_ptr, val_ptr + feat->n_samp(), [&u_bound](double d){return !std::isfinite(d) || (std::abs(d) > u_bound);}) || (util_funcs::max_abs_val<double>(val_ptr, feat->n_samp()) < l_bound)) + // No is_const check since negative exponential function can only be constant if feat is constant + if(std::any_of(val_ptr, val_ptr + new_feat->n_samp(), [&u_bound](double d){return !std::isfinite(d) || (std::abs(d) > u_bound);}) || (util_funcs::max_abs_val<double>(val_ptr, new_feat->n_samp()) < l_bound)) return; - feat_list.push_back(std::make_shared<NegExpNode>(feat, feat_ind)); + feat_list.push_back(new_feat); } NegExpNode::NegExpNode() @@ -33,7 +33,9 @@ NegExpNode::NegExpNode(node_ptr feat, unsigned long int feat_ind, double l_bound throw InvalidFeatureException(); set_value(); - if(is_nan() || is_const() || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) > u_bound) || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) < l_bound)) + + // No is_const check since negative exponential function can only be constant if feat is constant + if(is_nan() || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) > u_bound) || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) < l_bound)) throw InvalidFeatureException(); } diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sin/sin.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sin/sin.cpp index 5914ec20..d503053f 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sin/sin.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sin/sin.cpp @@ -6,14 +6,12 @@ void generateSinNode(std::vector<node_ptr>& feat_list, node_ptr feat, unsigned l if(feat->unit() != Unit() || (feat->type() ==
NODE_TYPE::SIN) || (feat->type() == NODE_TYPE::COS)) return; - int offset = feat->rung() + 1; - double* val_ptr = node_value_arrs::get_value_ptr(feat_ind, feat_ind, offset); - allowed_op_funcs::sin(feat->n_samp(), feat->value_ptr(offset + 2), val_ptr); + node_ptr new_feat = std::make_shared<SinNode>(feat, feat_ind); + double* val_ptr = new_feat->value_ptr(); + if(new_feat->is_const() || std::any_of(val_ptr, val_ptr + new_feat->n_samp(), [&u_bound](double d){return !std::isfinite(d) || (std::abs(d) > u_bound);}) || (util_funcs::max_abs_val<double>(val_ptr, new_feat->n_samp()) < l_bound)) + return; - if((util_funcs::stand_dev(val_ptr, feat->n_samp()) < 1.0e-13) || std::any_of(val_ptr, val_ptr + feat->n_samp(), [&u_bound](double d){return !std::isfinite(d) || (std::abs(d) > u_bound);}) || (util_funcs::max_abs_val<double>(val_ptr, feat->n_samp()) < l_bound)) - return; - - feat_list.push_back(std::make_shared<SinNode>(feat, feat_ind)); + feat_list.push_back(new_feat); } SinNode::SinNode() diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sp/sixth_power.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sp/sixth_power.cpp index 8e2887aa..7dd1f6b2 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sp/sixth_power.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sp/sixth_power.cpp @@ -6,14 +6,13 @@ void generateSixPowNode(std::vector<node_ptr>& feat_list, node_ptr feat, unsigne if((feat->type() == NODE_TYPE::CBRT) || (feat->type() == NODE_TYPE::SQRT) || (feat->type() == NODE_TYPE::SQ) || (feat->type() == NODE_TYPE::CB) || (feat->type() == NODE_TYPE::INV)) return; - int offset = feat->rung() + 1; - double* val_ptr = node_value_arrs::get_value_ptr(feat_ind, feat_ind, offset); - allowed_op_funcs::sixth_pow(feat->n_samp(), feat->value_ptr(offset + 2), val_ptr); + node_ptr new_feat = std::make_shared<SixPowNode>(feat, feat_ind); + double* val_ptr = new_feat->value_ptr(); - 
if((util_funcs::stand_dev(val_ptr, feat->n_samp()) < 1.0e-13) || std::any_of(val_ptr, val_ptr + feat->n_samp(), [&u_bound](double d){return (!std::isfinite(d)) || (std::abs(d) > u_bound);}) || (util_funcs::max_abs_val<double>(val_ptr, feat->n_samp()) < l_bound)) + if(new_feat->is_const() || std::any_of(val_ptr, val_ptr + new_feat->n_samp(), [&u_bound](double d){return (!std::isfinite(d)) || (std::abs(d) > u_bound);}) || (util_funcs::max_abs_val<double>(val_ptr, new_feat->n_samp()) < l_bound)) return; - feat_list.push_back(std::make_shared<SixPowNode>(feat, feat_ind)); + feat_list.push_back(new_feat); } SixPowNode::SixPowNode() diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sq/square.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sq/square.cpp index fa0edfc8..249e6f92 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sq/square.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sq/square.cpp @@ -6,14 +6,13 @@ void generateSqNode(std::vector<node_ptr>& feat_list, node_ptr feat, unsigned lo if((feat->type() == NODE_TYPE::SQRT) || (feat->type() == NODE_TYPE::INV)) return; - int offset = feat->rung() + 1; - double* val_ptr = node_value_arrs::get_value_ptr(feat_ind, feat_ind, offset); - allowed_op_funcs::sq(feat->n_samp(), feat->value_ptr(offset + 2), val_ptr); + node_ptr new_feat = std::make_shared<SqNode>(feat, feat_ind); + double* val_ptr = new_feat->value_ptr(); - if((util_funcs::stand_dev(val_ptr, feat->n_samp()) < 1.0e-13) || std::any_of(val_ptr, val_ptr + feat->n_samp(), [&u_bound](double d){return !std::isfinite(d) || (std::abs(d) > u_bound);}) || (util_funcs::max_abs_val<double>(val_ptr, feat->n_samp()) < l_bound)) + if(new_feat->is_const() || std::any_of(val_ptr, val_ptr + new_feat->n_samp(), [&u_bound](double d){return !std::isfinite(d) || (std::abs(d) > u_bound);}) || (util_funcs::max_abs_val<double>(val_ptr, new_feat->n_samp()) < l_bound)) return; - 
feat_list.push_back(std::make_shared<SqNode>(feat, feat_ind)); + feat_list.push_back(new_feat); } SqNode::SqNode() diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sqrt/square_root.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sqrt/square_root.cpp index 6a126983..ea0fc869 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sqrt/square_root.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sqrt/square_root.cpp @@ -6,14 +6,13 @@ void generateSqrtNode(std::vector<node_ptr>& feat_list, node_ptr feat, unsigned if((feat->type() == NODE_TYPE::SQ) || (feat->type() == NODE_TYPE::CB) || (feat->type() == NODE_TYPE::SIX_POW) || (feat->type() == NODE_TYPE::CBRT) || (feat->type() == NODE_TYPE::INV)) return; - int offset = feat->rung() + 1; - double* val_ptr = node_value_arrs::get_value_ptr(feat_ind, feat_ind, offset); - allowed_op_funcs::sqrt(feat->n_samp(), feat->value_ptr(offset + 2), val_ptr); + node_ptr new_feat = std::make_shared<SqrtNode>(feat, feat_ind); + double* val_ptr = new_feat->value_ptr(); - if((util_funcs::stand_dev(val_ptr, feat->n_samp()) < 1.0e-13) || std::any_of(val_ptr, val_ptr + feat->n_samp(), [&u_bound](double d){return !std::isfinite(d) || (std::abs(d) > u_bound);}) || (util_funcs::max_abs_val<double>(val_ptr, feat->n_samp()) < l_bound)) + if(std::any_of(val_ptr, val_ptr + new_feat->n_samp(), [&u_bound](double d){return !std::isfinite(d) || (std::abs(d) > u_bound);}) || (util_funcs::max_abs_val<double>(val_ptr, new_feat->n_samp()) < l_bound)) return; - feat_list.push_back(std::make_shared<SqrtNode>(feat, feat_ind)); + feat_list.push_back(new_feat); } SqrtNode::SqrtNode() diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sub/subtract.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sub/subtract.cpp index 53313a68..a338297a 100644 --- 
a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sub/subtract.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sub/subtract.cpp @@ -17,14 +17,13 @@ void generateSubNode(std::vector<node_ptr>& feat_list, node_ptr feat_1, node_ptr if((std::abs(add_sub_tot_first) > 1) && std::all_of(add_sub_leaves.begin(), add_sub_leaves.end(), [&add_sub_tot_first](auto el){return std::abs(el.second) == add_sub_tot_first;})) return; - int offset = std::max(feat_1->rung(), feat_2->rung()) + 1; - double* val_ptr = node_value_arrs::get_value_ptr(feat_ind, feat_ind, offset); - allowed_op_funcs::sub(feat_1->n_samp(), feat_1->value_ptr(offset + 2), feat_2->value_ptr(offset + 1), val_ptr); + node_ptr new_feat = std::make_shared<SubNode>(feat_1, feat_2, feat_ind); + double* val_ptr = new_feat->value_ptr(); - if((util_funcs::stand_dev(val_ptr, feat_1->n_samp()) < 1.0e-13) || std::any_of(val_ptr, val_ptr + feat_1->n_samp(), [&u_bound](double d){return !std::isfinite(d) || (std::abs(d) > u_bound);}) || (util_funcs::max_abs_val<double>(val_ptr, feat_1->n_samp()) < l_bound)) - return; + if(new_feat->is_const() || std::any_of(val_ptr, val_ptr + new_feat->n_samp(), [&u_bound](double d){return !std::isfinite(d) || (std::abs(d) > u_bound);}) || (util_funcs::max_abs_val<double>(val_ptr, new_feat->n_samp()) < l_bound)) + return; - feat_list.push_back(std::make_shared<SubNode>(feat_1, feat_2, feat_ind)); + feat_list.push_back(new_feat); } SubNode::SubNode() diff --git a/src/feature_creation/node/value_storage/nodes_value_containers.cpp b/src/feature_creation/node/value_storage/nodes_value_containers.cpp index 26d8945b..c92eda12 100644 --- a/src/feature_creation/node/value_storage/nodes_value_containers.cpp +++ b/src/feature_creation/node/value_storage/nodes_value_containers.cpp @@ -1,23 +1,33 @@ #include <feature_creation/node/value_storage/nodes_value_containers.hpp> - +#include <iostream> int node_value_arrs::N_SELECTED = 0; int node_value_arrs::N_SAMPLES = 0; 
-int node_value_arrs::N_STORE_FEATURES = 0; +unsigned long int node_value_arrs::N_STORE_FEATURES = 0; int node_value_arrs::N_RUNGS_STORED = 0; int node_value_arrs::N_SAMPLES_TEST = 0; int node_value_arrs::MAX_N_THREADS = omp_get_max_threads(); +unsigned long int node_value_arrs::NEXT_IND = 0; std::vector<int> node_value_arrs::TEMP_STORAGE_REG; std::vector<int> node_value_arrs::TEMP_STORAGE_TEST_REG; +std::vector<int> node_value_arrs::TASK_SZ_TRAIN; +std::vector<int> node_value_arrs::TASK_SZ_TEST; + std::vector<double> node_value_arrs::D_MATRIX; std::vector<double> node_value_arrs::VALUES_ARR; std::vector<double> node_value_arrs::TEST_VALUES_ARR; std::vector<double> node_value_arrs::TEMP_STORAGE_ARR; std::vector<double> node_value_arrs::TEMP_STORAGE_TEST_ARR; -void node_value_arrs::initialize_values_arr(int n_samples, int n_samples_test, int n_primary_feat) +void node_value_arrs::initialize_values_arr(int n_samples, int n_samples_test, int n_primary_feat, bool set_task_sz) { + if(set_task_sz) + TASK_SZ_TRAIN = {n_samples}; + + if(set_task_sz) + TASK_SZ_TEST = {n_samples_test}; + N_SAMPLES = n_samples; N_SAMPLES_TEST = n_samples_test; N_RUNGS_STORED = 0; @@ -33,7 +43,34 @@ void node_value_arrs::initialize_values_arr(int n_samples, int n_samples_test, i TEMP_STORAGE_TEST_REG = std::vector<int>(MAX_N_THREADS * (3 * N_STORE_FEATURES + 1), -1); } -void node_value_arrs::resize_values_arr(int n_dims, int n_feat, bool use_temp) +void node_value_arrs::initialize_values_arr(std::vector<int> task_sz_train, std::vector<int> task_sz_test, int n_primary_feat) +{ + TASK_SZ_TRAIN = task_sz_train; + TASK_SZ_TEST = task_sz_test; + + initialize_values_arr( + std::accumulate(task_sz_train.begin(), task_sz_train.end(), 0), + std::accumulate(task_sz_test.begin(), task_sz_test.end(), 0), + n_primary_feat, + false + ); +} + +void node_value_arrs::set_task_sz_train(std::vector<int> task_sz_train) +{ + if(std::accumulate(task_sz_train.begin(), task_sz_train.end(), 0) != N_SAMPLES) + throw 
std::logic_error("The total number of samples has changed, task_sz_train is wrong."); + TASK_SZ_TRAIN = task_sz_train; +} + +void node_value_arrs::set_task_sz_test(std::vector<int> task_sz_test) +{ + if(std::accumulate(task_sz_test.begin(), task_sz_test.end(), 0) != N_SAMPLES_TEST) + throw std::logic_error("The total number of test samples has changed, task_sz_test is wrong."); + TASK_SZ_TEST = task_sz_test; +} + +void node_value_arrs::resize_values_arr(int n_dims, unsigned long int n_feat, bool use_temp) { N_RUNGS_STORED = n_dims; N_STORE_FEATURES = n_feat; diff --git a/src/feature_creation/node/value_storage/nodes_value_containers.hpp b/src/feature_creation/node/value_storage/nodes_value_containers.hpp index 09c08022..c2a78277 100644 --- a/src/feature_creation/node/value_storage/nodes_value_containers.hpp +++ b/src/feature_creation/node/value_storage/nodes_value_containers.hpp @@ -12,6 +12,7 @@ #include <algorithm> #include <memory> +#include <numeric> #include <vector> #include <omp.h> @@ -28,12 +29,28 @@ namespace node_value_arrs extern std::vector<int> TEMP_STORAGE_REG; //!< Register to see which feature is stored in each slot for the training data extern std::vector<int> TEMP_STORAGE_TEST_REG; //!< Register to see which feature is stored in each slot for the test data + extern std::vector<int> TASK_SZ_TRAIN; //!< Number of training samples per task + extern std::vector<int> TASK_SZ_TEST; //!< Number of test sample per task + extern int N_SELECTED; //!< Number of features selected extern int N_SAMPLES; //!< Number of training samples for each feature extern int N_SAMPLES_TEST; //!< Number of test samples for each feature - extern int N_STORE_FEATURES; //!< Number of features with stored values + extern unsigned long int N_STORE_FEATURES; //!< Number of features with stored values extern int N_RUNGS_STORED; //!< Number of rungs with values stored extern int MAX_N_THREADS; //!< Get the maximum number of threads possible + extern unsigned long int NEXT_IND; //!< 
The next array index to use + + /** + * @brief Initialize the node value arrays + * @details Using the size of the initial feature space constructor the storage arrays + * + * @param n_samples Number of training samples for each feature + * @param n_samples_test Number of test samples for each feature + * @param n_primary_feat Number of primary features + * @param set_task_sz If True reset the task_sz vectors + */ + void initialize_values_arr(int n_samples, int n_samples_test, int n_primary_feat, bool set_task_sz); + /** * @brief Initialize the node value arrays * @details Using the size of the initial feature space constructor the storage arrays @@ -42,7 +59,20 @@ namespace node_value_arrs * @param n_samples_test Number of test samples for each feature * @param n_primary_feat Number of primary features */ - void initialize_values_arr(int n_samples, int n_samples_test, int n_primary_feat); + inline void initialize_values_arr(int n_samples, int n_samples_test, int n_primary_feat) + { + initialize_values_arr(n_samples, n_samples_test, n_primary_feat, true); + } + + /** + * @brief Initialize the node value arrays + * @details Using the size of the initial feature space constructor the storage arrays + * + * @param task_sz_train Number of training samples per task + * @param task_sz_test Number of test samples per task + * @param n_primary_feat Number of primary features + */ + void initialize_values_arr(std::vector<int> task_sz_train, std::vector<int> task_sz_test, int n_primary_feat); /** * @brief Resize the node value arrays @@ -52,7 +82,7 @@ namespace node_value_arrs * @param n_feat number of features to store * @param use_temp If true keep the temporary_storage */ - void resize_values_arr(int n_dims, int n_feat, bool use_temp); + void resize_values_arr(int n_dims, unsigned long int n_feat, bool use_temp); /** * @brief Initialize the descriptor matrix @@ -69,6 +99,20 @@ namespace node_value_arrs */ void resize_d_matrix_arr(int n_select); + /** + * @brief Reset
the global TASK_SZ_TRAIN vector + * + * @param task_sz_train the new task_sz train + */ + void set_task_sz_train(std::vector<int> task_sz_train); + + /** + * @brief Reset the global TASK_SZ_TEST vector + * + * @param task_sz_test the new task_sz test + */ + void set_task_sz_test(std::vector<int> task_sz_test); + /** * @brief Get a reference slot/feature register of the training data * diff --git a/src/python/bindings_docstring_keyed.cpp b/src/python/bindings_docstring_keyed.cpp index 0bcfdec0..32d5e90c 100644 --- a/src/python/bindings_docstring_keyed.cpp +++ b/src/python/bindings_docstring_keyed.cpp @@ -37,8 +37,10 @@ void sisso::register_all() sisso::feature_creation::node::registerSqrtNode(); sisso::feature_creation::node::registerSixPowNode(); + void (*init_val_ar)(int, int, int) = &node_value_arrs::initialize_values_arr; + def("phi_selected_from_file", &str2node::phi_selected_from_file_py); - def("initialize_values_arr", &node_value_arrs::initialize_values_arr); + def("initialize_values_arr", init_val_ar); def("initialize_d_matrix_arr", &node_value_arrs::initialize_d_matrix_arr); } -- GitLab