diff --git a/src/feature_creation/feature_space/FeatureSpace.cpp b/src/feature_creation/feature_space/FeatureSpace.cpp index ccd72d7cc5d2f5d4964450814eb9f5671fbe00e0..86b41bbd3d988a5129e6166e42aa3054a7577770 100644 --- a/src/feature_creation/feature_space/FeatureSpace.cpp +++ b/src/feature_creation/feature_space/FeatureSpace.cpp @@ -67,6 +67,17 @@ void FeatureSpace::initialize_fs(std::vector<double> prop, std::string project_t else if(_max_phi - _n_rung_generate < _n_rung_store) throw std::logic_error("Requesting to store more rungs than what can be pre-generated."); + node_value_arrs::set_task_sz_train(_task_sizes); + int n_max_ops = 0; + for(int rr = 0; rr < _max_phi - _n_rung_store; ++rr) + n_max_ops += std::pow(2, rr); + if((n_max_ops > _phi_0.size()) && (_n_rung_store == 0)) + { + std::cerr << "WARNING: Setting _n_rung_store to 1 to prevent possible overwrite issues" << std::endl; + ++_n_rung_store; + _n_rung_generate -= (_n_rung_generate == 1) && (_n_rung_store + _n_rung_generate > _max_phi); + } + initialize_fs_output_files(); project_funcs::set_project_fxn(project_type, _task_sizes.size(), _project, _project_no_omp); comp_feats::set_is_valid_fxn(project_type, _cross_cor_max, _n_samp, _is_valid, _is_valid_feat_list); @@ -431,11 +442,10 @@ void FeatureSpace::project_generated(double* prop, int size, std::vector<node_pt std::vector<node_ptr> phi_sel_private(phi_sel); std::vector<double> scores_sel_private(scores_sel); - #pragma omp for schedule(dynamic) for(auto feat = _phi.begin() + _start_gen.back() + _mpi_comm->rank(); feat < _phi.end(); feat += _mpi_comm->size()) { - unsigned long int feat_ind = _phi.size() + _n_sis_select * omp_get_num_threads(); + unsigned long int feat_ind = node_value_arrs::N_STORE_FEATURES + _n_sis_select * (omp_get_num_threads() + _mpi_comm->size()); node_value_arrs::clear_temp_reg_thread(); std::vector<node_ptr> generated_phi; @@ -452,9 +462,7 @@ void FeatureSpace::project_generated(double* prop, int size, std::vector<node_pt int ii = 0; while((ii < inds.size()) && (scores[inds[ii]] < -1.0)) - { ++ii; - } while((ii < inds.size()) && ((scores[inds[ii]] < worst_score) || (phi_sel_private.size() < _n_sis_select))) { @@ -463,16 +471,15 @@ void FeatureSpace::project_generated(double* prop, int size, std::vector<node_pt { if(scores_sel_private.size() == _n_sis_select) { - phi_sel_private[worst_score_ind]->set_selected(false); - phi_sel_private[worst_score_ind]->set_d_mat_ind(-1); - - generated_phi[inds[ii]]->reindex(_phi.size() + worst_score_ind + _n_sis_select * omp_get_thread_num()); + generated_phi[inds[ii]]->reindex(node_value_arrs::N_STORE_FEATURES + worst_score_ind + _n_sis_select * (omp_get_thread_num() + _mpi_comm->size())); + generated_phi[inds[ii]]->set_value(); phi_sel_private[worst_score_ind] = generated_phi[inds[ii]]; scores_sel_private[worst_score_ind] = cur_score; } else { - generated_phi[inds[ii]]->reindex(_phi.size() + scores_sel_private.size() + _n_sis_select * omp_get_thread_num()); + generated_phi[inds[ii]]->reindex(node_value_arrs::N_STORE_FEATURES + scores_sel_private.size() + _n_sis_select * (omp_get_thread_num() + _mpi_comm->size())); + generated_phi[inds[ii]]->set_value(); phi_sel_private.push_back(generated_phi[inds[ii]]); scores_sel_private.push_back(cur_score); } @@ -489,14 +496,19 @@ void FeatureSpace::project_generated(double* prop, int size, std::vector<node_pt { if(((phi_sel.size() < _n_sis_select) || (scores_sel_private[sc] < scores_sel[worst_score_ind])) && _is_valid_feat_list(phi_sel_private[sc]->value_ptr(0), _n_samp, _cross_cor_max, phi_sel, scores_sel, scores_sel_private[sc])) { + if(phi_sel.size() == _n_sis_select) { scores_sel[worst_score_ind] = scores_sel_private[sc]; + phi_sel_private[sc]->reindex(node_value_arrs::N_STORE_FEATURES + worst_score_ind + _n_sis_select * _mpi_comm->rank()); + phi_sel_private[sc]->set_value(); phi_sel[worst_score_ind] = phi_sel_private[sc]; } else { scores_sel.push_back(scores_sel_private[sc]); + phi_sel_private[sc]->reindex(node_value_arrs::N_STORE_FEATURES + phi_sel.size() + _n_sis_select * _mpi_comm->rank()); + phi_sel_private[sc]->set_value(); phi_sel.push_back(phi_sel_private[sc]); } worst_score_ind = std::max_element(scores_sel.begin(), scores_sel.end()) - scores_sel.begin(); @@ -569,29 +581,26 @@ void FeatureSpace::sis(std::vector<double>& prop) std::cout << "Time to get best features on rank : " << omp_get_wtime() - start << " s" << std::endl; start = omp_get_wtime(); + + for(auto& feat : phi_sel) + { + feat->set_selected(false); + feat->set_d_mat_ind(-1); + } + if(_n_rung_generate > 0) { phi_sel.resize(cur_feat_local); scores_sel.resize(cur_feat_local); + node_ptr test_feat = phi_sel[0]; + project_generated(prop.data(), prop.size(), phi_sel, scores_sel); - node_value_arrs::clear_temp_reg(); - node_value_arrs::clear_temp_test_reg(); - for(auto& feat : _phi) - { - feat->set_selected(false); - feat->set_d_mat_ind(-1); - } _mpi_comm->barrier(); if(_mpi_comm->rank() == 0) std::cout << "Projection time for features generated on the fly: " << omp_get_wtime() - start << " s" << std::endl; } - for(auto& feat : phi_sel) - { - feat->set_selected(false); - feat->set_d_mat_ind(-1); - } std::fill_n(&scores_sel_all[cur_feat], _n_sis_select, 0.0); // If we are only on one process then phi_sel are the selected features start = omp_get_wtime(); @@ -602,14 +611,12 @@ void FeatureSpace::sis(std::vector<double>& prop) if(_mpi_comm->rank() == 0) { - std::cout << "setup" << std::endl; std::vector<double> sent_scores(_n_sis_select * _mpi_comm->size(), std::numeric_limits<double>::infinity()); std::vector<node_ptr> sent_phi(_n_sis_select * _mpi_comm->size()); std::copy_n(scores_sel.begin(), _n_sis_select, sent_scores.begin()); std::copy_n(phi_sel.begin(), _n_sis_select, sent_phi.begin()); - std::cout << "recv" << std::endl; for(int rr = 1; rr < _mpi_comm->size(); ++rr) { _mpi_comm->recv(rr, _mpi_comm->cantorTagGen(rr, 0, 2, 0), &sent_scores[rr * _n_sis_select], _n_sis_select); @@ -631,6 +638,7 @@ void FeatureSpace::sis(std::vector<double>& prop) if((sent_scores[inds[ii]] != std::numeric_limits<double>::infinity()) && _is_valid(sent_phi[inds[ii]]->value_ptr(), _n_samp, _cross_cor_max, scores_sel_all, sent_scores[inds[ii]], cur_feat + cur_feat_local, cur_feat)) { + sent_phi[inds[ii]]->set_value(); out_file_stream << std::setw(14) <<std::left << cur_feat + cur_feat_local << sent_phi[inds[ii]]->postfix_expr() << std::endl; sum_file_stream << std::setw(14) <<std::left << cur_feat + cur_feat_local << std::setw(24) << std::setprecision(18) << std::left << -1 * sent_scores[inds[ii]] << sent_phi[inds[ii]]->expr() << std::endl; diff --git a/src/feature_creation/node/FeatureNode.cpp b/src/feature_creation/node/FeatureNode.cpp index af95eca7c0368673688f1d91aac71e0c90a0f7f3..7224f844f818fef7c696086c483ad2d904b9decc 100644 --- a/src/feature_creation/node/FeatureNode.cpp +++ b/src/feature_creation/node/FeatureNode.cpp @@ -20,6 +20,21 @@ FeatureNode::FeatureNode(unsigned long int feat_ind, std::string expr, std::vect FeatureNode::~FeatureNode() {} +bool FeatureNode::is_const() +{ + bool is_c = false; + int pos = 0; + + double* val_ptr = value_ptr(); + for(auto& sz : node_value_arrs::TASK_SZ_TRAIN) + { + double mean = util_funcs::mean(val_ptr + pos, sz); + is_c = is_c || std::all_of(val_ptr + pos, val_ptr + pos + sz, [&mean](double d){return std::abs(d - mean) < 1e-12;}); + pos += sz; + } + return is_c; +} + void FeatureNode::update_add_sub_leaves(std::map<std::string, int>& add_sub_leaves, int pl_mn, int& expected_abs_tot) { if(add_sub_leaves.count(_expr) > 0) diff --git a/src/feature_creation/node/FeatureNode.hpp b/src/feature_creation/node/FeatureNode.hpp index edb15caac311ce38b879b35866a49c17a9525fe5..e672e211697f6120963f44f7fec534de54e935a3 100644 --- a/src/feature_creation/node/FeatureNode.hpp +++ b/src/feature_creation/node/FeatureNode.hpp @@ -187,11 +187,7 @@ public: /** * @brief Check if feature is constant */ - inline bool is_const() - { - double mean = util_funcs::mean(value_ptr(), _n_samp); - return std::all_of(value_ptr(), value_ptr() + _n_samp, [&mean](double d){return std::abs(d - mean) < 1e-12;}); - } + bool is_const(); /** * @brief Returns the type of node this is diff --git a/src/feature_creation/node/operator_nodes/OperatorNode.hpp b/src/feature_creation/node/operator_nodes/OperatorNode.hpp index 5384d0d50fbbab01f463e8756ccb97f21d4666ec..7aaf5c26671376fc7aa27e06478c3e4ef4411e01 100644 --- a/src/feature_creation/node/operator_nodes/OperatorNode.hpp +++ b/src/feature_creation/node/operator_nodes/OperatorNode.hpp @@ -168,7 +168,11 @@ public: offset = (offset == -1) ? rung() : offset; if((rung() > node_value_arrs::N_RUNGS_STORED) && (node_value_arrs::temp_storage_reg(_arr_ind, offset) != _feat_ind)) + { + _arr_ind = node_value_arrs::N_STORE_FEATURES + node_value_arrs::NEXT_IND; + node_value_arrs::NEXT_IND = (node_value_arrs::NEXT_IND + 1) % node_value_arrs::N_STORE_FEATURES; set_value(offset); + } return node_value_arrs::get_value_ptr(_arr_ind, _feat_ind, offset); } @@ -185,7 +189,11 @@ public: { offset = (offset == -1) ? rung() : offset; if((rung() > node_value_arrs::N_RUNGS_STORED) && (node_value_arrs::temp_storage_test_reg(_arr_ind, offset) != _feat_ind)) + { + _arr_ind = node_value_arrs::N_STORE_FEATURES + node_value_arrs::NEXT_IND; + node_value_arrs::NEXT_IND = (node_value_arrs::NEXT_IND + 1) % node_value_arrs::N_STORE_FEATURES; set_test_value(offset); + } return node_value_arrs::get_test_value_ptr(_arr_ind, _feat_ind, offset); } @@ -194,15 +202,28 @@ public: /** * @brief Check if the feature contains NaN */ - inline bool is_nan(){return std::any_of(value_ptr(), value_ptr() + _n_samp, [](double d){return !std::isfinite(d);});} + inline bool is_nan() + { + double* val_ptr = value_ptr(); + return std::any_of(val_ptr, val_ptr + _n_samp, [](double d){return !std::isfinite(d);}); + } // DocString: op_node_is_const /** * @brief Check if feature is constant */ - inline bool is_const() + bool is_const() { - return util_funcs::stand_dev(value_ptr(), _n_samp) < 1.0e-13; + double* val_ptr = value_ptr(); + + bool is_c = false;//util_funcs::stand_dev(val_ptr, _n_samp) < 1.0e-13; + int pos = 0; + for(auto& sz : node_value_arrs::TASK_SZ_TRAIN) + { + is_c = is_c || (util_funcs::stand_dev(val_ptr + pos, sz) < 1.0e-13); + pos += sz; + } + return is_c; } // DocString: op_node_rung diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/abs/absolute_value.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/abs/absolute_value.cpp index a898ab74ffb61d116dd3076fea4fd4fdd102a4d4..dc5f96b010c480acb4f2d6ce2ae07ebc1a1158a3 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/abs/absolute_value.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/abs/absolute_value.cpp @@ -10,14 +10,13 @@ void generateAbsNode(std::vector<node_ptr>& feat_list, node_ptr feat, unsigned l if(*std::min_element(val_ptr, val_ptr + feat->n_samp()) > 0.0) return; - int offset = feat->rung() + 1; - val_ptr = node_value_arrs::get_value_ptr(feat_ind, feat_ind, offset); - allowed_op_funcs::abs(feat->n_samp(), feat->value_ptr(offset + 2), val_ptr); + node_ptr new_feat = std::make_shared<AbsNode>(feat, feat_ind); + val_ptr = new_feat->value_ptr(); - if((util_funcs::stand_dev(val_ptr, feat->n_samp()) < 1.0e-13) || std::any_of(val_ptr, val_ptr + feat->n_samp(), [&u_bound](double d){return !std::isfinite(d) || (std::abs(d) > u_bound);}) || (util_funcs::max_abs_val<double>(val_ptr, feat->n_samp()) < l_bound)) + if(new_feat->is_const() || std::any_of(val_ptr, val_ptr + new_feat->n_samp(), [&u_bound](double d){return !std::isfinite(d) || (std::abs(d) > u_bound);}) || (util_funcs::max_abs_val<double>(val_ptr, new_feat->n_samp()) < l_bound)) return; - feat_list.push_back(std::make_shared<AbsNode>(feat, feat_ind)); + feat_list.push_back(new_feat); } AbsNode::AbsNode() diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/abs_diff/absolute_difference.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/abs_diff/absolute_difference.cpp index 705b91d7af346aaa43c514b2d3973a982cf3445c..91f8054fbeae5291534c2ad4c6df93611497a53c 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/abs_diff/absolute_difference.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/abs_diff/absolute_difference.cpp @@ -19,14 +19,13 @@ void generateAbsDiffNode(std::vector<node_ptr>& feat_list, node_ptr feat_1, node if((std::abs(add_sub_tot_first) > 1) && std::all_of(add_sub_leaves.begin(), add_sub_leaves.end(), [&add_sub_tot_first](auto el){return std::abs(el.second) == add_sub_tot_first;})) return; - int offset = std::max(feat_1->rung(), feat_2->rung()) + 1; - double* val_ptr = node_value_arrs::get_value_ptr(feat_ind, feat_ind, offset); - allowed_op_funcs::abs_diff(feat_1->n_samp(), feat_1->value_ptr(offset + 2), feat_2->value_ptr(offset + 1), val_ptr); + node_ptr new_feat = std::make_shared<AbsDiffNode>(feat_1, feat_2, feat_ind); + double* val_ptr = new_feat->value_ptr(); - if((util_funcs::stand_dev(val_ptr, feat_1->n_samp()) < 1.0e-13) || std::any_of(val_ptr, val_ptr + feat_1->n_samp(), [&u_bound](double d){return !std::isfinite(d) || (std::abs(d) > u_bound);}) || (util_funcs::max_abs_val<double>(val_ptr, feat_1->n_samp()) < l_bound)) + if(new_feat->is_const() || std::any_of(val_ptr, val_ptr + new_feat->n_samp(), [&u_bound](double d){return !std::isfinite(d) || (std::abs(d) > u_bound);}) || (util_funcs::max_abs_val<double>(val_ptr, new_feat->n_samp()) < l_bound)) return; - feat_list.push_back(std::make_shared<AbsDiffNode>(feat_1, feat_2, feat_ind)); + feat_list.push_back(new_feat); } AbsDiffNode::AbsDiffNode() diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/add/add.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/add/add.cpp index 3d381cc610844ae1117b63bf19b5be632aaaab85..e885764f5b90c3ce853145209d4e50535362bc28 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/add/add.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/add/add.cpp @@ -18,14 +18,12 @@ void generateAddNode(std::vector<node_ptr>& feat_list, node_ptr feat_1, node_ptr if((std::abs(add_sub_tot_first) > 1) && std::all_of(add_sub_leaves.begin(), add_sub_leaves.end(), [&add_sub_tot_first](auto el){return std::abs(el.second) == add_sub_tot_first;})) return; - int offset = feat_1->rung() + 1; - double* val_ptr = node_value_arrs::get_value_ptr(feat_ind, feat_ind, offset); - allowed_op_funcs::add(feat_1->n_samp(), feat_1->value_ptr(offset + 2), feat_2->value_ptr(offset + 1), val_ptr); + node_ptr new_feat = std::make_shared<AddNode>(feat_1, feat_2, feat_ind); + double* val_ptr = new_feat->value_ptr(); + if(new_feat->is_const() || std::any_of(val_ptr, val_ptr + new_feat->n_samp(), [&u_bound](double d){return !std::isfinite(d) || (std::abs(d) > u_bound);}) || (util_funcs::max_abs_val<double>(val_ptr, new_feat->n_samp()) < l_bound)) + return; - if((util_funcs::stand_dev(val_ptr, feat_1->n_samp()) < 1.0e-13) || std::any_of(val_ptr, val_ptr + feat_1->n_samp(), [&u_bound](double d){return !std::isfinite(d) || (std::abs(d) > u_bound);}) || (util_funcs::max_abs_val<double>(val_ptr, feat_1->n_samp()) < l_bound)) - return; - - feat_list.push_back(std::make_shared<AddNode>(feat_1, feat_2, feat_ind)); + feat_list.push_back(new_feat); } AddNode::AddNode() diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cb/cube.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cb/cube.cpp index a77300d4fc70bcfa3c048f50263c03ad07d5fcdb..2db055bc28236bbbeb1a2b4b2cdb181ac5103e29 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cb/cube.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cb/cube.cpp @@ -6,14 +6,14 @@ void generateCbNode(std::vector<node_ptr>& feat_list, node_ptr feat, unsigned lo if((feat->type() == NODE_TYPE::CBRT) || (feat->type() == NODE_TYPE::SQ) || (feat->type() == NODE_TYPE::INV)) return; - int offset = feat->rung() + 1; - double* val_ptr = node_value_arrs::get_value_ptr(feat_ind, feat_ind, offset); - allowed_op_funcs::cb(feat->n_samp(), feat->value_ptr(offset + 2), val_ptr); + node_ptr new_feat = std::make_shared<CbNode>(feat, feat_ind); + double* val_ptr = new_feat->value_ptr(); - if((util_funcs::stand_dev(val_ptr, feat->n_samp()) < 1.0e-13) || std::any_of(val_ptr, val_ptr + feat->n_samp(), [&u_bound](double d){return !std::isfinite(d) || (std::abs(d) > u_bound);}) || (util_funcs::max_abs_val<double>(val_ptr, feat->n_samp()) < l_bound)) + // No is_const check since cube function can only be constant if feat is constant + if(std::any_of(val_ptr, val_ptr + new_feat->n_samp(), [&u_bound](double d){return !std::isfinite(d) || (std::abs(d) > u_bound);}) || (util_funcs::max_abs_val<double>(val_ptr, new_feat->n_samp()) < l_bound)) return; - feat_list.push_back(std::make_shared<CbNode>(feat, feat_ind)); + feat_list.push_back(new_feat); } CbNode::CbNode() @@ -30,7 +30,9 @@ CbNode::CbNode(node_ptr feat, unsigned long int feat_ind, double l_bound, double throw InvalidFeatureException(); set_value(); - if(is_nan() || is_const() || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) > u_bound) || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) < l_bound)) + + // No is_const check since cube function can only be constant if feat is constant + if(is_nan() || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) > u_bound) || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) < l_bound)) throw InvalidFeatureException(); } diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cbrt/cube_root.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cbrt/cube_root.cpp index fdb3020cf9a51b3bf78f34af1c009921a4d8839c..bb999ff82d07534d4a47de31c0a910f4b3e3ddd5 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cbrt/cube_root.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cbrt/cube_root.cpp @@ -6,14 +6,14 @@ void generateCbrtNode(std::vector<node_ptr>& feat_list, node_ptr feat, unsigned if((feat->type() == NODE_TYPE::CB) || (feat->type() == NODE_TYPE::SQ) || (feat->type() == NODE_TYPE::SIX_POW) || (feat->type() == NODE_TYPE::INV)) return; - int offset = feat->rung() + 1; - double* val_ptr = node_value_arrs::get_value_ptr(feat_ind, feat_ind, offset); - allowed_op_funcs::cbrt(feat->n_samp(), feat->value_ptr(offset + 2), val_ptr); + node_ptr new_feat = std::make_shared<CbrtNode>(feat, feat_ind); + double* val_ptr = new_feat->value_ptr(); - if((util_funcs::stand_dev(val_ptr, feat->n_samp()) < 1.0e-13) || std::any_of(val_ptr, val_ptr + feat->n_samp(), [&u_bound](double d){return !std::isfinite(d) || (std::abs(d) > u_bound);}) || (util_funcs::max_abs_val<double>(val_ptr, feat->n_samp()) < l_bound)) + // No is_const check since cube function can only be constant if feat is constant + if(std::any_of(val_ptr, val_ptr + new_feat->n_samp(), [&u_bound](double d){return !std::isfinite(d) || (std::abs(d) > u_bound);}) || (util_funcs::max_abs_val<double>(val_ptr, new_feat->n_samp()) < l_bound)) return; - feat_list.push_back(std::make_shared<CbrtNode>(feat, feat_ind)); + feat_list.push_back(new_feat); } CbrtNode::CbrtNode() @@ -30,9 +30,10 @@ CbrtNode::CbrtNode(node_ptr feat, unsigned long int feat_ind, double l_bound, do throw InvalidFeatureException(); set_value(); - if(is_nan() || is_const() || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) > u_bound) || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) < l_bound)) - throw InvalidFeatureException(); + // No is_const check since cube function can only be constant if feat is constant + if(is_nan() || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) > u_bound) || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) < l_bound)) + throw InvalidFeatureException(); } void CbrtNode::update_add_sub_leaves(std::map<std::string, int>& add_sub_leaves, int pl_mn, int& expected_abs_tot) diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cos/cos.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cos/cos.cpp index 05c76497cec75ddcaf6854760e1f408cc9df5f46..3ea3cf7ae0b3688ee223f455069dda6b680295ec 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cos/cos.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cos/cos.cpp @@ -6,14 +6,13 @@ void generateCosNode(std::vector<node_ptr>& feat_list, node_ptr feat, unsigned l if(feat->unit() != Unit() || (feat->type() == NODE_TYPE::SIN) || (feat->type() == NODE_TYPE::COS)) return; - int offset = feat->rung() + 1; - double* val_ptr = node_value_arrs::get_value_ptr(feat_ind, feat_ind, offset); - allowed_op_funcs::cos(feat->n_samp(), feat->value_ptr(offset + 2), val_ptr); + node_ptr new_feat = std::make_shared<CosNode>(feat, feat_ind); + double* val_ptr = new_feat->value_ptr(); - if((util_funcs::stand_dev(val_ptr, feat->n_samp()) < 1.0e-13) || std::any_of(val_ptr, val_ptr + feat->n_samp(), [&u_bound](double d){return !std::isfinite(d) || (std::abs(d) > u_bound);}) || (util_funcs::max_abs_val<double>(val_ptr, feat->n_samp()) < l_bound)) + if(new_feat->is_const() || std::any_of(val_ptr, val_ptr + new_feat->n_samp(), [&u_bound](double d){return !std::isfinite(d) || (std::abs(d) > u_bound);}) || (util_funcs::max_abs_val<double>(val_ptr, new_feat->n_samp()) < l_bound)) return; - feat_list.push_back(std::make_shared<CosNode>(feat, feat_ind)); + feat_list.push_back(new_feat); } CosNode::CosNode() diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/div/divide.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/div/divide.cpp index c109f338213695ada9940fb4a4afefd9a8843c18..6ca45a438207aca79313f4352339778d241ff69b 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/div/divide.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/div/divide.cpp @@ -19,14 +19,12 @@ void generateDivNode(std::vector<node_ptr>& feat_list, node_ptr feat_1, node_ptr if((std::abs(div_mult_tot_first) != 1.0) && std::all_of(div_mult_leaves.begin(), div_mult_leaves.end(), [&div_mult_tot_first](auto el){return el.second == div_mult_tot_first;})) return; - int offset = std::max(feat_1->rung(), feat_2->rung()) + 1; - double* val_ptr = node_value_arrs::get_value_ptr(feat_ind, feat_ind, offset); - allowed_op_funcs::div(feat_1->n_samp(), feat_1->value_ptr(offset + 2), feat_2->value_ptr(offset + 1), val_ptr); - - if((util_funcs::stand_dev(val_ptr, feat_1->n_samp()) < 1.0e-13) || std::any_of(val_ptr, val_ptr + feat_1->n_samp(), [&u_bound](double d){return !std::isfinite(d) || (std::abs(d) > u_bound);}) || (util_funcs::max_abs_val<double>(val_ptr, feat_1->n_samp()) < l_bound)) + node_ptr new_feat = std::make_shared<DivNode>(feat_1, feat_2, feat_ind); + double* val_ptr = new_feat->value_ptr(); + if(new_feat->is_const() || std::any_of(val_ptr, val_ptr + new_feat->n_samp(), [&u_bound](double d){return !std::isfinite(d) || (std::abs(d) > u_bound);}) || (util_funcs::max_abs_val<double>(val_ptr, new_feat->n_samp()) < l_bound)) return; - feat_list.push_back(std::make_shared<DivNode>(feat_1, feat_2, feat_ind)); + feat_list.push_back(new_feat); } DivNode::DivNode() diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/exp/exponential.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/exp/exponential.cpp index 81fa36085c93a8ea868fba6d4852aea273ad2763..6c1c01ea6b9c4721f6707bf2e547a3dde99b6c18 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/exp/exponential.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/exp/exponential.cpp @@ -6,14 +6,13 @@ void generateExpNode(std::vector<node_ptr>& feat_list, node_ptr feat, unsigned l if((feat->unit() != Unit()) || (feat->type() == NODE_TYPE::NEG_EXP) || (feat->type() == NODE_TYPE::EXP) || (feat->type() == NODE_TYPE::ADD) || (feat->type() == NODE_TYPE::SUB) || (feat->type() == NODE_TYPE::LOG)) return; - int offset = feat->rung() + 1; - double* val_ptr = node_value_arrs::get_value_ptr(feat_ind, feat_ind, offset); - allowed_op_funcs::exp(feat->n_samp(), feat->value_ptr(offset + 2), val_ptr); + node_ptr new_feat = std::make_shared<ExpNode>(feat, feat_ind); + double* val_ptr = new_feat->value_ptr(); - if((util_funcs::stand_dev(val_ptr, feat->n_samp()) < 1.0e-13) || std::any_of(val_ptr, val_ptr + feat->n_samp(), [&u_bound](double d){return !std::isfinite(d) || (std::abs(d) > u_bound);}) || (util_funcs::max_abs_val<double>(val_ptr, feat->n_samp()) < l_bound)) - return; - - feat_list.push_back(std::make_shared<ExpNode>(feat, feat_ind)); + // No is_const check since cube function can only be constant if feat is constant + if(std::any_of(val_ptr, val_ptr + new_feat->n_samp(), [&u_bound](double d){return !std::isfinite(d) || (std::abs(d) > u_bound);}) || (util_funcs::max_abs_val<double>(val_ptr, new_feat->n_samp()) < l_bound)) + return; + feat_list.push_back(new_feat); } ExpNode::ExpNode() @@ -33,7 +32,9 @@ ExpNode::ExpNode(node_ptr feat, unsigned long int feat_ind, double l_bound, doub throw InvalidFeatureException(); set_value(); - if(is_nan() || is_const() || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) > u_bound) || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) < l_bound)) + + // No is_const check since cube function can only be constant if feat is constant + if(is_nan() || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) > u_bound) || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) < l_bound)) throw InvalidFeatureException(); } diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/inv/inverse.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/inv/inverse.cpp index f4d9a0205668c35c2ff3a352924c232719f5baa7..1177493ec98ffdd6015c644719e7deef8935cd5b 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/inv/inverse.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/inv/inverse.cpp @@ -6,14 +6,14 @@ void generateInvNode(std::vector<node_ptr>& feat_list, node_ptr feat, unsigned l if((feat->type() == NODE_TYPE::DIV) || (feat->type() == NODE_TYPE::EXP) || (feat->type() == NODE_TYPE::NEG_EXP) || (feat->type() == NODE_TYPE::INV)) return; - int offset = feat->rung() + 1; - double* val_ptr = node_value_arrs::get_value_ptr(feat_ind, feat_ind, offset); - allowed_op_funcs::inv(feat->n_samp(), feat->value_ptr(offset + 2), val_ptr); + node_ptr new_feat = std::make_shared<InvNode>(feat, feat_ind); + double* val_ptr = new_feat->value_ptr(); - if((util_funcs::stand_dev(val_ptr, feat->n_samp()) < 1.0e-13) || std::any_of(val_ptr, val_ptr + feat->n_samp(), [&u_bound](double d){return !std::isfinite(d) || (std::abs(d) > u_bound);}) || (util_funcs::max_abs_val<double>(val_ptr, feat->n_samp()) < l_bound)) - return; + // No is_const check since cube function can only be constant if feat is constant + if(std::any_of(val_ptr, val_ptr + new_feat->n_samp(), [&u_bound](double d){return !std::isfinite(d) || (std::abs(d) > u_bound);}) || (util_funcs::max_abs_val<double>(val_ptr, new_feat->n_samp()) < l_bound)) + return; - feat_list.push_back(std::make_shared<InvNode>(feat, feat_ind)); + feat_list.push_back(new_feat); } InvNode::InvNode() @@ -30,7 +30,9 @@ InvNode::InvNode(node_ptr feat, unsigned long int feat_ind, double l_bound, doub throw InvalidFeatureException(); set_value(); - if(is_nan() || is_const() || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) > u_bound) || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) < l_bound)) + + // No is_const check since cube function can only be constant if feat is constant + if(is_nan() || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) > u_bound) || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) < l_bound)) throw InvalidFeatureException(); } diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/log/log.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/log/log.cpp index fdeb581c33cfa1e4420fd3bae928aa26cb6cffd4..997a7d85bd5293ac1caae794d6fff831bbecf763 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/log/log.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/log/log.cpp @@ -6,14 +6,14 @@ void generateLogNode(std::vector<node_ptr>& feat_list, node_ptr feat, unsigned l if(feat->unit() != Unit() || (feat->type() == NODE_TYPE::NEG_EXP) || (feat->type() == NODE_TYPE::EXP) || (feat->type() == NODE_TYPE::DIV) || (feat->type() == NODE_TYPE::INV) || (feat->type() == NODE_TYPE::MULT) || (feat->type() == NODE_TYPE::LOG) || (feat->type() == NODE_TYPE::SIX_POW) || (feat->type() == NODE_TYPE::CB) || (feat->type() == NODE_TYPE::SQ) || (feat->type() == NODE_TYPE::CBRT) || (feat->type() == NODE_TYPE::SQRT)) return; - int offset = feat->rung() + 1; - double* val_ptr = node_value_arrs::get_value_ptr(feat_ind, feat_ind, offset); - allowed_op_funcs::log(feat->n_samp(), feat->value_ptr(offset + 2), val_ptr); + node_ptr new_feat = std::make_shared<LogNode>(feat, feat_ind); + double* val_ptr = new_feat->value_ptr(); - if((util_funcs::stand_dev(val_ptr, feat->n_samp()) < 1.0e-13) || std::any_of(val_ptr, val_ptr + feat->n_samp(), [&u_bound](double d){return !std::isfinite(d) || (std::abs(d) > u_bound);}) || (util_funcs::max_abs_val<double>(val_ptr, feat->n_samp()) < l_bound)) + // No is_const check since cube function can only be constant if feat is constant + if(std::any_of(val_ptr, val_ptr + new_feat->n_samp(), [&u_bound](double d){return !std::isfinite(d) || (std::abs(d) > u_bound);}) || (util_funcs::max_abs_val<double>(val_ptr, new_feat->n_samp()) < l_bound)) return; - feat_list.push_back(std::make_shared<LogNode>(feat, feat_ind)); + feat_list.push_back(new_feat); } LogNode::LogNode() @@ -33,7 +33,9 @@ LogNode::LogNode(node_ptr feat, unsigned long int feat_ind, double l_bound, doub throw InvalidFeatureException(); set_value(); - if(is_nan() || is_const() || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) > u_bound) || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) < l_bound)) + + // No is_const check since cube function can only be constant if feat is constant + if(is_nan() || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) > u_bound) || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) < l_bound)) throw InvalidFeatureException(); } diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/mult/multiply.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/mult/multiply.cpp index ed6032503eb8adf9127e2160d215a01eb72faf10..3e78a6fc1028d978bd9af939d9d25fc47276333f 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/mult/multiply.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/mult/multiply.cpp @@ -19,14 +19,12 @@ void generateMultNode(std::vector<node_ptr>& feat_list, node_ptr feat_1, node_pt if((std::abs(div_mult_tot_first) - 1.0 > 1e-12) && std::all_of(div_mult_leaves.begin(), div_mult_leaves.end(), [&div_mult_tot_first](auto el){return std::abs(el.second) == div_mult_tot_first;})) return; - int offset = std::max(feat_1->rung(), feat_2->rung()) + 1; - double* val_ptr = node_value_arrs::get_value_ptr(feat_ind, feat_ind, offset); - allowed_op_funcs::mult(feat_1->n_samp(), feat_1->value_ptr(offset + 2), feat_2->value_ptr(offset + 1), val_ptr); - - if((util_funcs::stand_dev(val_ptr, feat_1->n_samp()) < 1.0e-13) || std::any_of(val_ptr, val_ptr + feat_1->n_samp(), [&u_bound](double d){return !std::isfinite(d) || (std::abs(d) > u_bound);}) || (util_funcs::max_abs_val<double>(val_ptr, feat_1->n_samp()) < l_bound)) + node_ptr new_feat = std::make_shared<MultNode>(feat_1, feat_2, feat_ind); + double* val_ptr = new_feat->value_ptr(); + if(new_feat->is_const() || std::any_of(val_ptr, val_ptr + new_feat->n_samp(), [&u_bound](double d){return !std::isfinite(d) || (std::abs(d) > u_bound);}) || (util_funcs::max_abs_val<double>(val_ptr, new_feat->n_samp()) < l_bound)) return; - feat_list.push_back(std::make_shared<MultNode>(feat_1, feat_2, feat_ind)); + feat_list.push_back(new_feat); } MultNode::MultNode() diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/neg_exp/negative_exponential.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/neg_exp/negative_exponential.cpp index 55614a399a52e7d18f91b10dd2ef4023fb129849..cdaebd91d3010ee7eef4fac4add8a77baa19caa6 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/neg_exp/negative_exponential.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/neg_exp/negative_exponential.cpp @@ -6,14 +6,14 @@ void generateNegExpNode(std::vector<node_ptr>& feat_list, node_ptr feat, unsigne if(feat->unit() != Unit() || (feat->type() == NODE_TYPE::NEG_EXP) || (feat->type() == NODE_TYPE::EXP) || (feat->type() == NODE_TYPE::ADD) || (feat->type() == NODE_TYPE::SUB) || (feat->type() == NODE_TYPE::LOG)) return; - int offset = feat->rung() + 1; - double* val_ptr = node_value_arrs::get_value_ptr(feat_ind, feat_ind, offset); - allowed_op_funcs::neg_exp(feat->n_samp(), feat->value_ptr(offset + 2), val_ptr); + node_ptr new_feat = std::make_shared<NegExpNode>(feat, feat_ind); + double* val_ptr = new_feat->value_ptr(); - if((util_funcs::stand_dev(val_ptr, feat->n_samp()) < 1.0e-13) || std::any_of(val_ptr, val_ptr + feat->n_samp(), [&u_bound](double d){return !std::isfinite(d) || (std::abs(d) > u_bound);}) || (util_funcs::max_abs_val<double>(val_ptr, feat->n_samp()) < l_bound)) + // No is_const check since cube function can only be constant if feat is constant + if(std::any_of(val_ptr, val_ptr + new_feat->n_samp(), [&u_bound](double d){return !std::isfinite(d) || (std::abs(d) > u_bound);}) || (util_funcs::max_abs_val<double>(val_ptr, new_feat->n_samp()) < l_bound)) return; - feat_list.push_back(std::make_shared<NegExpNode>(feat, feat_ind)); + feat_list.push_back(new_feat); } NegExpNode::NegExpNode() @@ -33,7 +33,9 @@ NegExpNode::NegExpNode(node_ptr feat, unsigned long int feat_ind, double l_bound throw InvalidFeatureException(); set_value(); - if(is_nan() || is_const() || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) > u_bound) || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) < l_bound)) + + // No is_const check since cube function can only be constant if feat is constant + if(is_nan() || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) > u_bound) || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) < l_bound)) throw InvalidFeatureException(); } diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sin/sin.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sin/sin.cpp index 5914ec201c3d2083cd1abb1bcdfdd0a4aff5b198..d503053f2d1808b34e4f908caba603d0bc8986bf 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sin/sin.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sin/sin.cpp @@ -6,14 +6,12 @@ void generateSinNode(std::vector<node_ptr>& feat_list, node_ptr feat, unsigned l if(feat->unit() != Unit() || (feat->type() == NODE_TYPE::SIN) || (feat->type() == NODE_TYPE::COS)) return; - int offset = feat->rung() + 1; - double* val_ptr = node_value_arrs::get_value_ptr(feat_ind, feat_ind, offset); - allowed_op_funcs::sin(feat->n_samp(), feat->value_ptr(offset + 2), val_ptr); + node_ptr new_feat = std::make_shared<SinNode>(feat, feat_ind); + double* val_ptr = new_feat->value_ptr(); + if(new_feat->is_const() || std::any_of(val_ptr, val_ptr + new_feat->n_samp(), [&u_bound](double d){return !std::isfinite(d) || (std::abs(d) > u_bound);}) || (util_funcs::max_abs_val<double>(val_ptr, new_feat->n_samp()) < l_bound)) + return; - if((util_funcs::stand_dev(val_ptr, feat->n_samp()) < 1.0e-13) || std::any_of(val_ptr, val_ptr + feat->n_samp(), [&u_bound](double d){return !std::isfinite(d) || (std::abs(d) > u_bound);}) || (util_funcs::max_abs_val<double>(val_ptr, feat->n_samp()) < l_bound)) - return; - - feat_list.push_back(std::make_shared<SinNode>(feat, feat_ind)); + feat_list.push_back(new_feat); } SinNode::SinNode() diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sp/sixth_power.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sp/sixth_power.cpp index 8e2887aab1ae4b9b53657df11b728d63a035e24f..7dd1f6b2d53879943e15aaf6dadd1cecd48231e7 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sp/sixth_power.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sp/sixth_power.cpp @@ -6,14 +6,13 @@ void generateSixPowNode(std::vector<node_ptr>& feat_list, node_ptr feat, unsigne if((feat->type() == NODE_TYPE::CBRT) || (feat->type() == NODE_TYPE::SQRT) || (feat->type() == NODE_TYPE::SQ) || (feat->type() == NODE_TYPE::CB) || (feat->type() == NODE_TYPE::INV)) return; - int offset = feat->rung() + 1; - double* val_ptr = node_value_arrs::get_value_ptr(feat_ind, feat_ind, offset); - allowed_op_funcs::sixth_pow(feat->n_samp(), feat->value_ptr(offset + 2), val_ptr); + node_ptr new_feat = std::make_shared<SixPowNode>(feat, feat_ind); + double* val_ptr = new_feat->value_ptr(); - if((util_funcs::stand_dev(val_ptr, feat->n_samp()) < 1.0e-13) || std::any_of(val_ptr, val_ptr + feat->n_samp(), [&u_bound](double d){return (!std::isfinite(d)) || (std::abs(d) > u_bound);}) || (util_funcs::max_abs_val<double>(val_ptr, feat->n_samp()) < l_bound)) + if(new_feat->is_const() || std::any_of(val_ptr, val_ptr + new_feat->n_samp(), [&u_bound](double d){return (!std::isfinite(d)) || (std::abs(d) > u_bound);}) || (util_funcs::max_abs_val<double>(val_ptr, new_feat->n_samp()) < l_bound)) return; - feat_list.push_back(std::make_shared<SixPowNode>(feat, feat_ind)); + feat_list.push_back(new_feat); } SixPowNode::SixPowNode() diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sq/square.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sq/square.cpp index fa0edfc8a75170e4eb179b8f7d8df63a5afd795c..249e6f92b48f17c52e24597872a9b8a4269bd70d 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sq/square.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sq/square.cpp @@ -6,14 +6,13 @@ void generateSqNode(std::vector<node_ptr>& feat_list, node_ptr feat, unsigned lo if((feat->type() == NODE_TYPE::SQRT) || (feat->type() == NODE_TYPE::INV)) return; - int offset = feat->rung() + 1; - double* val_ptr = node_value_arrs::get_value_ptr(feat_ind, feat_ind, offset); - allowed_op_funcs::sq(feat->n_samp(), feat->value_ptr(offset + 2), val_ptr); + node_ptr new_feat = std::make_shared<SqNode>(feat, feat_ind); + double* val_ptr = new_feat->value_ptr(); - if((util_funcs::stand_dev(val_ptr, feat->n_samp()) < 1.0e-13) || std::any_of(val_ptr, val_ptr + feat->n_samp(), [&u_bound](double d){return !std::isfinite(d) || (std::abs(d) > u_bound);}) || (util_funcs::max_abs_val<double>(val_ptr, feat->n_samp()) < l_bound)) + if(new_feat->is_const() || std::any_of(val_ptr, val_ptr + new_feat->n_samp(), [&u_bound](double d){return !std::isfinite(d) || (std::abs(d) > u_bound);}) || (util_funcs::max_abs_val<double>(val_ptr, new_feat->n_samp()) < l_bound)) return; - feat_list.push_back(std::make_shared<SqNode>(feat, feat_ind)); + feat_list.push_back(new_feat); } SqNode::SqNode() diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sqrt/square_root.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sqrt/square_root.cpp index 6a126983022d055fa439ce7301118479294481e6..ea0fc8694e9a83af93af7be8da54101362fd45be 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sqrt/square_root.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sqrt/square_root.cpp @@ -6,14 +6,13 @@ void generateSqrtNode(std::vector<node_ptr>& feat_list, node_ptr feat, unsigned if((feat->type() == NODE_TYPE::SQ) || (feat->type() == NODE_TYPE::CB) || (feat->type() == NODE_TYPE::SIX_POW) || (feat->type() == NODE_TYPE::CBRT) || (feat->type() == NODE_TYPE::INV)) return; - int offset = feat->rung() + 1; - double* val_ptr = node_value_arrs::get_value_ptr(feat_ind, feat_ind, offset); - allowed_op_funcs::sqrt(feat->n_samp(), feat->value_ptr(offset + 2), val_ptr); + node_ptr new_feat = std::make_shared<SqrtNode>(feat, feat_ind); + double* val_ptr = new_feat->value_ptr(); - if((util_funcs::stand_dev(val_ptr, feat->n_samp()) < 1.0e-13) || std::any_of(val_ptr, val_ptr + feat->n_samp(), [&u_bound](double d){return !std::isfinite(d) || (std::abs(d) > u_bound);}) || (util_funcs::max_abs_val<double>(val_ptr, feat->n_samp()) < l_bound)) + if(std::any_of(val_ptr, val_ptr + new_feat->n_samp(), [&u_bound](double d){return !std::isfinite(d) || (std::abs(d) > u_bound);}) || (util_funcs::max_abs_val<double>(val_ptr, new_feat->n_samp()) < l_bound)) return; - feat_list.push_back(std::make_shared<SqrtNode>(feat, feat_ind)); + feat_list.push_back(new_feat); } SqrtNode::SqrtNode() diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sub/subtract.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sub/subtract.cpp index 53313a68ad568c764dd53ea7d39786dc5400c456..a338297a39a3960e2ebabb93ef6b94eb1bb9ed1d 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sub/subtract.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sub/subtract.cpp @@ -17,14 +17,13 @@ void generateSubNode(std::vector<node_ptr>& feat_list, node_ptr feat_1, node_ptr if((std::abs(add_sub_tot_first) > 1) && std::all_of(add_sub_leaves.begin(), add_sub_leaves.end(), [&add_sub_tot_first](auto el){return std::abs(el.second) == add_sub_tot_first;})) return; - int offset = std::max(feat_1->rung(), feat_2->rung()) + 1; - double* val_ptr = node_value_arrs::get_value_ptr(feat_ind, feat_ind, offset); - allowed_op_funcs::sub(feat_1->n_samp(), feat_1->value_ptr(offset + 2), feat_2->value_ptr(offset + 1), val_ptr); + node_ptr new_feat = std::make_shared<SubNode>(feat_1, feat_2, feat_ind); + double* val_ptr = new_feat->value_ptr(); - if((util_funcs::stand_dev(val_ptr, feat_1->n_samp()) < 1.0e-13) || std::any_of(val_ptr, val_ptr + feat_1->n_samp(), [&u_bound](double d){return !std::isfinite(d) || (std::abs(d) > u_bound);}) || (util_funcs::max_abs_val<double>(val_ptr, feat_1->n_samp()) < l_bound)) - return; + if(new_feat->is_const() || std::any_of(val_ptr, val_ptr + new_feat->n_samp(), [&u_bound](double d){return !std::isfinite(d) || (std::abs(d) > u_bound);}) || (util_funcs::max_abs_val<double>(val_ptr, new_feat->n_samp()) < l_bound)) + return; - feat_list.push_back(std::make_shared<SubNode>(feat_1, feat_2, feat_ind)); + feat_list.push_back(new_feat); } SubNode::SubNode() diff --git a/src/feature_creation/node/value_storage/nodes_value_containers.cpp b/src/feature_creation/node/value_storage/nodes_value_containers.cpp index 26d8945b619f9c2da682ce57a2f9939d465a17f6..c92eda1236d260f34aadbd1f16952e5b960ba32f 100644 --- a/src/feature_creation/node/value_storage/nodes_value_containers.cpp +++ b/src/feature_creation/node/value_storage/nodes_value_containers.cpp @@ -1,23 +1,33 @@ #include <feature_creation/node/value_storage/nodes_value_containers.hpp> - +#include <iostream> int node_value_arrs::N_SELECTED = 0; int node_value_arrs::N_SAMPLES = 0; -int node_value_arrs::N_STORE_FEATURES = 0; +unsigned long int node_value_arrs::N_STORE_FEATURES = 0; int node_value_arrs::N_RUNGS_STORED = 0; int node_value_arrs::N_SAMPLES_TEST = 0; int node_value_arrs::MAX_N_THREADS = omp_get_max_threads(); +unsigned long int node_value_arrs::NEXT_IND = 0; std::vector<int> node_value_arrs::TEMP_STORAGE_REG; std::vector<int> node_value_arrs::TEMP_STORAGE_TEST_REG; +std::vector<int> node_value_arrs::TASK_SZ_TRAIN; +std::vector<int> node_value_arrs::TASK_SZ_TEST; + std::vector<double> node_value_arrs::D_MATRIX; std::vector<double> node_value_arrs::VALUES_ARR; std::vector<double> node_value_arrs::TEST_VALUES_ARR; std::vector<double> node_value_arrs::TEMP_STORAGE_ARR; std::vector<double> node_value_arrs::TEMP_STORAGE_TEST_ARR; -void node_value_arrs::initialize_values_arr(int n_samples, int n_samples_test, int n_primary_feat) +void node_value_arrs::initialize_values_arr(int n_samples, int n_samples_test, int n_primary_feat, bool set_task_sz) { + if(set_task_sz) + TASK_SZ_TRAIN = {n_samples}; + + if(set_task_sz) + TASK_SZ_TEST = {n_samples_test}; + N_SAMPLES = n_samples; N_SAMPLES_TEST = n_samples_test; N_RUNGS_STORED = 0; @@ -33,7 +43,34 @@ void node_value_arrs::initialize_values_arr(int n_samples, int n_samples_test, i TEMP_STORAGE_TEST_REG = std::vector<int>(MAX_N_THREADS * (3 * N_STORE_FEATURES + 1), -1); } -void node_value_arrs::resize_values_arr(int n_dims, int n_feat, bool use_temp) +void node_value_arrs::initialize_values_arr(std::vector<int> task_sz_train, std::vector<int> task_sz_test, int n_primary_feat) +{ + TASK_SZ_TRAIN = task_sz_train; + TASK_SZ_TEST = task_sz_test; + + initialize_values_arr( + std::accumulate(task_sz_train.begin(), task_sz_train.end(), 0), + std::accumulate(task_sz_test.begin(), task_sz_test.end(), 0), + n_primary_feat, + false + ); +} + +void node_value_arrs::set_task_sz_train(std::vector<int> task_sz_train) +{ + if(std::accumulate(task_sz_train.begin(), task_sz_train.end(), 0) != N_SAMPLES) + throw std::logic_error("The total number of samples has changed, task_sz_train is wrong."); + TASK_SZ_TRAIN = task_sz_train; +} + +void node_value_arrs::set_task_sz_test(std::vector<int> task_sz_test) +{ + if(std::accumulate(task_sz_test.begin(), task_sz_test.end(), 0) != N_SAMPLES_TEST) + throw std::logic_error("The total number of test samples has changed, task_sz_test is wrong."); + TASK_SZ_TEST = task_sz_test; +} + +void node_value_arrs::resize_values_arr(int n_dims, unsigned long int n_feat, bool use_temp) { N_RUNGS_STORED = n_dims; N_STORE_FEATURES = n_feat; diff --git a/src/feature_creation/node/value_storage/nodes_value_containers.hpp b/src/feature_creation/node/value_storage/nodes_value_containers.hpp index 09c08022da3a21b8e0b17d6e2907fffd15f8ec24..c2a7827753b760c515063c07315bd255d249ce54 100644 --- a/src/feature_creation/node/value_storage/nodes_value_containers.hpp +++ b/src/feature_creation/node/value_storage/nodes_value_containers.hpp @@ -12,6 +12,7 @@ #include <algorithm> #include <memory> +#include <numeric> #include <vector> #include <omp.h> @@ -28,12 +29,28 @@ namespace node_value_arrs extern std::vector<int> TEMP_STORAGE_REG; //!< Register to see which feature is stored in each slot for the training data extern std::vector<int> TEMP_STORAGE_TEST_REG; //!< Register to see which feature is stored in each slot for the test data + extern std::vector<int> TASK_SZ_TRAIN; //!< Number of training samples per task + extern std::vector<int> TASK_SZ_TEST; //!< Number of test sample per task + extern int N_SELECTED; //!< Number of features selected extern int N_SAMPLES; //!< Number of training samples for each feature extern int N_SAMPLES_TEST; //!< Number of test samples for each feature - extern int N_STORE_FEATURES; //!< Number of features with stored values + extern unsigned long int N_STORE_FEATURES; //!< Number of features with stored values extern int N_RUNGS_STORED; //!< Number of rungs with values stored extern int MAX_N_THREADS; //!< Get the maximum number of threads possible + extern unsigned long int NEXT_IND; //!< The next array index to use + + /** + * @brief Initialize the node value arrays + * @details Using the size of the initial feature space constructor the storage arrays + * + * @param n_samples Number of training samples for each feature + * @param n_samples_test Number of test samples for each feature + * @param n_primary_feat Number of primary features + * @param set_test_task_sz If True reset the task_sz vectors + */ + void initialize_values_arr(int n_samples, int n_samples_test, int n_primary_feat, bool et_task_sz); + /** * @brief Initialize the node value arrays * @details Using the size of the initial feature space constructor the storage arrays @@ -42,7 +59,20 @@ namespace node_value_arrs * @param n_samples_test Number of test samples for each feature * @param n_primary_feat Number of primary features */ - void initialize_values_arr(int n_samples, int n_samples_test, int n_primary_feat); + inline void initialize_values_arr(int n_samples, int n_samples_test, int n_primary_feat) + { + initialize_values_arr(n_samples, n_samples_test, n_primary_feat, true); + } + + /** + * @brief Initialize the node value arrays + * @details Using the size of the initial feature space constructor the storage arrays + * + * @param task_sz_train Number of training samples per task + * @param task_sz_test Number of test sample per task + * @param n_primary_feat Number of primary features + */ + void initialize_values_arr(std::vector<int> task_sz_train, std::vector<int> task_sz_test, int n_primary_feat); /** * @brief Resize the node value arrays @@ -52,7 +82,7 @@ namespace node_value_arrs * @param n_feat number of features to store * @param use_temp If true keep the temporary_storage */ - void resize_values_arr(int n_dims, int n_feat, bool use_temp); + void resize_values_arr(int n_dims, unsigned long int n_feat, bool use_temp); /** * @brief Initialize the descriptor matrix @@ -69,6 +99,20 @@ namespace node_value_arrs */ void resize_d_matrix_arr(int n_select); + /** + * @brief Reset the global TASK_SZ_TRAIN vector + * + * @param task_sz_train the new task_sz train + */ + void set_task_sz_train(std::vector<int> task_sz_train); + + /** + * @brief Reset the global TASK_SZ_TEST vector + * + * @param task_sz_train the new test_sz train + */ + void set_task_sz_test(std::vector<int> task_sz_test); + /** * @brief Get a reference slot/feature register of the training data * diff --git a/src/python/bindings_docstring_keyed.cpp b/src/python/bindings_docstring_keyed.cpp index 0bcfdec07cab7c4f48efbc2a39ef9d2fefebc987..32d5e90c14eca4a8f94f65640e8e720f5b9322d5 100644 --- a/src/python/bindings_docstring_keyed.cpp +++ b/src/python/bindings_docstring_keyed.cpp @@ -37,8 +37,10 @@ void sisso::register_all() sisso::feature_creation::node::registerSqrtNode(); sisso::feature_creation::node::registerSixPowNode(); + void (*init_val_ar)(int, int, int) = &node_value_arrs::initialize_values_arr; + def("phi_selected_from_file", &str2node::phi_selected_from_file_py); - def("initialize_values_arr", &node_value_arrs::initialize_values_arr); + def("initialize_values_arr", init_val_ar); def("initialize_d_matrix_arr", &node_value_arrs::initialize_d_matrix_arr); }