diff --git a/src/feature_creation/feature_space/FeatureSpace.cpp b/src/feature_creation/feature_space/FeatureSpace.cpp index 6c4c41ca358eb231b7368e977755d15935d9d2f8..db8bee3d098283d245a5cdd3f881c2260805ce13 100644 --- a/src/feature_creation/feature_space/FeatureSpace.cpp +++ b/src/feature_creation/feature_space/FeatureSpace.cpp @@ -80,7 +80,7 @@ void FeatureSpace::initialize_fs(std::vector<double> prop) for(auto & op : _allowed_ops) { - if((op.compare("add") == 0) || (op.compare("sub") == 0) || (op.compare("mult") == 0) || (op.compare("abs_diff") == 0)) + if((op.compare("add") == 0) || (op.compare("mult") == 0) || (op.compare("abs_diff") == 0) || (op.compare("sub") == 0)) _com_bin_operators.push_back(allowed_op_maps::binary_operator_map[op]); else if((op.compare("div") == 0)) _bin_operators.push_back(allowed_op_maps::binary_operator_map[op]); @@ -113,7 +113,7 @@ void FeatureSpace::generate_new_feats(std::vector<node_ptr>::iterator& feat, std for(auto& op : _com_bin_operators) { - for(auto feat_2 = _phi.begin(); feat_2 != feat; ++feat_2) + for(auto feat_2 = _phi.begin(); feat_2 < feat; ++feat_2) { try { @@ -129,7 +129,7 @@ void FeatureSpace::generate_new_feats(std::vector<node_ptr>::iterator& feat, std for(auto& op : _bin_operators) { - for(auto feat_2 = _phi.begin(); feat_2 != feat; ++feat_2) + for(auto feat_2 = _phi.begin(); feat_2 < feat; ++feat_2) { try { @@ -161,6 +161,7 @@ void FeatureSpace::generate_feature_space(std::vector<double>& prop) for(int nn = 1; nn <= _max_phi - _n_rung_generate; ++nn) { + node_value_arrs::clear_temp_reg(); if(nn == _max_phi) { u_bound = _u_bound; @@ -170,11 +171,13 @@ void FeatureSpace::generate_feature_space(std::vector<double>& prop) _n_feat = _phi.size(); int feat_ind = _phi.size(); - for(auto feat_1 = _phi.begin() + _mpi_comm->rank() + _start_gen.back(); feat_1 < _phi.end(); feat_1 += _mpi_comm->size()) - generate_new_feats(feat_1, next_phi, feat_ind, l_bound, u_bound); + std::array<int, 2> start_end = _mpi_comm->get_start_end_from_list(_phi.size() - _start_gen.back(), _start_gen.back()); + for(auto feat_1 = _phi.begin() + start_end[0]; feat_1 < _phi.begin() + start_end[1]; ++feat_1) + generate_new_feats(feat_1, next_phi, feat_ind, l_bound, u_bound); _start_gen.push_back(_phi.size()); + node_value_arrs::clear_temp_reg(); if((nn < _max_phi) || (nn <= _n_rung_store) || (_mpi_comm->size() == 1)) { int new_phi_size; @@ -197,6 +200,7 @@ void FeatureSpace::generate_feature_space(std::vector<double>& prop) } new_phi_size = _phi.size(); mpi::broadcast(*_mpi_comm, new_phi_size, 0); + for(int bb = 0; bb <= (new_phi_size - phi_size_start) / 10000; ++bb) mpi::broadcast(*_mpi_comm, &_phi[phi_size_start + bb * 10000], std::min(10000, new_phi_size - phi_size_start - bb * 10000), 0); } @@ -209,14 +213,9 @@ void FeatureSpace::generate_feature_space(std::vector<double>& prop) for(int bb = 0; bb <= (new_phi_size - phi_size_start) / 10000; ++bb) mpi::broadcast(*_mpi_comm, &_phi[phi_size_start + bb * 10000], std::min(10000, new_phi_size - phi_size_start - bb * 10000), 0); } - - // feat_ind = _phi.size(); - node_value_arrs::clear_temp_reg(); if(nn < _max_phi) { - // std::fill_n(node_value_arrs::TEMP_STORAGE_REG.data(), node_value_arrs::TEMP_STORAGE_REG.size(), -1); - // Remove identical features _scores.resize(_phi.size()); _mpi_comm->barrier(); @@ -229,8 +228,6 @@ void FeatureSpace::generate_feature_space(std::vector<double>& prop) std::vector<int> del_inds; _mpi_comm->barrier(); - - for(int sc = 0; sc < _scores.size() - 1; ++sc) if(_scores[inds[sc + 1]] - _scores[inds[sc]] < 1e-10) if(std::abs(util_funcs::r(_phi[_start_gen.back() + inds[sc]]->value_ptr(), _phi[_start_gen.back() + inds[sc]]->value_ptr(), _n_samp) - std::abs(util_funcs::r(_phi[_start_gen.back() + inds[sc]]->value_ptr(), _phi[_start_gen.back() + inds[sc + 1]]->value_ptr(), _n_samp))) < 1e-13) @@ -244,13 +241,12 @@ void FeatureSpace::generate_feature_space(std::vector<double>& prop) for(int ff = _start_gen.back(); ff < _phi.size(); ++ff) _phi[ff]->reindex(ff); } - if(nn <= _n_rung_store) { bool use_temp = (nn != _max_phi) || (_max_phi > _n_rung_store); node_value_arrs::resize_values_arr(nn, _phi.size(), use_temp); - for(int ff = _start_gen[0]; ff < _phi.size(); ++ff) + for(int ff = _start_gen.back(); ff < _phi.size(); ++ff) { _phi[ff]->set_value(); _phi[ff]->set_test_value(); @@ -436,12 +432,6 @@ void FeatureSpace::project_generated(double* prop, int size, std::vector<node_pt { double cur_score = scores[inds[ii]]; - // bool is_valid = valid_score_against_current(scores_sel.size(), generated_phi[inds[ii]]->value_ptr(), cur_score, scores_sel, scores_comp); - - // // Check the feature against those selected from previous SIS iterations - // if((node_value_arrs::N_SELECTED > _n_sis_select) && is_valid) - // is_valid = valid_score_against_past(generated_phi[inds[ii]]->value_ptr(), scores[inds[ii]], scores_prev_sel, scores_comp); - if(valid_feature_against_selected(generated_phi[inds[ii]]->value_ptr(), node_value_arrs::N_SELECTED - _n_sis_select + scores_sel.size())) { if(scores_sel.size() == _n_sis_select) @@ -478,33 +468,8 @@ bool FeatureSpace::valid_feature_against_selected(double* val_ptr, int end_sel, { double base_val = util_funcs::r(val_ptr, val_ptr, _n_samp); for(int dd = start_sel; dd < end_sel; ++dd) - if(base_val - std::abs(util_funcs::r(node_value_arrs::get_d_matrix_ptr(dd), val_ptr, _n_samp)) < 1.0 - _cross_cor_max + 1e-10) - return false; - return true; -} - -bool FeatureSpace::valid_score_against_past(double* val_ptr, double cur_score, std::vector<double> scores_past, std::vector<double>& scores_comp) -{ - std::transform(scores_past.begin(), scores_past.end(), scores_comp.begin(), [&cur_score](double score){return std::abs(cur_score - score);}); - - // If two scores are the same then they are possibly the same feature, if not then they can't be - if(*std::min_element(scores_comp.begin(), scores_comp.end()) < 1e-10) { - int dd = std::min_element(scores_comp.begin(), scores_comp.end()) - scores_comp.begin(); - if(std::abs(util_funcs::r(val_ptr, val_ptr, _n_samp) - std::abs(util_funcs::r(node_value_arrs::get_d_matrix_ptr(dd), val_ptr, _n_samp))) < 1.0 - _cross_cor_max + 1e-10) - return false; - } - return true; -} - -bool FeatureSpace::valid_score_against_current(int end_check, double* val_ptr, double cur_score, std::vector<double>& scores_sel, std::vector<double>& scores_comp) -{ - std::transform(scores_sel.begin(), scores_sel.begin() + end_check, scores_comp.begin(), [&cur_score](double score){return std::abs(cur_score - score);}); - // If two scores are the same then they are possibly the same feature, if not then they can't be - if(*std::min_element(scores_comp.begin(), scores_comp.begin() + end_check) < 1e-10) - { - int dd = std::min_element(scores_comp.begin(), scores_comp.begin() + end_check) - scores_comp.begin(); - if(std::abs(util_funcs::r(val_ptr, val_ptr, _n_samp) - std::abs(util_funcs::r(node_value_arrs::get_d_matrix_ptr(node_value_arrs::N_SELECTED - _n_sis_select + dd), val_ptr, _n_samp))) < 1.0 - _cross_cor_max + 1e-10) + if(base_val - std::abs(util_funcs::r(node_value_arrs::get_d_matrix_ptr(dd), val_ptr, _n_samp)) < 1.0 - _cross_cor_max + 1e-10) return false; } return true; @@ -553,11 +518,6 @@ void FeatureSpace::sis(std::vector<double>& prop) while((cur_feat_local != _n_sis_select) && (ii < _scores.size())) { - // bool is_valid = valid_score_against_current(cur_feat_local, _phi[inds[ii]]->value_ptr(), _scores[inds[ii]], scores_sel, scores_comp); - // // Check the feature against those selected from previous SIS iterations - // if(cur_feat > 0 && is_valid) - // is_valid = valid_score_against_past(_phi[inds[ii]]->value_ptr(), _scores[inds[ii]], scores_prev_sel, scores_comp); - if(valid_feature_against_selected(_phi[inds[ii]]->value_ptr(), cur_feat + cur_feat_local)) { scores_sel[cur_feat_local] = _scores[inds[ii]]; @@ -580,7 +540,6 @@ void FeatureSpace::sis(std::vector<double>& prop) node_value_arrs::clear_temp_test_reg(); } - // If we are only on one process then phi_sel are the selected features if(_mpi_comm->size() > 1) { @@ -687,29 +646,28 @@ void FeatureSpace::sis(std::vector<double>& prop) scores_sel = std::vector<double>(_n_sis_select, 0.0); // Get the n_sis_select best features (compare against features sent from other processes) - while((cur_feat != node_value_arrs::N_SELECTED) && (ii < sent_scores.size())) + while((cur_feat + cur_feat_local != node_value_arrs::N_SELECTED) && (ii < sent_scores.size())) { - if(valid_feature_against_selected(sent_phi[inds[ii]]->value().data(), cur_feat + cur_feat_local, cur_feat)) + if(valid_feature_against_selected(sent_phi[inds[ii]]->value_ptr(), cur_feat + cur_feat_local, cur_feat)) { - out_file_stream << std::setw(14) <<std::left << cur_feat << sent_phi[inds[ii]]->postfix_expr() << std::endl; - sum_file_stream << std::setw(14) <<std::left << cur_feat << std::setw(24) << std::setprecision(18) << std::left << -1 * sent_scores[inds[ii]] << sent_phi[inds[ii]]->expr() << std::endl; + out_file_stream << std::setw(14) <<std::left << cur_feat + cur_feat_local << sent_phi[inds[ii]]->postfix_expr() << std::endl; + sum_file_stream << std::setw(14) <<std::left << cur_feat + cur_feat_local << std::setw(24) << std::setprecision(18) << std::left << -1 * sent_scores[inds[ii]] << sent_phi[inds[ii]]->expr() << std::endl; _phi_selected.push_back(sent_phi[inds[ii]]); _phi_selected.back()->set_selected(true); - _phi_selected.back()->set_d_mat_ind(cur_feat); + _phi_selected.back()->set_d_mat_ind(cur_feat + cur_feat_local); _phi_selected.back()->set_value(); scores_sel[cur_feat_local] = sent_scores[inds[ii]]; ++cur_feat_local; - ++cur_feat; + // ++cur_feat; } ++ii; } } if(_phi_selected.size() != node_value_arrs::N_SELECTED) throw std::logic_error("SIS went through all sent features and did not select enough (" + std::to_string(_phi_selected.size() - node_value_arrs::N_SELECTED + _n_sis_select) + " not " + std::to_string(_n_sis_select) + ")."); - cur_feat -= cur_feat_local; } else { diff --git a/src/feature_creation/feature_space/FeatureSpace.hpp b/src/feature_creation/feature_space/FeatureSpace.hpp index d328c28906423005ba7e4a22f0e508707f594c86..a2586d657242080206cb57b7cbe7347eea243be1 100644 --- a/src/feature_creation/feature_space/FeatureSpace.hpp +++ b/src/feature_creation/feature_space/FeatureSpace.hpp @@ -230,32 +230,6 @@ public: */ bool valid_feature_against_selected(double* val_ptr, int end_sel, int start_sel = 0); - /** - * @brief Check if a feature overlaps with a feature previously selected in earlier SIS iterations - * @details Compares the projection score of the current candidate feature with all those of previously selected features (using the current prop) and - * if they are within 1e-10, then check the correlation between the features themselves - * - * @param val_ptr pointer to the candidate feature's data - * @param cur_score the projection score of the candidate feature - * @param scores_past The projection scores of the previous features - * @param scores_comp vector to temporarily store the comparison of projection scores - * @return True if the feature does not overlap with any previously selected - */ - bool valid_score_against_past(double* val_ptr, double cur_score, std::vector<double> scores_past, std::vector<double>& scores_comp); - - /** - * @brief Check if a feature overlaps with a feature previously selected in this SIS iterations - * @details CCompares the projection score of the current candidate feature with all those of previously selected features in this iteration and - * if they are within 1e-10, then check the correlation between the features themselves - * - * @param end_check the end point to stop the comparison (the same as the current number of selected features) - * @param val_ptr pointer to the candidate feature's data - * @param cur_score the projection score of the candidate feature - * @param scores_selected The projection scores of the previous features - * @param scores_comp vector to temporarily store the comparison of projection scores - * @return True if the feature does not overlap with any previously selected - */ - bool valid_score_against_current(int end_check, double* val_ptr, double cur_score, std::vector<double>& scores_selected, std::vector<double>& scores_comp); /** * @brief Perform SIS on a feature set with a specified property * @details Perform sure-independence screening with either the correct property or the error diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/absolute_difference.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/absolute_difference.cpp index 708e9c52fb1d5941064e27d84cb4b3ec8e69f685..f3b069b0fe8cf3747e448633da2add75a452c655 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/absolute_difference.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/absolute_difference.cpp @@ -14,7 +14,7 @@ AbsDiffNode::AbsDiffNode(std::array<node_ptr, 2> feats, int feat_ind, double l_b std::map<std::string, int> add_sub_leaves; int expected_abs_tot = 0; _feats[0]->update_add_sub_leaves(add_sub_leaves, 1, expected_abs_tot); - _feats[1]->update_add_sub_leaves(add_sub_leaves, 1, expected_abs_tot); + _feats[1]->update_add_sub_leaves(add_sub_leaves,-1, expected_abs_tot); if((add_sub_leaves.size() < 2)) throw InvalidFeatureException(); @@ -43,7 +43,7 @@ AbsDiffNode::AbsDiffNode(node_ptr feat_1, node_ptr feat_2, int feat_ind, double std::map<std::string, int> add_sub_leaves; int expected_abs_tot = 0; _feats[0]->update_add_sub_leaves(add_sub_leaves, 1, expected_abs_tot); - _feats[1]->update_add_sub_leaves(add_sub_leaves, 1, expected_abs_tot); + _feats[1]->update_add_sub_leaves(add_sub_leaves,-1, expected_abs_tot); if((add_sub_leaves.size() < 2)) throw InvalidFeatureException(); diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/absolute_value.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/absolute_value.cpp index 90ee52bf8f6c2bec4591159db37807a1fa4ebd1d..931269fe46f4d42ce8fa76f07ce4e6a28151f308 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/absolute_value.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/absolute_value.cpp @@ -8,6 +8,7 @@ AbsNode::AbsNode() AbsNode::AbsNode(std::array<node_ptr, 1> feats, int feat_ind, double l_bound, double u_bound): OperatorNode(feats, feat_ind) { + if((feats[0]->type() == NODE_TYPE::ABS) || (feats[0]->type() == NODE_TYPE::ABS_DIFF)) set_value(); if(is_nan() || is_const() || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) > u_bound) || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) < l_bound)) throw InvalidFeatureException(); @@ -18,6 +19,7 @@ AbsNode::AbsNode(std::array<node_ptr, 1> feats, int feat_ind, double l_bound, do AbsNode::AbsNode(node_ptr feat, int feat_ind, double l_bound, double u_bound): OperatorNode({feat}, feat_ind) { + if((feat->type() == NODE_TYPE::ABS) || (feat->type() == NODE_TYPE::ABS_DIFF)) set_value(); if(is_nan() || is_const() || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) > u_bound) || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) < l_bound)) throw InvalidFeatureException(); diff --git a/src/feature_creation/units/Unit.cpp b/src/feature_creation/units/Unit.cpp index 75e4bec0285a1ec6093ad41c5fa021f7944b8a34..7098f0bd7c01e8c67089944aca69e79ec23db31e 100644 --- a/src/feature_creation/units/Unit.cpp +++ b/src/feature_creation/units/Unit.cpp @@ -159,6 +159,7 @@ bool Unit::equal(Unit unit_2) else if(_dct[el.first] != el.second) return false; } + for(auto& el : _dct) { if((unit_2.dct().count(el.first) == 0) && (el.second != 0)) @@ -166,11 +167,12 @@ bool Unit::equal(Unit unit_2) else if(unit_2.dct()[el.first] != el.second) return false; } + if(unit_2.dct().size() == 0) { for(auto& el : _dct) { - if(_dct.count(el.first) != 0) + if((_dct.count(el.first) != 0) && (el.second != 0)) return false; } } diff --git a/src/mpi_interface/MPI_Interface.cpp b/src/mpi_interface/MPI_Interface.cpp index 992a6449397170ac38a00f7b7b1b0cfbd0e5ff08..d0c18d4ead03bd59e21b7194a7ccc88811aec362 100644 --- a/src/mpi_interface/MPI_Interface.cpp +++ b/src/mpi_interface/MPI_Interface.cpp @@ -3,6 +3,17 @@ MPI_Interface::MPI_Interface() : boost::mpi::communicator() {} +std::array<int, 2> MPI_Interface::get_start_end_from_list(int sz, int start) +{ + int els_per_rank = sz / size(); + int remaineder = sz % size(); + + std::array<int, 2> start_end; + start_end[0] = start + els_per_rank * rank() + std::min(rank(), remaineder); + start_end[1] = start + els_per_rank * (rank() + 1) + std::min(rank() + 1, remaineder); + + return start_end; +} std::shared_ptr<MPI_Interface> mpi_setup::comm; diff --git a/src/mpi_interface/MPI_Interface.hpp b/src/mpi_interface/MPI_Interface.hpp index 1a925415fbce220031680a30768c839b362122e2..3374f9eb1461cff71bfe7dc905d3edca5729583a 100644 --- a/src/mpi_interface/MPI_Interface.hpp +++ b/src/mpi_interface/MPI_Interface.hpp @@ -40,6 +40,7 @@ public: */ int cantorTagGen(unsigned int procSend, unsigned int procRecv, unsigned int maxOffest, unsigned int offest) { return (int((procSend + procRecv) * (procSend + procSend +1) / 2) + procRecv) * maxOffest + offest; } + std::array<int, 2> get_start_end_from_list(int sz, int start); };