Commit f1d78764 authored by Thomas Purcell's avatar Thomas Purcell
Browse files

Bug fixes

Segfault from wrong start/end in cross-correlation checks

Unit == unit had some issues
parent b7a70af3
......@@ -80,7 +80,7 @@ void FeatureSpace::initialize_fs(std::vector<double> prop)
for(auto & op : _allowed_ops)
{
if((op.compare("add") == 0) || (op.compare("sub") == 0) || (op.compare("mult") == 0) || (op.compare("abs_diff") == 0))
if((op.compare("add") == 0) || (op.compare("mult") == 0) || (op.compare("abs_diff") == 0) || (op.compare("sub") == 0))
_com_bin_operators.push_back(allowed_op_maps::binary_operator_map[op]);
else if((op.compare("div") == 0))
_bin_operators.push_back(allowed_op_maps::binary_operator_map[op]);
......@@ -113,7 +113,7 @@ void FeatureSpace::generate_new_feats(std::vector<node_ptr>::iterator& feat, std
for(auto& op : _com_bin_operators)
{
for(auto feat_2 = _phi.begin(); feat_2 != feat; ++feat_2)
for(auto feat_2 = _phi.begin(); feat_2 < feat; ++feat_2)
{
try
{
......@@ -129,7 +129,7 @@ void FeatureSpace::generate_new_feats(std::vector<node_ptr>::iterator& feat, std
for(auto& op : _bin_operators)
{
for(auto feat_2 = _phi.begin(); feat_2 != feat; ++feat_2)
for(auto feat_2 = _phi.begin(); feat_2 < feat; ++feat_2)
{
try
{
......@@ -161,6 +161,7 @@ void FeatureSpace::generate_feature_space(std::vector<double>& prop)
for(int nn = 1; nn <= _max_phi - _n_rung_generate; ++nn)
{
node_value_arrs::clear_temp_reg();
if(nn == _max_phi)
{
u_bound = _u_bound;
......@@ -170,11 +171,13 @@ void FeatureSpace::generate_feature_space(std::vector<double>& prop)
_n_feat = _phi.size();
int feat_ind = _phi.size();
for(auto feat_1 = _phi.begin() + _mpi_comm->rank() + _start_gen.back(); feat_1 < _phi.end(); feat_1 += _mpi_comm->size())
generate_new_feats(feat_1, next_phi, feat_ind, l_bound, u_bound);
std::array<int, 2> start_end = _mpi_comm->get_start_end_from_list(_phi.size() - _start_gen.back(), _start_gen.back());
for(auto feat_1 = _phi.begin() + start_end[0]; feat_1 < _phi.begin() + start_end[1]; ++feat_1)
generate_new_feats(feat_1, next_phi, feat_ind, l_bound, u_bound);
_start_gen.push_back(_phi.size());
node_value_arrs::clear_temp_reg();
if((nn < _max_phi) || (nn <= _n_rung_store) || (_mpi_comm->size() == 1))
{
int new_phi_size;
......@@ -197,6 +200,7 @@ void FeatureSpace::generate_feature_space(std::vector<double>& prop)
}
new_phi_size = _phi.size();
mpi::broadcast(*_mpi_comm, new_phi_size, 0);
for(int bb = 0; bb <= (new_phi_size - phi_size_start) / 10000; ++bb)
mpi::broadcast(*_mpi_comm, &_phi[phi_size_start + bb * 10000], std::min(10000, new_phi_size - phi_size_start - bb * 10000), 0);
}
......@@ -209,14 +213,9 @@ void FeatureSpace::generate_feature_space(std::vector<double>& prop)
for(int bb = 0; bb <= (new_phi_size - phi_size_start) / 10000; ++bb)
mpi::broadcast(*_mpi_comm, &_phi[phi_size_start + bb * 10000], std::min(10000, new_phi_size - phi_size_start - bb * 10000), 0);
}
// feat_ind = _phi.size();
node_value_arrs::clear_temp_reg();
if(nn < _max_phi)
{
// std::fill_n(node_value_arrs::TEMP_STORAGE_REG.data(), node_value_arrs::TEMP_STORAGE_REG.size(), -1);
// Remove identical features
_scores.resize(_phi.size());
_mpi_comm->barrier();
......@@ -229,8 +228,6 @@ void FeatureSpace::generate_feature_space(std::vector<double>& prop)
std::vector<int> del_inds;
_mpi_comm->barrier();
for(int sc = 0; sc < _scores.size() - 1; ++sc)
if(_scores[inds[sc + 1]] - _scores[inds[sc]] < 1e-10)
if(std::abs(util_funcs::r(_phi[_start_gen.back() + inds[sc]]->value_ptr(), _phi[_start_gen.back() + inds[sc]]->value_ptr(), _n_samp) - std::abs(util_funcs::r(_phi[_start_gen.back() + inds[sc]]->value_ptr(), _phi[_start_gen.back() + inds[sc + 1]]->value_ptr(), _n_samp))) < 1e-13)
......@@ -244,13 +241,12 @@ void FeatureSpace::generate_feature_space(std::vector<double>& prop)
for(int ff = _start_gen.back(); ff < _phi.size(); ++ff)
_phi[ff]->reindex(ff);
}
if(nn <= _n_rung_store)
{
bool use_temp = (nn != _max_phi) || (_max_phi > _n_rung_store);
node_value_arrs::resize_values_arr(nn, _phi.size(), use_temp);
for(int ff = _start_gen[0]; ff < _phi.size(); ++ff)
for(int ff = _start_gen.back(); ff < _phi.size(); ++ff)
{
_phi[ff]->set_value();
_phi[ff]->set_test_value();
......@@ -436,12 +432,6 @@ void FeatureSpace::project_generated(double* prop, int size, std::vector<node_pt
{
double cur_score = scores[inds[ii]];
// bool is_valid = valid_score_against_current(scores_sel.size(), generated_phi[inds[ii]]->value_ptr(), cur_score, scores_sel, scores_comp);
// // Check the feature against those selected from previous SIS iterations
// if((node_value_arrs::N_SELECTED > _n_sis_select) && is_valid)
// is_valid = valid_score_against_past(generated_phi[inds[ii]]->value_ptr(), scores[inds[ii]], scores_prev_sel, scores_comp);
if(valid_feature_against_selected(generated_phi[inds[ii]]->value_ptr(), node_value_arrs::N_SELECTED - _n_sis_select + scores_sel.size()))
{
if(scores_sel.size() == _n_sis_select)
......@@ -478,33 +468,8 @@ bool FeatureSpace::valid_feature_against_selected(double* val_ptr, int end_sel,
{
double base_val = util_funcs::r(val_ptr, val_ptr, _n_samp);
for(int dd = start_sel; dd < end_sel; ++dd)
if(base_val - std::abs(util_funcs::r(node_value_arrs::get_d_matrix_ptr(dd), val_ptr, _n_samp)) < 1.0 - _cross_cor_max + 1e-10)
return false;
return true;
}
bool FeatureSpace::valid_score_against_past(double* val_ptr, double cur_score, std::vector<double> scores_past, std::vector<double>& scores_comp)
{
std::transform(scores_past.begin(), scores_past.end(), scores_comp.begin(), [&cur_score](double score){return std::abs(cur_score - score);});
// If two scores are the same then they are possibly the same feature, if not then they can't be
if(*std::min_element(scores_comp.begin(), scores_comp.end()) < 1e-10)
{
int dd = std::min_element(scores_comp.begin(), scores_comp.end()) - scores_comp.begin();
if(std::abs(util_funcs::r(val_ptr, val_ptr, _n_samp) - std::abs(util_funcs::r(node_value_arrs::get_d_matrix_ptr(dd), val_ptr, _n_samp))) < 1.0 - _cross_cor_max + 1e-10)
return false;
}
return true;
}
bool FeatureSpace::valid_score_against_current(int end_check, double* val_ptr, double cur_score, std::vector<double>& scores_sel, std::vector<double>& scores_comp)
{
std::transform(scores_sel.begin(), scores_sel.begin() + end_check, scores_comp.begin(), [&cur_score](double score){return std::abs(cur_score - score);});
// If two scores are the same then they are possibly the same feature, if not then they can't be
if(*std::min_element(scores_comp.begin(), scores_comp.begin() + end_check) < 1e-10)
{
int dd = std::min_element(scores_comp.begin(), scores_comp.begin() + end_check) - scores_comp.begin();
if(std::abs(util_funcs::r(val_ptr, val_ptr, _n_samp) - std::abs(util_funcs::r(node_value_arrs::get_d_matrix_ptr(node_value_arrs::N_SELECTED - _n_sis_select + dd), val_ptr, _n_samp))) < 1.0 - _cross_cor_max + 1e-10)
if(base_val - std::abs(util_funcs::r(node_value_arrs::get_d_matrix_ptr(dd), val_ptr, _n_samp)) < 1.0 - _cross_cor_max + 1e-10)
return false;
}
return true;
......@@ -553,11 +518,6 @@ void FeatureSpace::sis(std::vector<double>& prop)
while((cur_feat_local != _n_sis_select) && (ii < _scores.size()))
{
// bool is_valid = valid_score_against_current(cur_feat_local, _phi[inds[ii]]->value_ptr(), _scores[inds[ii]], scores_sel, scores_comp);
// // Check the feature against those selected from previous SIS iterations
// if(cur_feat > 0 && is_valid)
// is_valid = valid_score_against_past(_phi[inds[ii]]->value_ptr(), _scores[inds[ii]], scores_prev_sel, scores_comp);
if(valid_feature_against_selected(_phi[inds[ii]]->value_ptr(), cur_feat + cur_feat_local))
{
scores_sel[cur_feat_local] = _scores[inds[ii]];
......@@ -580,7 +540,6 @@ void FeatureSpace::sis(std::vector<double>& prop)
node_value_arrs::clear_temp_test_reg();
}
// If we are only on one process then phi_sel are the selected features
if(_mpi_comm->size() > 1)
{
......@@ -687,29 +646,28 @@ void FeatureSpace::sis(std::vector<double>& prop)
scores_sel = std::vector<double>(_n_sis_select, 0.0);
// Get the n_sis_select best features (compare against features sent from other processes)
while((cur_feat != node_value_arrs::N_SELECTED) && (ii < sent_scores.size()))
while((cur_feat + cur_feat_local != node_value_arrs::N_SELECTED) && (ii < sent_scores.size()))
{
if(valid_feature_against_selected(sent_phi[inds[ii]]->value().data(), cur_feat + cur_feat_local, cur_feat))
if(valid_feature_against_selected(sent_phi[inds[ii]]->value_ptr(), cur_feat + cur_feat_local, cur_feat))
{
out_file_stream << std::setw(14) <<std::left << cur_feat << sent_phi[inds[ii]]->postfix_expr() << std::endl;
sum_file_stream << std::setw(14) <<std::left << cur_feat << std::setw(24) << std::setprecision(18) << std::left << -1 * sent_scores[inds[ii]] << sent_phi[inds[ii]]->expr() << std::endl;
out_file_stream << std::setw(14) <<std::left << cur_feat + cur_feat_local << sent_phi[inds[ii]]->postfix_expr() << std::endl;
sum_file_stream << std::setw(14) <<std::left << cur_feat + cur_feat_local << std::setw(24) << std::setprecision(18) << std::left << -1 * sent_scores[inds[ii]] << sent_phi[inds[ii]]->expr() << std::endl;
_phi_selected.push_back(sent_phi[inds[ii]]);
_phi_selected.back()->set_selected(true);
_phi_selected.back()->set_d_mat_ind(cur_feat);
_phi_selected.back()->set_d_mat_ind(cur_feat + cur_feat_local);
_phi_selected.back()->set_value();
scores_sel[cur_feat_local] = sent_scores[inds[ii]];
++cur_feat_local;
++cur_feat;
// ++cur_feat;
}
++ii;
}
}
if(_phi_selected.size() != node_value_arrs::N_SELECTED)
throw std::logic_error("SIS went through all sent features and did not select enough (" + std::to_string(_phi_selected.size() - node_value_arrs::N_SELECTED + _n_sis_select) + " not " + std::to_string(_n_sis_select) + ").");
cur_feat -= cur_feat_local;
}
else
{
......
......@@ -230,32 +230,6 @@ public:
*/
bool valid_feature_against_selected(double* val_ptr, int end_sel, int start_sel = 0);
/**
* @brief Check if a feature overlaps with a feature previously selected in earlier SIS iterations
* @details Compares the projection score of the current candidate feature with all those of previously selected features (using the current prop) and
* if they are within 1e-10, then check the correlation between the features themselves
*
* @param val_ptr pointer to the candidate feature's data
* @param cur_score the projection score of the candidate feature
* @param scores_past The projection scores of the previous features
* @param scores_comp vector to temporarily store the comparison of projection scores
* @return True if the feature does not overlap with any previously selected
*/
bool valid_score_against_past(double* val_ptr, double cur_score, std::vector<double> scores_past, std::vector<double>& scores_comp);
/**
* @brief Check if a feature overlaps with a feature previously selected in this SIS iteration
* @details Compares the projection score of the current candidate feature with all those of previously selected features in this iteration and
* if they are within 1e-10, then check the correlation between the features themselves
*
* @param end_check the end point to stop the comparison (the same as the current number of selected features)
* @param val_ptr pointer to the candidate feature's data
* @param cur_score the projection score of the candidate feature
* @param scores_selected The projection scores of the previous features
* @param scores_comp vector to temporarily store the comparison of projection scores
* @return True if the feature does not overlap with any previously selected
*/
bool valid_score_against_current(int end_check, double* val_ptr, double cur_score, std::vector<double>& scores_selected, std::vector<double>& scores_comp);
/**
* @brief Perform SIS on a feature set with a specified property
* @details Perform sure-independence screening with either the correct property or the error
......
......@@ -14,7 +14,7 @@ AbsDiffNode::AbsDiffNode(std::array<node_ptr, 2> feats, int feat_ind, double l_b
std::map<std::string, int> add_sub_leaves;
int expected_abs_tot = 0;
_feats[0]->update_add_sub_leaves(add_sub_leaves, 1, expected_abs_tot);
_feats[1]->update_add_sub_leaves(add_sub_leaves, 1, expected_abs_tot);
_feats[1]->update_add_sub_leaves(add_sub_leaves,-1, expected_abs_tot);
if((add_sub_leaves.size() < 2))
throw InvalidFeatureException();
......@@ -43,7 +43,7 @@ AbsDiffNode::AbsDiffNode(node_ptr feat_1, node_ptr feat_2, int feat_ind, double
std::map<std::string, int> add_sub_leaves;
int expected_abs_tot = 0;
_feats[0]->update_add_sub_leaves(add_sub_leaves, 1, expected_abs_tot);
_feats[1]->update_add_sub_leaves(add_sub_leaves, 1, expected_abs_tot);
_feats[1]->update_add_sub_leaves(add_sub_leaves,-1, expected_abs_tot);
if((add_sub_leaves.size() < 2))
throw InvalidFeatureException();
......
......@@ -8,6 +8,7 @@ AbsNode::AbsNode()
AbsNode::AbsNode(std::array<node_ptr, 1> feats, int feat_ind, double l_bound, double u_bound):
OperatorNode(feats, feat_ind)
{
if((feats[0]->type() == NODE_TYPE::ABS) || (feats[0]->type() == NODE_TYPE::ABS_DIFF))
set_value();
if(is_nan() || is_const() || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) > u_bound) || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) < l_bound))
throw InvalidFeatureException();
......@@ -18,6 +19,7 @@ AbsNode::AbsNode(std::array<node_ptr, 1> feats, int feat_ind, double l_bound, do
AbsNode::AbsNode(node_ptr feat, int feat_ind, double l_bound, double u_bound):
OperatorNode({feat}, feat_ind)
{
if((feat->type() == NODE_TYPE::ABS) || (feat->type() == NODE_TYPE::ABS_DIFF))
set_value();
if(is_nan() || is_const() || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) > u_bound) || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) < l_bound))
throw InvalidFeatureException();
......
......@@ -159,6 +159,7 @@ bool Unit::equal(Unit unit_2)
else if(_dct[el.first] != el.second)
return false;
}
for(auto& el : _dct)
{
if((unit_2.dct().count(el.first) == 0) && (el.second != 0))
......@@ -166,11 +167,12 @@ bool Unit::equal(Unit unit_2)
else if(unit_2.dct()[el.first] != el.second)
return false;
}
if(unit_2.dct().size() == 0)
{
for(auto& el : _dct)
{
if(_dct.count(el.first) != 0)
if((_dct.count(el.first) != 0) && (el.second != 0))
return false;
}
}
......
......@@ -3,6 +3,17 @@
// Default constructor: forwards to the default constructor of the boost::mpi::communicator base.
MPI_Interface::MPI_Interface() :
    boost::mpi::communicator()
{
}
/**
 * @brief Compute the slice of a list that the calling rank is responsible for
 * @details The sz elements are divided as evenly as possible between the ranks:
 *          every rank gets sz / size() elements and the first (sz % size()) ranks
 *          get one additional element. The end of one rank's slice is the start
 *          of the next rank's slice.
 *
 * @param sz number of elements to distribute
 * @param start offset added to both returned indices
 * @return {first index, one-past-last index} of the calling rank's slice
 */
std::array<int, 2> MPI_Interface::get_start_end_from_list(int sz, int start)
{
    const int n_procs = size();
    const int my_rank = rank();

    const int base_count = sz / n_procs;
    const int n_extra = sz % n_procs;

    // Ranks below n_extra each absorb one of the leftover elements
    const int first = start + base_count * my_rank + std::min(my_rank, n_extra);
    const int last = start + base_count * (my_rank + 1) + std::min(my_rank + 1, n_extra);

    return std::array<int, 2>{{first, last}};
}
std::shared_ptr<MPI_Interface> mpi_setup::comm;
......
......@@ -40,6 +40,7 @@ public:
*/
int cantorTagGen(unsigned int procSend, unsigned int procRecv, unsigned int maxOffest, unsigned int offest)
{
    // Cantor pairing of (procSend, procRecv): (s + r) * (s + r + 1) / 2 + r.
    // The second factor must use the same sum as the first; using
    // (procSend + procSend + 1) made distinct pairs collide, e.g. (0, 1) and (1, 0).
    const unsigned int rank_sum = procSend + procRecv;
    const unsigned int pair_id = rank_sum * (rank_sum + 1) / 2 + procRecv;

    // Reserve maxOffest consecutive tags for each (sender, receiver) pair and
    // select one of them with offest.
    return static_cast<int>(pair_id * maxOffest + offest);
}
/**
 * @brief Get the start and end indices of this rank's share of a list split across all ranks
 * @details The sz elements are divided as evenly as possible; the first (sz % size()) ranks each get one extra element
 *
 * @param sz number of elements to distribute
 * @param start offset added to both returned indices
 * @return {first index, one-past-last index} for the calling rank
 */
std::array<int, 2> get_start_end_from_list(int sz, int start);
};
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment