Commit adeb3b06 authored by Thomas Purcell's avatar Thomas Purcell
Browse files

Cross-correlation now working

can set a maximum cross correlation for selected features
parent e68e8a16
......@@ -29,6 +29,7 @@ FeatureSpace::FeatureSpace(
int n_sis_select,
int max_store_rung,
int n_rung_generate,
double cross_corr_max,
double min_abs_feat_val,
double max_abs_feat_val
):
......@@ -41,6 +42,7 @@ FeatureSpace::FeatureSpace(
_feature_space_file("feature_space/selected_features.txt"),
_feature_space_summary_file("feature_space/SIS_summary.txt"),
_mpi_comm(mpi_comm),
_cross_cor_max(cross_corr_max),
_l_bound(min_abs_feat_val),
_u_bound(max_abs_feat_val),
_max_phi(max_phi),
......@@ -434,13 +436,13 @@ void FeatureSpace::project_generated(double* prop, int size, std::vector<node_pt
{
double cur_score = scores[inds[ii]];
bool is_valid = valid_score_against_current(scores_sel.size(), generated_phi[inds[ii]]->value_ptr(), cur_score, scores_sel, scores_comp);
// bool is_valid = valid_score_against_current(scores_sel.size(), generated_phi[inds[ii]]->value_ptr(), cur_score, scores_sel, scores_comp);
// Check the feature against those selected from previous SIS iterations
if((node_value_arrs::N_SELECTED > _n_sis_select) && is_valid)
is_valid = valid_score_against_past(generated_phi[inds[ii]]->value_ptr(), scores[inds[ii]], scores_prev_sel, scores_comp);
// // Check the feature against those selected from previous SIS iterations
// if((node_value_arrs::N_SELECTED > _n_sis_select) && is_valid)
// is_valid = valid_score_against_past(generated_phi[inds[ii]]->value_ptr(), scores[inds[ii]], scores_prev_sel, scores_comp);
if(is_valid)
if(valid_feature_against_selected(generated_phi[inds[ii]]->value_ptr(), node_value_arrs::N_SELECTED - _n_sis_select + scores_sel.size()))
{
if(scores_sel.size() == _n_sis_select)
{
......@@ -472,6 +474,15 @@ void FeatureSpace::project_generated(double* prop, int size, std::vector<node_pt
}
}
bool FeatureSpace::valid_feature_against_selected(double* val_ptr, int end_sel, int start_sel)
{
double base_val = util_funcs::r(val_ptr, val_ptr, _n_samp);
for(int dd = start_sel; dd < end_sel; ++dd)
if(base_val - std::abs(util_funcs::r(node_value_arrs::get_d_matrix_ptr(dd), val_ptr, _n_samp)) < 1.0 - _cross_cor_max + 1e-10)
return false;
return true;
}
bool FeatureSpace::valid_score_against_past(double* val_ptr, double cur_score, std::vector<double> scores_past, std::vector<double>& scores_comp)
{
std::transform(scores_past.begin(), scores_past.end(), scores_comp.begin(), [&cur_score](double score){return std::abs(cur_score - score);});
......@@ -480,7 +491,7 @@ bool FeatureSpace::valid_score_against_past(double* val_ptr, double cur_score, s
if(*std::min_element(scores_comp.begin(), scores_comp.end()) < 1e-10)
{
int dd = std::min_element(scores_comp.begin(), scores_comp.end()) - scores_comp.begin();
if(std::abs(util_funcs::r(val_ptr, val_ptr, _n_samp) - std::abs(util_funcs::r(node_value_arrs::get_d_matrix_ptr(dd), val_ptr, _n_samp))) < 1e-10)
if(std::abs(util_funcs::r(val_ptr, val_ptr, _n_samp) - std::abs(util_funcs::r(node_value_arrs::get_d_matrix_ptr(dd), val_ptr, _n_samp))) < 1.0 - _cross_cor_max + 1e-10)
return false;
}
return true;
......@@ -493,7 +504,7 @@ bool FeatureSpace::valid_score_against_current(int end_check, double* val_ptr, d
if(*std::min_element(scores_comp.begin(), scores_comp.begin() + end_check) < 1e-10)
{
int dd = std::min_element(scores_comp.begin(), scores_comp.begin() + end_check) - scores_comp.begin();
if(std::abs(util_funcs::r(val_ptr, val_ptr, _n_samp) - std::abs(util_funcs::r(node_value_arrs::get_d_matrix_ptr(node_value_arrs::N_SELECTED - _n_sis_select + dd), val_ptr, _n_samp))) < 1e-10)
if(std::abs(util_funcs::r(val_ptr, val_ptr, _n_samp) - std::abs(util_funcs::r(node_value_arrs::get_d_matrix_ptr(node_value_arrs::N_SELECTED - _n_sis_select + dd), val_ptr, _n_samp))) < 1.0 - _cross_cor_max + 1e-10)
return false;
}
return true;
......@@ -530,7 +541,7 @@ void FeatureSpace::sis(std::vector<double>& prop)
int cur_feat_local = 0;
double cur_score = 0.0;
while(_scores[inds[ii]] < -1.0)
while(_scores[inds[ii]] < -1.0000000001)
++ii;
std::vector<double> scores_prev_sel;
......@@ -542,12 +553,12 @@ void FeatureSpace::sis(std::vector<double>& prop)
while((cur_feat_local != _n_sis_select) && (ii < _scores.size()))
{
bool is_valid = valid_score_against_current(cur_feat_local, _phi[inds[ii]]->value_ptr(), _scores[inds[ii]], scores_sel, scores_comp);
// Check the feature against those selected from previous SIS iterations
if(cur_feat > 0 && is_valid)
is_valid = valid_score_against_past(_phi[inds[ii]]->value_ptr(), _scores[inds[ii]], scores_prev_sel, scores_comp);
// bool is_valid = valid_score_against_current(cur_feat_local, _phi[inds[ii]]->value_ptr(), _scores[inds[ii]], scores_sel, scores_comp);
// // Check the feature against those selected from previous SIS iterations
// if(cur_feat > 0 && is_valid)
// is_valid = valid_score_against_past(_phi[inds[ii]]->value_ptr(), _scores[inds[ii]], scores_prev_sel, scores_comp);
if(is_valid)
if(valid_feature_against_selected(_phi[inds[ii]]->value_ptr(), cur_feat + cur_feat_local))
{
scores_sel[cur_feat_local] = _scores[inds[ii]];
phi_sel.push_back(_phi[inds[ii]]);
......@@ -678,7 +689,7 @@ void FeatureSpace::sis(std::vector<double>& prop)
// Get the n_sis_select best features (compare against features sent from other processes)
while((cur_feat != node_value_arrs::N_SELECTED) && (ii < sent_scores.size()))
{
if(valid_score_against_current(cur_feat_local, sent_phi[inds[ii]]->value().data(), sent_scores[inds[ii]], scores_sel, scores_comp))
if(valid_feature_against_selected(sent_phi[inds[ii]]->value().data(), cur_feat + cur_feat_local, cur_feat))
{
out_file_stream << std::setw(14) <<std::left << cur_feat << sent_phi[inds[ii]]->postfix_expr() << std::endl;
sum_file_stream << std::setw(14) <<std::left << cur_feat << std::setw(24) << std::setprecision(18) << std::left << -1 * sent_scores[inds[ii]] << sent_phi[inds[ii]]->expr() << std::endl;
......
......@@ -55,6 +55,7 @@ class FeatureSpace
std::function<void(double*, double*, std::vector<node_ptr>&, std::vector<int>&, int)> _project; //!< Function used to calculate the scores for SIS
std::shared_ptr<MPI_Interface> _mpi_comm; //!< MPI communicator
double _cross_cor_max; //!< Maximum cross-correlation used for selecting features
double _l_bound; //!< lower bound for absolute value of the features
double _u_bound; //!< upper bound for absolute value of the features
......@@ -78,6 +79,7 @@ public:
* @param n_sis_select number of features to select during each SIS step
* @param max_store_rung number of rungs to calculate and store the value of the features for all samples
* @param n_rung_generate number of rungs to generate on the fly during SIS (this must be 1 or 0 right now, possible to be higher with recursive algorithm)
* @param cross_corr_max Maximum cross-correlation used for selecting features
* @param min_abs_feat_val minimum absolute feature value
* @param max_abs_feat_val maximum absolute feature value
*/
......@@ -91,6 +93,7 @@ public:
int n_sis_select=1,
int max_store_rung=-1,
int n_rung_generate=0,
double cross_corr_max=1.0,
double min_abs_feat_val=1e-50,
double max_abs_feat_val=1e50
);
......@@ -216,6 +219,16 @@ public:
*/
void project_generated(double* prop, int size, std::vector<node_ptr>& phi_selected, std::vector<double>& scores_selected, std::vector<double>& scores_comp);
/**
* @brief Checks the feature to see if it is still valid against previously selected features
*
* @param val_ptr pointer to value array of the current feature
* @param end_sel index of the feature to stop checking
*
* @return True if the feature is still valid
*/
bool valid_feature_against_selected(double* val_ptr, int end_sel, int start_sel = 0);
/**
* @brief Check if a feature overlaps with a feature previously selected in earlier SIS iterations
* @details Compares the projection score of the current candidate feature with all those of previously selected features (using the current prop) and
......@@ -273,6 +286,7 @@ public:
* @param n_sis_select number of features to select during each SIS step
* @param max_store_rung number of rungs to calculate and store the value of the features for all samples
* @param n_rung_generate number of rungs to generate on the fly during SIS (this must be 1 or 0 right now, possible to be higher with recursive algorithm)
* @param cross_corr_max Maximum cross-correlation used for selecting features
* @param min_abs_feat_val minimum absolute feature value
* @param max_abs_feat_val maximum absolute feature value
*/
......@@ -285,6 +299,7 @@ public:
int n_sis_select=1,
int max_store_rung=-1,
int n_rung_generate=0,
double cross_corr_max=1.0,
double min_abs_feat_val=1e-50,
double max_abs_feat_val=1e50
);
......@@ -301,6 +316,7 @@ public:
* @param n_sis_select number of features to select during each SIS step
* @param max_store_rung number of rungs to calculate and store the value of the features for all samples
* @param n_rung_generate number of rungs to generate on the fly during SIS (this must be 1 or 0 right now, possible to be higher with recursive algorithm)
* @param cross_corr_max Maximum cross-correlation used for selecting features
* @param min_abs_feat_val minimum absolute feature value
* @param max_abs_feat_val maximum absolute feature value
*/
......@@ -313,6 +329,7 @@ public:
int n_sis_select=1,
int max_store_rung=-1,
int n_rung_generate=0,
double cross_corr_max=1.0,
double min_abs_feat_val=1e-50,
double max_abs_feat_val=1e50
);
......
......@@ -7,6 +7,7 @@ InputParser::InputParser(boost::property_tree::ptree IP, std::string fn, std::sh
_prop_key(IP.get<std::string>("property_key", "prop")),
_task_key(IP.get<std::string>("task_key", "Task")),
_leave_out_inds(as_vector<int>(IP, "leave_out_inds")),
_cross_cor_max(IP.get<double>("max_feat_cross_correlation", 1.0)),
_l_bound(IP.get<double>("min_abs_feat_val", 1e-50)),
_u_bound(IP.get<double>("max_abs_feat_val", 1e50)),
_n_dim(IP.get<int>("desc_dim")),
......@@ -270,7 +271,7 @@ void InputParser::generate_feature_space(std::shared_ptr<MPI_Interface> comm, st
for(int ff = 0; ff < headers.size(); ++ff)
phi_0.push_back(std::make_shared<FeatureNode>(ff, headers[ff], data[ff], test_data[ff], units[ff]));
_feat_space = std::make_shared<FeatureSpace>(comm, phi_0, _opset, _prop_train, _task_sizes_train, _max_rung, _n_sis_select, _max_store_rung, _n_rung_generate, _l_bound, _u_bound);
_feat_space = std::make_shared<FeatureSpace>(comm, phi_0, _opset, _prop_train, _task_sizes_train, _max_rung, _n_sis_select, _max_store_rung, _n_rung_generate, _cross_cor_max, _l_bound, _u_bound);
}
void stripComments(std::string& filename)
......
......@@ -49,6 +49,7 @@ public:
std::shared_ptr<FeatureSpace> _feat_space; //!< shared_ptr to the FeatureSpace generated from the data file and the input file
double _cross_cor_max; //!< Maximum cross-correlation used for selecting features
double _l_bound; //!< Minimum absolute value allowed for the feature.
double _u_bound; //!< Maximum absolute value allowed for the feature.
......
......@@ -39,8 +39,8 @@ void sisso::feature_creation::registerFeatureSpace()
void (FeatureSpace::*sis_list)(list) = &FeatureSpace::sis;
void (FeatureSpace::*sis_ndarray)(np::ndarray) = &FeatureSpace::sis;
class_<FeatureSpace>("FeatureSpace", init<list, list, np::ndarray, list, optional<int, int, int, int, double, double>>())
.def(init<list, list, list, list, optional<int, int, int, int, double>>())
class_<FeatureSpace>("FeatureSpace", init<list, list, np::ndarray, list, optional<int, int, int, int, double, double, double>>())
.def(init<list, list, list, list, optional<int, int, int, int, double, double, double>>())
.def("sis", sis_list, "@DocString_feat_space_sis_list@")
.def("sis", sis_ndarray, "@DocString_feat_space_sis_arr@")
.def("feat_in_phi", &FeatureSpace::feat_in_phi, "@DocString_feat_space_feat_in_phi@")
......
......@@ -9,6 +9,7 @@ FeatureSpace::FeatureSpace(
int n_sis_select,
int max_store_rung,
int n_rung_generate,
double cross_corr_max,
double min_abs_feat_val,
double max_abs_feat_val
):
......@@ -21,6 +22,7 @@ FeatureSpace::FeatureSpace(
_feature_space_file("feature_space/selected_features.txt"),
_feature_space_summary_file("feature_space/SIS_summary.txt"),
_mpi_comm(mpi_setup::comm),
_cross_cor_max(cross_corr_max),
_l_bound(min_abs_feat_val),
_u_bound(max_abs_feat_val),
_max_phi(max_phi),
......@@ -42,6 +44,7 @@ FeatureSpace::FeatureSpace(
int n_sis_select,
int max_store_rung,
int n_rung_generate,
double cross_corr_max,
double min_abs_feat_val,
double max_abs_feat_val
):
......@@ -54,6 +57,7 @@ FeatureSpace::FeatureSpace(
_feature_space_file("feature_space/selected_features.txt"),
_feature_space_summary_file("feature_space/SIS_summary.txt"),
_mpi_comm(mpi_setup::comm),
_cross_cor_max(cross_corr_max),
_l_bound(min_abs_feat_val),
_u_bound(max_abs_feat_val),
_max_phi(max_phi),
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment