Commit dd81f2ef authored by Thomas Purcell's avatar Thomas Purcell
Browse files

Regression seems to work with only OpenMP

Now to adjust the classifier
parent 2caa6202
......@@ -96,7 +96,7 @@ void SISSORegressor::l0_norm(std::vector<double>& prop, int n_dim)
for(int rr = 0; rr < n_dim; ++rr)
{
inds[rr] = _feat_space->phi_selected().size() - 1 - rr;
n_interactions *= inds[rr];
n_interactions *= inds[rr] + 1;
n_dim_fact *= (rr + 1);
}
n_interactions /= n_dim_fact;
......@@ -172,7 +172,6 @@ void SISSORegressor::l0_norm(std::vector<double>& prop, int n_dim)
int index = all_inds_min[inds[rr] * n_dim + ii];
min_nodes[ii] = std::make_shared<ModelNode>(_feat_space->phi_selected()[index]->arr_ind(), _feat_space->phi_selected()[index]->rung(), _feat_space->phi_selected()[index]->expr(), _feat_space->phi_selected()[index]->postfix_expr(), _feat_space->phi_selected()[index]->value(), _feat_space->phi_selected()[index]->test_value(), _feat_space->phi_selected()[index]->unit());
}
// #pragma omp critical
models.push_back(ModelRegressor(_prop_unit, _prop, _prop_test, min_nodes, _task_sizes_train, _task_sizes_test, _fix_intercept));
}
......@@ -183,47 +182,49 @@ void SISSORegressor::l0_norm(std::vector<double>& prop, int n_dim)
void SISSORegressor::fit()
{
std::vector<double> residual(_n_samp * _n_residual);
std::vector<double> residual(_prop);
std::clock_t start;
double duration;
start = std::clock();
_feat_space->sis(_prop);
_mpi_comm->barrier();
duration = ( std::clock() - start ) / (double) CLOCKS_PER_SEC;
if(_mpi_comm->rank() == 0)
std::cout << "Time for SIS: " << duration << std::endl;
start = std::clock();
std::vector<ModelRegressor> models;
for(int rr = 0; rr < std::max(_n_residual, _n_models_store); ++rr)
{
node_value_arrs::clear_temp_test_reg();
model_node_ptr model_feat = std::make_shared<ModelNode>(_feat_space->phi_selected()[rr]->arr_ind(), _feat_space->phi_selected()[rr]->rung(), _feat_space->phi_selected()[rr]->expr(), _feat_space->phi_selected()[rr]->postfix_expr(), _feat_space->phi_selected()[rr]->value(), _feat_space->phi_selected()[rr]->test_value(), _feat_space->phi_selected()[rr]->unit());
models.push_back(ModelRegressor(_prop_unit, _prop, _prop_test, {model_feat}, _task_sizes_train, _task_sizes_test, _fix_intercept));
if(rr < _n_residual)
models.back().copy_error(&residual[rr * _n_samp]);
if((_mpi_comm->rank() == 0) && (rr < _n_models_store))
{
models.back().to_file("models/train_dim_1_model_" + std::to_string(rr) + ".dat");
if(_leave_out_inds.size() > 0)
models.back().to_file("models/test_dim_1_model_" + std::to_string(rr) + ".dat", false, _leave_out_inds);
}
}
_models.push_back(models);
_mpi_comm->barrier();
duration = ( std::clock() - start ) / (double) CLOCKS_PER_SEC;
if(_mpi_comm->rank() == 0)
std::cout << "Time for Model: " << duration << std::endl;
for(int dd = 2; dd <= _n_dim; ++dd)
// _feat_space->sis(_prop);
// _mpi_comm->barrier();
// duration = ( std::clock() - start ) / (double) CLOCKS_PER_SEC;
// if(_mpi_comm->rank() == 0)
// std::cout << "Time for SIS: " << duration << std::endl;
// start = std::clock();
// std::vector<ModelRegressor> models;
// for(int rr = 0; rr < std::max(_n_residual, _n_models_store); ++rr)
// {
// node_value_arrs::clear_temp_test_reg();
// model_node_ptr model_feat = std::make_shared<ModelNode>(_feat_space->phi_selected()[rr]->arr_ind(), _feat_space->phi_selected()[rr]->rung(), _feat_space->phi_selected()[rr]->expr(), _feat_space->phi_selected()[rr]->postfix_expr(), _feat_space->phi_selected()[rr]->value(), _feat_space->phi_selected()[rr]->test_value(), _feat_space->phi_selected()[rr]->unit());
// models.push_back(ModelRegressor(_prop_unit, _prop, _prop_test, {model_feat}, _task_sizes_train, _task_sizes_test, _fix_intercept));
// if(rr < _n_residual)
// models.back().copy_error(&residual[rr * _n_samp]);
// if((_mpi_comm->rank() == 0) && (rr < _n_models_store))
// {
// models.back().to_file("models/train_dim_1_model_" + std::to_string(rr) + ".dat");
// if(_leave_out_inds.size() > 0)
// models.back().to_file("models/test_dim_1_model_" + std::to_string(rr) + ".dat", false, _leave_out_inds);
// }
// }
// _models.push_back(models);
// _mpi_comm->barrier();
// duration = ( std::clock() - start ) / (double) CLOCKS_PER_SEC;
// if(_mpi_comm->rank() == 0)
// std::cout << "Time for Model: " << duration << std::endl;
for(int dd = 1; dd <= _n_dim; ++dd)
{
start = std::clock();
start = std::clock();
_feat_space->sis(residual);
if(dd == 1)
residual.resize(_n_samp * _n_residual);
_mpi_comm->barrier();
duration = ( std::clock() - start ) / (double) CLOCKS_PER_SEC;
......
......@@ -80,9 +80,15 @@ void FeatureSpace::initialize_fs(std::vector<double> prop, std::string project_t
}
if(project_type.compare("regression") == 0)
{
_project = project_funcs::project_r2;
_project_no_omp = project_funcs::project_r2_no_omp;
}
else if(project_type.compare("classification") == 0)
{
_project = project_funcs::project_classify;
_project_no_omp = project_funcs::project_classify_no_omp;
}
else
throw std::logic_error("Wrong projection type passed to FeatureSpace constructor.");
......@@ -445,7 +451,7 @@ void FeatureSpace::generate_feature_space(std::vector<double>& prop)
_n_feat = _phi.size();
}
void FeatureSpace::project_generated(double* prop, int size, std::vector<node_ptr>& phi_sel, std::vector<double>& scores_sel, std::vector<double>& scores_comp)
void FeatureSpace::project_generated(double* prop, int size, std::vector<node_ptr>& phi_sel, std::vector<double>& scores_sel)
{
std::vector<double> scores_prev_sel;
if(node_value_arrs::N_SELECTED > _n_sis_select)
......@@ -457,65 +463,61 @@ void FeatureSpace::project_generated(double* prop, int size, std::vector<node_pt
int worst_score_ind = std::max_element(scores_sel.begin(), scores_sel.end()) - scores_sel.begin();
double worst_score = scores_sel[worst_score_ind];
#pragma omp parallel firstprivate(worst_score, worst_score_ind)
std::array<int, 2> start_end = _mpi_comm->get_start_end_from_list(_phi.size() - _start_gen.back(), _start_gen.back());
int feat_ind = _phi.size();
#pragma omp parallel firstprivate(worst_score, worst_score_ind, prop, size) shared(phi_sel, scores_sel)
{
std::vector<node_ptr>& phi_sel_private(phi_sel);
std::vector<double>& scores_sel_private(scores_sel);
std::vector<node_ptr> phi_sel_private(phi_sel);
std::vector<double> scores_sel_private(scores_sel);
std::array<int, 2> start_end = _mpi_comm->get_start_end_from_list(_phi.size() - _start_gen.back(), _start_gen.back());
int feat_ind = _phi.size();
#pragma omp for schedule(dynamic)
for(auto feat = _phi.begin() + _start_gen.back() + _mpi_comm->rank(); feat < _phi.end(); feat += _mpi_comm->size())
for(auto feat = _phi.begin() + start_end[0]; feat < _phi.begin() + start_end[1]; ++feat)
{
node_value_arrs::clear_temp_reg();
node_value_arrs::clear_temp_test_reg();
int feat_ind = _phi.size();
node_value_arrs::clear_temp_reg_thread();
std::vector<node_ptr> generated_phi;
generate_new_feats(feat, generated_phi, feat_ind, _l_bound, _u_bound);
#pragma omp critical
{
std::vector<double> scores(generated_phi.size(), 0.0);
_project(prop, scores.data(), generated_phi, _task_sizes, size / _n_samp);
if(generated_phi.size() == 0)
continue;
std::vector<int> inds = util_funcs::argsort(scores);
std::vector<double> scores(generated_phi.size());
_project_no_omp(prop, scores.data(), generated_phi, _task_sizes, size / _n_samp);
int ii = 0;
std::vector<int> inds = util_funcs::argsort(scores);
while(scores[inds[ii]] < -1.0)
++ii;
int ii = 0;
while((ii < inds.size()) && (scores[inds[ii]] < -1.0))
++ii;
while((ii < inds.size()) && (scores[inds[ii]] < worst_score))
while((ii < inds.size()) && (scores[inds[ii]] < worst_score))
{
double cur_score = scores[inds[ii]];
if((valid_feature_against_selected(generated_phi[inds[ii]]->value_ptr(), node_value_arrs::N_SELECTED - _n_sis_select)) && valid_feature_against_private_selected(generated_phi[inds[ii]]->value_ptr(), phi_sel_private))
{
double cur_score = scores[inds[ii]];
if(valid_feature_against_selected(generated_phi[inds[ii]]->value_ptr(), node_value_arrs::N_SELECTED - _n_sis_select + scores_sel_private.size()))
if(scores_sel_private.size() == _n_sis_select)
{
if(scores_sel_private.size() == _n_sis_select)
{
phi_sel_private[worst_score_ind]->set_selected(false);
phi_sel_private[worst_score_ind]->set_d_mat_ind(-1);
phi_sel_private[worst_score_ind] = generated_phi[inds[ii]];
scores_sel_private[worst_score_ind] = cur_score;
phi_sel_private[worst_score_ind]->set_selected(true);
phi_sel_private[worst_score_ind]->set_d_mat_ind(node_value_arrs::N_SELECTED - _n_sis_select + worst_score_ind);
phi_sel_private[worst_score_ind]->set_value();
}
else
{
phi_sel_private.push_back(generated_phi[inds[ii]]);
scores_sel_private.push_back(cur_score);
phi_sel_private[worst_score_ind]->set_selected(false);
phi_sel_private[worst_score_ind]->set_d_mat_ind(-1);
phi_sel_private.back()->set_selected(true);
phi_sel_private.back()->set_d_mat_ind(node_value_arrs::N_SELECTED - _n_sis_select + scores_sel_private.size());
phi_sel_private.back()->set_value();
}
worst_score_ind = std::max_element(scores_sel_private.begin(), scores_sel_private.end()) - scores_sel_private.begin();
worst_score = scores_sel_private[worst_score_ind];
phi_sel_private[worst_score_ind] = generated_phi[inds[ii]];
scores_sel_private[worst_score_ind] = cur_score;
}
else
{
phi_sel_private.push_back(generated_phi[inds[ii]]);
scores_sel_private.push_back(cur_score);
}
++ii;
worst_score_ind = std::max_element(scores_sel_private.begin(), scores_sel_private.end()) - scores_sel_private.begin();
worst_score = scores_sel_private[worst_score_ind];
}
++ii;
}
}
#pragma omp critical
......@@ -523,7 +525,7 @@ void FeatureSpace::project_generated(double* prop, int size, std::vector<node_pt
worst_score_ind = std::max_element(scores_sel.begin(), scores_sel.end()) - scores_sel.begin();
for(int sc = 0; sc < scores_sel_private.size(); ++sc)
{
if(scores_sel_private[sc] < scores_sel[worst_score_ind])
if((scores_sel_private[sc] < scores_sel[worst_score_ind]) && valid_feature_against_private_selected(phi_sel_private[sc]->value_ptr(), phi_sel))
{
scores_sel[worst_score_ind] = scores_sel_private[sc];
phi_sel[worst_score_ind] = phi_sel_private[sc];
......@@ -551,6 +553,18 @@ bool FeatureSpace::valid_feature_against_selected(double* val_ptr, int end_sel,
return is_valid;
}
bool FeatureSpace::valid_feature_against_private_selected(double* val_ptr, std::vector<node_ptr>& selected)
{
double base_val = util_funcs::r(val_ptr, val_ptr, _n_samp);
for(auto& feat : selected)
{
if(base_val - std::abs(util_funcs::r(feat->value_ptr(1), val_ptr, _n_samp)) < 1.0 - _cross_cor_max + 1e-10)
return false;
}
return true;
}
void FeatureSpace::sis(std::vector<double>& prop)
{
boost::filesystem::path p(_feature_space_file.c_str());
......@@ -562,7 +576,6 @@ void FeatureSpace::sis(std::vector<double>& prop)
std::ofstream sum_file_stream = std::ofstream();
sum_file_stream.open(_feature_space_summary_file, std::ios::app);
std::vector<double> scores_comp(std::max(node_value_arrs::N_SELECTED, _n_sis_select), 1.0);
std::vector<double> scores_sel(_n_sis_select, 0.0);
std::vector<node_ptr> phi_sel;
......@@ -611,7 +624,7 @@ void FeatureSpace::sis(std::vector<double>& prop)
{
phi_sel.resize(cur_feat_local);
scores_sel.resize(cur_feat_local);
project_generated(prop.data(), prop.size(), phi_sel, scores_sel, scores_comp);
project_generated(prop.data(), prop.size(), phi_sel, scores_sel);
node_value_arrs::clear_temp_reg();
node_value_arrs::clear_temp_test_reg();
}
......@@ -648,7 +661,6 @@ void FeatureSpace::sis(std::vector<double>& prop)
cur_feat_local = 0;
int ii = 0;
std::fill_n(scores_comp.begin(), _n_sis_select, 1.0);
scores_sel = std::vector<double>(_n_sis_select, 0.0);
// Get the n_sis_select best features (compare against features sent from other processes)
......@@ -704,8 +716,10 @@ void FeatureSpace::sis(std::vector<double>& prop)
out_file_stream << std::setw(14) <<std::left << cur_feat << phi_sel[ind]->postfix_expr() << std::endl;
sum_file_stream << std::setw(14) <<std::left << cur_feat << std::setw(24) << std::setprecision(18) << std::left << -1 * scores_sel[ind] << phi_sel[ind]->expr() << std::endl;
_phi_selected.push_back(phi_sel[ind]);
_phi_selected.back()->set_selected(true);
_phi_selected.back()->set_d_mat_ind(cur_feat);
_phi_selected.back()->set_value();
++cur_feat;
++cur_feat_local;
}
......
......@@ -54,6 +54,7 @@ class FeatureSpace
std::string _feature_space_summary_file; //!< File to store information about the selected features
std::function<void(double*, double*, std::vector<node_ptr>&, std::vector<int>&, int)> _project; //!< Function used to calculate the scores for SIS
std::function<void(double*, double*, std::vector<node_ptr>&, std::vector<int>&, int)> _project_no_omp; //!< Function used to calculate the scores for SIS without changing omp environment
std::shared_ptr<MPI_Interface> _mpi_comm; //!< MPI communicator
double _cross_cor_max; //!< Maximum cross-correlation used for selecting features
......@@ -221,7 +222,7 @@ public:
* @param scores_selected The projection scores of the features that would be selected from the previous rungs
*/
void project_generated(double* prop, int size, std::vector<node_ptr>& phi_selected, std::vector<double>& scores_selected, std::vector<double>& scores_comp);
void project_generated(double* prop, int size, std::vector<node_ptr>& phi_selected, std::vector<double>& scores_selected);
/**
* @brief Checks the feature to see if it is still valid against previously selected features
......@@ -233,6 +234,8 @@ public:
*/
bool valid_feature_against_selected(double* val_ptr, int end_sel, int start_sel = 0);
bool valid_feature_against_private_selected(double* val_ptr, std::vector<node_ptr>& selected);
/**
* @brief Perform SIS on a feature set with a specified property
* @details Perform sure-independence screening with either the correct property or the error
......
......@@ -164,6 +164,12 @@ namespace node_value_arrs
*/
inline void clear_temp_reg(){std::fill_n(TEMP_STORAGE_REG.data(), TEMP_STORAGE_REG.size(), -1);}
/**
* @brief Flush the calling thread's slice of the temporary storage register (training data)
* @details Reset only the N_STORE_FEATURES * 3 slots owned by the current OpenMP thread to -1, leaving other threads' slots untouched
*/
inline void clear_temp_reg_thread(){std::fill_n(TEMP_STORAGE_REG.data() + N_STORE_FEATURES * 3 * omp_get_thread_num(), N_STORE_FEATURES * 3, -1);}
/**
* @brief Flush the temporary storage register (test data)
* @details Reset all slots in the register to -1
......
......@@ -4,14 +4,22 @@ void project_funcs::project_r(double* prop, double* scores, std::vector<node_ptr
{
int n_samp = std::accumulate(sizes.begin(), sizes.end(), 0);
std::transform(phi.begin(), phi.end(), scores, [&prop, &sizes](node_ptr feat){return -1 * util_funcs::r(prop, feat->value_ptr(), sizes.data(), sizes.size());});
for(int pp = 1; pp < n_prop; ++pp)
#pragma omp parallel firstprivate(prop)
{
prop += n_samp;
std::transform(phi.begin(), phi.end(), scores, scores, [&prop, &sizes](node_ptr feat, double sc){return std::min(-1 * util_funcs::r(prop, feat->value_ptr(), sizes.data(), sizes.size()), sc);});
int start = omp_get_thread_num() * (phi.size() / omp_get_num_threads()) + std::min(omp_get_thread_num(), static_cast<int>(phi.size()) % omp_get_num_threads());
int end = (omp_get_thread_num() + 1) * (phi.size() / omp_get_num_threads()) + std::min(omp_get_thread_num() + 1, static_cast<int>(phi.size()) % omp_get_num_threads());
std::transform(phi.begin(), phi.end(), scores, [&prop, &sizes](node_ptr feat){return -1 * util_funcs::r(prop, feat->value_ptr(), sizes.data(), sizes.size());});
for(int pp = 1; pp < n_prop; ++pp)
{
prop += n_samp;
std::transform(phi.begin(), phi.end(), scores, scores, [&prop, &sizes](node_ptr feat, double sc){return std::min(-1 * util_funcs::r(prop, feat->value_ptr(), sizes.data(), sizes.size()), sc);});
}
std::transform(scores, scores + phi.size(), scores, [](double score){return std::isnan(score) ? 0.0 : score;});
#pragma omp barrier
}
std::transform(scores, scores + phi.size(), scores, [](double score){return std::isnan(score) ? 0.0 : score;});
}
void project_funcs::project_r2(double* prop, double* scores, std::vector<node_ptr>& phi, std::vector<int>& sizes, int n_prop)
......@@ -59,3 +67,47 @@ void project_funcs::project_classify(double* prop, double* scores, std::vector<n
#pragma omp barrier
}
}
void project_funcs::project_r_no_omp(double* prop, double* scores, std::vector<node_ptr>& phi, std::vector<int>& sizes, int n_prop)
{
int n_samp = std::accumulate(sizes.begin(), sizes.end(), 0);
std::transform(phi.begin(), phi.end(), scores, [&prop, &sizes](node_ptr feat){return -1 * util_funcs::r(prop, feat->value_ptr(), sizes.data(), sizes.size());});
for(int pp = 1; pp < n_prop; ++pp)
{
prop += n_samp;
std::transform(phi.begin(), phi.end(), scores, scores, [&prop, &sizes](node_ptr feat, double sc){return std::min(-1 * util_funcs::r(prop, feat->value_ptr(), sizes.data(), sizes.size()), sc);});
}
std::transform(scores, scores + phi.size(), scores, [](double score){return std::isnan(score) ? 0.0 : score;});
}
void project_funcs::project_r2_no_omp(double* prop, double* scores, std::vector<node_ptr>& phi, std::vector<int>& sizes, int n_prop)
{
int n_samp = std::accumulate(sizes.begin(), sizes.end(), 0);
std::transform(phi.begin(), phi.end(), scores, [&prop, &sizes](node_ptr feat){return -1 * util_funcs::r2(prop, feat->value_ptr(), sizes.data(), sizes.size());});
for(int pp = 1; pp < n_prop; ++pp)
{
prop += n_samp;
std::transform(phi.begin(), phi.end(), scores, scores, [&prop, &sizes](node_ptr feat, double sc){return std::min(-1 * util_funcs::r2(prop, feat->value_ptr(), sizes.data(), sizes.size()), sc);});
}
std::transform(scores, scores + phi.size(), scores, [](double score){return std::isnan(score) ? 0.0 : score;});
}
void project_funcs::project_classify_no_omp(double* prop, double* scores, std::vector<node_ptr>& phi, std::vector<int>& sizes, int n_prop)
{
int n_samp = std::accumulate(sizes.begin(), sizes.end(), 0);
convex_hull::initialize_projection(sizes, prop);
std::transform(phi.begin(), phi.end(), scores, [](node_ptr feat){return convex_hull::overlap_1d(feat->value_ptr());});
for(int pp = 1; pp < n_prop; ++pp)
{
prop += n_samp;
convex_hull::initialize_projection(sizes, prop);
std::transform(phi.begin(), phi.end(), scores, scores, [](node_ptr feat, double score){return std::min(convex_hull::overlap_1d(feat->value_ptr()), score);});
}
std::transform(scores, scores + phi.size(), scores, [](double score){return std::isnan(score) ? std::numeric_limits<double>::infinity() : score;});
}
......@@ -48,6 +48,40 @@ namespace project_funcs
* @param n_prop number of properties
*/
void project_classify(double* prop, double* scores, std::vector<node_ptr>& phi, std::vector<int>& size, int n_prop);
/**
* @brief Calculate the projection scores of a set of features to a vector via Pearson correlation
*
* @param prop The pointer to the property vector to calculate the Pearson correlation against
* @param scores The pointer of the score vector
* @param phi The set of features to calculate the Pearson correlation of
* @param size Vector of the size of all of the tasks
* @param n_prop The number of properties to calculate the Pearson Correlation of and return the maximum of
*/
void project_r_no_omp(double* prop, double* scores, std::vector<node_ptr>& phi, std::vector<int>& size, int n_prop);
/**
* @brief Calculate the projection scores of a set of features to a vector via Coefficient of Determination
*
* @param prop The pointer to the property vector to calculate the Coefficient of Determination against
* @param scores The pointer of the score vector
* @param phi The set of features to calculate the Coefficient of Determination of
* @param size Vector of the size of all of the tasks
* @param n_prop The number of properties to calculate the Coefficient of Determination of and return the maximum of
*/
void project_r2_no_omp(double* prop, double* scores, std::vector<node_ptr>& phi, std::vector<int>& size, int n_prop);
/**
* @brief Calculate projection scores for classification
* @details Create a 1D-convex hull for each class and calculate the projection score
*
* @param prop The classes to separate the property
* @param scores the vector to store the projection scores
* @param phi The set of features to calculate the projection scores for
* @param size list of the sizes for each task
* @param n_prop number of properties
*/
void project_classify_no_omp(double* prop, double* scores, std::vector<node_ptr>& phi, std::vector<int>& size, int n_prop);
}
#endif
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment