Commit 52e2891c authored by Thomas Purcell
Browse files

FeatureSpace removes similar features for n_rung < _max_phi

This should reduce the time needed for feature generation.
parent 28c1da42
......@@ -22,6 +22,7 @@ FeatureSpace::FeatureSpace(
std::shared_ptr<MPI_Interface> mpi_comm,
std::vector<node_ptr> phi_0,
std::vector<std::string> allowed_ops,
std::vector<double> prop,
std::vector<int> task_sizes,
int max_phi,
int n_sis_select,
......@@ -47,6 +48,7 @@ FeatureSpace::FeatureSpace(
_n_rung_generate(n_rung_generate)
{
_project = project_funcs::project_r;
if(_n_rung_generate > 1)
throw std::logic_error("A maximum of one rung can be generated on the fly.");
else if(_max_phi - _n_rung_generate < _n_rung_store)
......@@ -61,7 +63,7 @@ FeatureSpace::FeatureSpace(
else
_un_operators.push_back(allowed_op_maps::unary_operator_map[op]);
}
generate_feature_space();
generate_feature_space(prop);
_scores.reserve(_phi.size());
_scores.resize(_phi.size());
}
......@@ -126,10 +128,12 @@ void FeatureSpace::generate_new_feats(std::vector<node_ptr>::iterator& feat, std
}
}
void FeatureSpace::generate_feature_space()
void FeatureSpace::generate_feature_space(std::vector<double>& prop)
{
double u_bound = 1e50;
double l_bound = 1e-50;
std::vector<int> inds;
for(int nn = 1; nn <= _max_phi - _n_rung_generate; ++nn)
{
if(nn == _max_phi)
......@@ -142,9 +146,7 @@ void FeatureSpace::generate_feature_space()
int feat_ind = _phi.size();
for(auto feat_1 = _phi.begin() + _mpi_comm->rank(); feat_1 < _phi.end(); feat_1 += _mpi_comm->size())
{
generate_new_feats(feat_1, next_phi, feat_ind, l_bound, u_bound);
}
_mpi_comm->barrier();
_start_gen.push_back(_phi.size());
......@@ -164,6 +166,35 @@ void FeatureSpace::generate_feature_space()
++feat_ind;
}
}
if(nn < _max_phi)
{
    // Remove identical features from the features stored at and after _start_gen.back().
    // NOTE(review): _start_gen.back() is presumably the first index of the rung generated
    // in this iteration (it is pushed before the new features are appended) -- confirm.
    const auto rung_start = _start_gen.back();

    // Compute the projection score of every feature against prop, then drop the scores
    // of the features before rung_start so that _scores[i] belongs to _phi[rung_start + i].
    _scores.resize(_phi.size());
    _project(prop.data(), _scores.data(), _phi, _task_sizes, 1);
    _scores.erase(_scores.begin(), _scores.begin() + rung_start);

    // Sorting by score places candidate duplicates next to each other.
    inds = util_funcs::argsort(_scores);

    std::vector<int> del_inds;
    for(int sc = _scores.size() - 1; sc > 0; --sc)
    {
        // Compare each feature with its neighbour in sorted order. Indexing with
        // inds[sc] - 1 (raw index minus one) would compare against an unrelated feature
        // and read _scores[-1] whenever inds[sc] is 0.
        if(_scores[inds[sc]] - _scores[inds[sc - 1]] < 1e-10)
        {
            // Near-equal scores alone are not enough: only delete when util_funcs::r of the
            // two value arrays has magnitude 1 to within 1e-13.
            if(1.0 - std::abs(util_funcs::r(_phi[rung_start + inds[sc]]->value_ptr(), _phi[rung_start + inds[sc - 1]]->value_ptr(), _n_samp)) < 1e-13)
                del_inds.push_back(-1 * (inds[sc] + rung_start));
        }
    }

    // The indexes are stored negated, so an ascending argsort visits the largest _phi index
    // first; erasing from the back keeps the remaining indexes valid.
    inds = util_funcs::argsort(del_inds);
    for(const int ind : inds)
        _phi.erase(_phi.begin() - del_inds[ind]);

    // Reindex the surviving features so each one's index matches its position in _phi
    for(int ff = rung_start; ff < _phi.size(); ++ff)
        _phi[ff]->reindex(ff);
}
if(nn <= _n_rung_store)
{
bool use_temp = (nn != _max_phi) || (_max_phi > _n_rung_store);
......@@ -500,7 +531,7 @@ void FeatureSpace::sis(std::vector<double>& prop)
inds = util_funcs::argsort(sent_scores);
for(int ii = 0; ii < _n_sis_select; ++ii)
{
std::cout << sent_scores[inds[ii]] << '\t' << sent_phi[inds[ii]]->expr() << std::endl;
std::cout << std::setw(22) << std::setprecision(18) << std::left << sent_scores[inds[ii]] << sent_phi[inds[ii]]->expr() << std::endl;
_phi_selected.push_back(sent_phi[inds[ii]]);
_phi_selected.back()->reindex(cur_feat);
++cur_feat;
......@@ -522,7 +553,7 @@ void FeatureSpace::sis(std::vector<double>& prop)
{
if(valid_score_against_current(cur_feat_local, sent_phi[inds[ii]]->value().data(), sent_scores[inds[ii]], scores_sel, scores_comp))
{
std::cout << sent_scores[inds[ii]] << '\t' << sent_phi[inds[ii]]->expr() << std::endl;
std::cout << std::setw(22) << std::setprecision(18) << std::left << sent_scores[inds[ii]] << sent_phi[inds[ii]]->expr() << std::endl;
_phi_selected.push_back(sent_phi[inds[ii]]);
_phi_selected.back()->reindex(cur_feat);
_phi_selected.back()->set_value();
......@@ -555,7 +586,7 @@ void FeatureSpace::sis(std::vector<double>& prop)
cur_feat_local = 0;
for(auto& feat : phi_sel)
{
std::cout << scores_sel[cur_feat_local] << '\t' << phi_sel[cur_feat_local]->expr() << std::endl;
std::cout << std::setw(22) << std::setprecision(18) << std::left << scores_sel[cur_feat_local] << phi_sel[cur_feat_local]->expr() << std::endl;
_phi_selected.push_back(feat);
_phi_selected.back()->reindex(cur_feat);
_phi_selected.back()->set_value();
......
......@@ -64,6 +64,7 @@ public:
std::shared_ptr<MPI_Interface> mpi_comm,
std::vector<node_ptr> phi_0,
std::vector<std::string> allowed_ops,
std::vector<double> prop,
std::vector<int> task_sizes,
int max_phi=1,
int n_sis_select=1,
......@@ -77,7 +78,7 @@ public:
* @brief Generate the full feature set from the allowed operators and initial feature set
* @details populates phi with all features from an initial set and the allowed operators
*/
void generate_feature_space();
void generate_feature_space(std::vector<double>& prop);
/**
* @brief Accessor function for _phi_selected
......
......@@ -268,7 +268,7 @@ void InputParser::generate_feature_space(std::shared_ptr<MPI_Interface> comm, st
for(int ff = 0; ff < headers.size(); ++ff)
phi_0.push_back(std::make_shared<FeatureNode>(ff, headers[ff], data[ff], test_data[ff], units[ff]));
_feat_space = std::make_shared<FeatureSpace>(comm, phi_0, _opset, _task_sizes_train, _max_rung, _n_sis_select, _max_store_rung, _n_rung_generate, _l_bound, _u_bound);
_feat_space = std::make_shared<FeatureSpace>(comm, phi_0, _opset, _prop_train, _task_sizes_train, _max_rung, _n_sis_select, _max_store_rung, _n_rung_generate, _l_bound, _u_bound);
}
void stripComments(std::string& filename)
......
......@@ -81,6 +81,20 @@ namespace util_funcs
std::sort(begin, end, [&vec](int i1, int i2){return vec[i1] < vec[i2];});
}
/**
 * @brief Get the indexes that would sort an integer vector in ascending order
 * @details vec is only read, so it is taken by const reference; const vectors and
 *          temporaries are accepted. std::sort is not stable, so the relative order of
 *          indexes that refer to equal values is unspecified.
 *
 * @param vec The vector whose sorted order is requested (not modified)
 * @return A vector of indexes such that vec[index[0]] <= vec[index[1]] <= ...
 */
inline std::vector<int> argsort(const std::vector<int>& vec)
{
    std::vector<int> index(vec.size());
    std::iota(index.begin(), index.end(), 0);
    std::sort(index.begin(), index.end(), [&vec](int i1, int i2){return vec[i1] < vec[i2];});
    return index;
}
/**
 * @brief Sort an existing range of indexes in place by the values they refer to in vec
 * @details vec is only read, so it is taken by const reference. Every index in
 *          [begin, end) must be a valid position in vec. std::sort is not stable, so the
 *          relative order of indexes that refer to equal values is unspecified.
 *
 * @param begin Pointer to the first index to sort
 * @param end Pointer one past the last index to sort
 * @param vec The vector providing the sort keys (not modified)
 */
inline void argsort(int* begin, int* end, const std::vector<int>& vec)
{
    std::sort(begin, end, [&vec](int i1, int i2){return vec[i1] < vec[i2];});
}
inline int factorial(int n)
{
return (n == 1 || n == 0) ? 1 : factorial(n - 1) * n;
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment