Commit 8eed2356 authored by Thomas Purcell's avatar Thomas Purcell
Browse files

Modify reparameterization

Actually create new features with the reparameterized nodes
parent d55c8d8d
......@@ -58,6 +58,8 @@ FeatureSpace::FeatureSpace(
):
_phi(phi_0),
_phi_0(phi_0),
_end_no_params(1, phi_0.size()),
_start_gen_reparam(1, 0),
_allowed_param_ops(allowed_param_ops),
_allowed_ops(allowed_ops),
_prop(prop),
......@@ -232,7 +234,7 @@ void FeatureSpace::initialize_fs_output_files() const
}
#ifdef PARAMETERIZE
void FeatureSpace::generate_new_feats(
void FeatureSpace::generate_param_feats(
std::vector<node_ptr>::iterator& feat,
std::vector<node_ptr>& feat_set,
unsigned long int& feat_ind,
......@@ -241,31 +243,44 @@ void FeatureSpace::generate_new_feats(
const double u_bound
)
{
unsigned long int phi_ind = feat - _phi.begin();
feat_set.reserve(feat_set.size() + _un_operators.size() + phi_ind * (_com_bin_operators.size() + 2 * _bin_operators.size()));
feat_set.reserve(feat_set.size() + _un_param_operators.size() + phi_ind * (_com_bin_param_operators.size() + 2 * _bin_param_operators.size()));
for(auto& op : _un_operators)
for(auto& op : _un_param_operators)
{
op(feat_set, *feat, feat_ind, l_bound, u_bound);
op(feat_set, *feat, feat_ind, l_bound, u_bound, optimizer);
}
for(auto& op : _com_bin_operators)
for(auto& op : _com_bin_param_operators)
{
for(auto feat_2 = _phi.begin(); feat_2 < feat; ++feat_2)
for(auto feat_2 = _phi.begin(); feat_2 != feat; ++feat_2)
{
op(feat_set, *feat, *feat_2, feat_ind, l_bound, u_bound);
op(feat_set, *feat, *feat_2, feat_ind, l_bound, u_bound, optimizer);
}
}
for(auto& op : _bin_operators)
for(auto& op : _bin_param_operators)
{
for(auto feat_2 = _phi.begin(); feat_2 < feat; ++feat_2)
for(auto feat_2 = _phi.begin(); feat_2 != feat; ++feat_2)
{
op(feat_set, *feat, *feat_2, feat_ind, l_bound, u_bound);
op(feat_set, *feat_2, *feat, feat_ind, l_bound, u_bound);
op(feat_set, *feat, *feat_2, feat_ind, l_bound, u_bound, optimizer);
op(feat_set, *feat_2, *feat, feat_ind, l_bound, u_bound, optimizer);
}
}
}
// Generate parameterized features from a single feature using the supplied optimizer.
// Applies every parameterized unary operator to *feat, then every parameterized
// (commutative and non-commutative) binary operator to *feat paired with features taken
// from _phi (rung by rung, up to the rung of *feat) and from _phi_reparam.
// New features are handed to each operator through feat_set; feat_ind, the bounds and the
// optimizer are passed to each operator unchanged.
void FeatureSpace::generate_reparam_feats(
std::vector<node_ptr>::iterator& feat,
std::vector<node_ptr>& feat_set,
unsigned long int& feat_ind,
std::shared_ptr<NLOptimizer> optimizer,
const double l_bound,
const double u_bound
)
{
int cur_rung = (*feat)->rung();
unsigned long int max_n_feat = _end_no_params[cur_rung] + _phi_reparam.size();
// NOTE(review): the reservation is sized with the non-parameterized operator lists
// (_un_operators, _com_bin_operators, _bin_operators) while the loops below iterate the
// *_param_operators lists - confirm which sizes are intended.
feat_set.reserve(feat_set.size() + _un_operators.size() + max_n_feat * (_com_bin_operators.size() + 2 * _bin_operators.size()));
for(auto& op : _un_param_operators)
{
op(feat_set, *feat, feat_ind, l_bound, u_bound, optimizer);
......@@ -273,22 +288,229 @@ void FeatureSpace::generate_new_feats(
for(auto& op : _com_bin_param_operators)
{
for(auto feat_2 = _phi.begin(); feat_2 != feat; ++feat_2)
for(int rr = 0; rr <= cur_rung; ++rr)
{
// NOTE(review): "(a != x) || (a != y)" is only false when feat_2 equals both feat and the
// end iterator at the same time, so this condition does not stop at whichever bound is
// reached first; "&&" looks like the intended operator - confirm.
for(auto feat_2 = _phi.begin() + _start_gen[rr]; (feat_2 != feat) || (feat_2 != _phi.begin() + _end_no_params[rr]); ++feat_2)
{
op(feat_set, *feat, *feat_2, feat_ind, l_bound, u_bound, optimizer);
}
}
// NOTE(review): feat_2 iterates _phi_reparam here but is compared with feat; at least one
// caller (generate_reparam_feature_set) passes an iterator into _phi. The same "||"
// concern as above applies.
for(auto feat_2 = _phi_reparam.begin(); (feat_2 != feat) || (feat_2 != _phi_reparam.end()); ++feat_2)
{
op(feat_set, *feat, *feat_2, feat_ind, l_bound, u_bound, optimizer);
}
}
for(auto& op : _bin_param_operators)
{
for(auto feat_2 = _phi.begin(); feat_2 != feat; ++feat_2)
for(int rr = 0; rr <= cur_rung; ++rr)
{
// NOTE(review): same always-true-unless-both-equal "||" condition as in the commutative loop.
for(auto feat_2 = _phi.begin() + _start_gen[rr]; (feat_2 != feat) || (feat_2 != _phi.begin() + _end_no_params[rr]); ++feat_2)
{
// Non-commutative operators are applied in both argument orders.
op(feat_set, *feat, *feat_2, feat_ind, l_bound, u_bound, optimizer);
op(feat_set, *feat_2, *feat, feat_ind, l_bound, u_bound, optimizer);
}
}
// NOTE(review): same cross-container comparison and "||" concern as in the commutative loop.
for(auto feat_2 = _phi_reparam.begin(); (feat_2 != feat) || (feat_2 != _phi_reparam.end()); ++feat_2)
{
op(feat_set, *feat, *feat_2, feat_ind, l_bound, u_bound, optimizer);
op(feat_set, *feat_2, *feat, feat_ind, l_bound, u_bound, optimizer);
}
}
}
#else
void FeatureSpace::generate_new_feats(
void FeatureSpace::generate_reparam_feature_set(const std::vector<double>& prop)
{
double u_bound = 1e50;
double l_bound = 1e-50;
std::vector<int> inds;
for(int nn = 1; nn <= _max_phi - _n_rung_generate; ++nn)
{
node_value_arrs::clear_temp_reg();
if(nn == _max_phi)
{
u_bound = _u_bound;
l_bound = _l_bound;
}
std::vector<node_ptr> next_phi;
_n_feat = _phi.size();
unsigned long int feat_ind = _phi.size() + _phi_reparam.size();
node_value_arrs::clear_temp_reg();
double start = omp_get_wtime();
#pragma omp parallel firstprivate(feat_ind, l_bound, u_bound)
{
std::vector<node_ptr> next_phi_private;
std::shared_ptr<NLOptimizer> optimizer_param = nlopt_wrapper::get_optimizer(_project_type, _task_sizes, _prop, _max_phi, _max_param_depth);
std::shared_ptr<NLOptimizer> optimizer_reparam = nlopt_wrapper::get_optimizer(_project_type, _task_sizes, prop, _max_phi, _max_param_depth);
#pragma omp for schedule(dynamic)
for(auto feat_1 = _phi_reparam.begin() + _start_gen_reparam.back() + _mpi_comm->rank(); feat_1 < _phi_reparam.end(); feat_1 += _mpi_comm->size())
{
generate_non_param_feats(feat_1, next_phi_private, feat_ind, l_bound, u_bound);
generate_param_feats(feat_1, next_phi_private, feat_ind, optimizer_param, l_bound, u_bound);
}
#pragma omp for schedule(dynamic)
for(auto feat_1 = _phi.begin() + _start_gen.back() + _mpi_comm->rank(); feat_1 < _phi.end(); feat_1 += _mpi_comm->size())
{
generate_reparam_feats(feat_1, next_phi_private, feat_ind, optimizer_reparam, l_bound, u_bound);
}
#pragma omp critical
next_phi.insert(next_phi.end(), next_phi_private.begin(), next_phi_private.end());
}
_start_gen_reparam.push_back(_phi_reparam.size());
node_value_arrs::clear_temp_reg();
if(nn < _max_phi)
{
int new_phi_size;
int phi_size_start = _phi.size();
if(_mpi_comm->rank() == 0)
{
std::vector<std::vector<node_ptr>> next_phi_gathered;
mpi::gather(*_mpi_comm, next_phi, next_phi_gathered, 0);
feat_ind = _phi.size();
for(auto& next_phi_vec : next_phi_gathered)
{
_phi.insert(_phi.end(), next_phi_vec.begin(), next_phi_vec.end());
}
new_phi_size = _phi.size();
// Sort the features to ensure consistent feature spaces for all MPI/OpenMP configurations
std::sort(
_phi.begin() + _start_gen.back(),
_phi.end(),
[feat_ind](node_ptr n1, node_ptr n2){return n1->sort_score(feat_ind) < n2->sort_score(feat_ind);}
);
// Reindex sorted features
std::for_each(
_phi.begin() + _start_gen.back(),
_phi.end(),
[&feat_ind](node_ptr n){n->reindex(feat_ind); ++feat_ind;}
);
mpi::broadcast(*_mpi_comm, new_phi_size, 0);
for(int bb = 0; bb <= (new_phi_size - phi_size_start) / 10000; ++bb)
{
mpi::broadcast(*_mpi_comm, &_phi[phi_size_start + bb * 10000], std::min(10000, new_phi_size - phi_size_start - bb * 10000), 0);
}
}
else
{
mpi::gather(*_mpi_comm, next_phi, 0);
mpi::broadcast(*_mpi_comm, new_phi_size, 0);
_phi.resize(new_phi_size);
for(int bb = 0; bb <= (new_phi_size - phi_size_start) / 10000; ++bb)
{
mpi::broadcast(*_mpi_comm, &_phi[phi_size_start + bb * 10000], std::min(10000, new_phi_size - phi_size_start - bb * 10000), 0);
}
}
if(phi_size_start == new_phi_size)
{
throw std::logic_error("No features created during this rung (" + std::to_string(nn) + ")");
}
node_value_arrs::clear_temp_reg();
if(nn < _max_phi)
{
// Remove identical features
std::vector<double> scores(_phi_reparam.size());
_mpi_comm->barrier();
project_funcs::project_r(_prop.data(), scores.data(), _phi_reparam, _task_sizes, 1);
scores.erase(scores.begin(), scores.begin() + _start_gen_reparam.back());
inds = util_funcs::argsort<double>(scores);
std::vector<int> del_inds;
_mpi_comm->barrier();
node_value_arrs::clear_temp_reg();
for(int sc = 0; sc < _scores.size() - 1; ++sc)
{
#ifdef PARAMETERIZE
if(_phi_reparam[inds[sc] + _start_gen_reparam.back()]->n_params() > 0)
{
continue;
}
#endif
if(_scores[inds[sc]] > -1e-10)
{
double base_val = std::abs(
util_funcs::r(
_phi_reparam[_start_gen_reparam.back() + inds[sc]]->value_ptr(),
_phi_reparam[_start_gen_reparam.back() + inds[sc]]->value_ptr(),
_n_samp
)
);
for(int sc2 = sc + 1; sc2 < _scores.size(); ++sc2)
{
double comp = std::abs(
base_val - std::abs(
util_funcs::r(
_phi_reparam[_start_gen_reparam.back() + inds[sc]]->value_ptr(),
_phi_reparam[_start_gen_reparam.back() + inds[sc2]]->value_ptr(0, true),
_n_samp
)
)
);
if(comp < 1e-10)
{
del_inds.push_back(-1 * (inds[sc] + _start_gen_reparam.back()));
break;
}
}
}
else if(_scores[inds[sc + 1]] - _scores[inds[sc]] < 1e-10)
{
double base_val = std::abs(
util_funcs::r(
_phi_reparam[_start_gen_reparam.back() + inds[sc]]->value_ptr(),
_phi_reparam[_start_gen_reparam.back() + inds[sc]]->value_ptr(),
_n_samp
)
);
double comp = std::abs(
base_val - std::abs(
util_funcs::r(
_phi_reparam[_start_gen_reparam.back() + inds[sc]]->value_ptr(),
_phi_reparam[_start_gen_reparam.back() + inds[sc + 1]]->value_ptr(0, true),
_n_samp
)
)
);
if(comp < 1e-10)
{
del_inds.push_back(-1 * (inds[sc] + _start_gen.back()));
}
}
}
inds = util_funcs::argsort<int>(del_inds);
for(int ii = 0; ii < inds.size(); ++ii)
{
_phi_reparam.erase(_phi_reparam.begin() - del_inds[inds[ii]]);
}
// Reindex
for(int ff = _start_gen.back(); ff < _phi_reparam.size(); ++ff)
{
_phi_reparam[ff]->reindex(ff);
}
}
}
}
_n_feat = _phi.size();
}
#endif
void FeatureSpace::generate_non_param_feats(
std::vector<node_ptr>::iterator& feat,
std::vector<node_ptr>& feat_set,
unsigned long int& feat_ind,
......@@ -321,7 +543,6 @@ void FeatureSpace::generate_new_feats(
}
}
}
#endif
void FeatureSpace::generate_feature_space()
{
......@@ -353,7 +574,8 @@ void FeatureSpace::generate_feature_space()
#pragma omp for schedule(dynamic)
for(auto feat_1 = _phi.begin() + _start_gen.back() + _mpi_comm->rank(); feat_1 < _phi.end(); feat_1 += _mpi_comm->size())
{
generate_new_feats(feat_1, next_phi_private, feat_ind, optimizer, l_bound, u_bound);
generate_non_param_feats(feat_1, next_phi_private, feat_ind, l_bound, u_bound);
generate_param_feats(feat_1, next_phi_private, feat_ind, optimizer, l_bound, u_bound);
}
#pragma omp critical
......@@ -366,7 +588,7 @@ void FeatureSpace::generate_feature_space()
#pragma omp for schedule(dynamic)
for(auto feat_1 = _phi.begin() + _start_gen.back() + _mpi_comm->rank(); feat_1 < _phi.end(); feat_1 += _mpi_comm->size())
{
generate_new_feats(feat_1, next_phi_private, feat_ind, l_bound, u_bound);
generate_non_param_feats(feat_1, next_phi_private, feat_ind, l_bound, u_bound);
}
#pragma omp critical
......@@ -509,6 +731,29 @@ void FeatureSpace::generate_feature_space()
_phi.erase(_phi.begin() - del_inds[inds[ii]]);
}
// Reorder features based on the number of parameters they have (none goes first)
std::vector<int> feat_n_params(_phi.size() - _start_gen.back());
std::transform(
_phi.begin() + _start_gen.back(),
_phi.end(),
feat_n_params.begin(),
[](node_ptr feat){return feat->n_params();}
);
inds = util_funcs::argsort<int>(feat_n_params);
next_phi.resize(feat_n_params.size());
std::copy_n(_phi.begin() + _start_gen.back(), feat_n_params.size(), next_phi.begin());
std::transform(
inds.begin(),
inds.end(),
_phi.begin() + _start_gen.back(),
[&next_phi](int ind){return next_phi[ind];}
);
// Set how many features have no parameters
_end_no_params.push_back(
std::count_if(feat_n_params.begin(), feat_n_params.end(), [](int n_param){return n_param == 0;})
);
// Reindex
for(int ff = _start_gen.back(); ff < _phi.size(); ++ff)
{
......@@ -700,16 +945,17 @@ void FeatureSpace::project_generated(const double* prop, const int size, std::ve
int index_base = _phi.size() + _n_sis_select * (omp_get_thread_num() + _mpi_comm->size());
#ifdef PARAMETERIZE
std::shared_ptr<NLOptimizer> optimizer;
std::shared_ptr<NLOptimizer> optimizer = nlopt_wrapper::get_optimizer(_project_type, _task_sizes, _prop, _max_phi, _max_param_depth);
std::shared_ptr<NLOptimizer> reparam_optimizer;
if(_reparam_residual)
{
std::vector<double> prop_vec(size, 0.0);
std::copy_n(prop, size, prop_vec.data());
optimizer = nlopt_wrapper::get_optimizer(_project_type, _task_sizes, prop_vec, _max_phi, _max_param_depth);
reparam_optimizer = nlopt_wrapper::get_optimizer(_project_type, _task_sizes, prop_vec, _max_phi, _max_param_depth);
}
else
{
optimizer = nlopt_wrapper::get_optimizer(_project_type, _task_sizes, _prop, _max_phi, _max_param_depth);
reparam_optimizer = nullptr;
}
#endif
......@@ -724,9 +970,14 @@ void FeatureSpace::project_generated(const double* prop, const int size, std::ve
bool is_sel = (*feat)->selected();
(*feat)->set_selected(false);
#ifdef PARAMETERIZE
generate_new_feats(feat, generated_phi, feat_ind, optimizer, _l_bound, _u_bound);
generate_non_param_feats(feat, generated_phi, feat_ind, _l_bound, _u_bound);
generate_param_feats(feat, generated_phi, feat_ind, optimizer, _l_bound, _u_bound);
if(reparam_optimizer)
{
generate_reparam_feats(feat, generated_phi, feat_ind, reparam_optimizer, _l_bound, _u_bound);
}
#else
generate_new_feats(feat, generated_phi, feat_ind, _l_bound, _u_bound);
generate_non_param_feats(feat, generated_phi, feat_ind, _l_bound, _u_bound);
#endif
(*feat)->set_selected(is_sel);
......@@ -833,47 +1084,8 @@ void FeatureSpace::sis(const std::vector<double>& prop)
if(_reparam_residual && (_phi_selected.size() > 0))
{
double start_time = omp_get_wtime();
// Make a hard copy of the previously selected features
node_ptr copy;
for(int ff = _phi_selected.size() - _n_sis_select; ff < _phi_selected.size(); ++ff)
{
copy = _phi_selected[ff]->hard_copy();
_phi_selected[ff]->set_selected(false);
_phi_selected[ff]->set_d_mat_ind(-1);
_phi_selected[ff] = copy;
}
// Resize values array for reparameterization
node_value_arrs::resize_values_arr(0, _start_gen[1]);
// Reparameterize based on residuals
#pragma omp parallel
{
std::shared_ptr<NLOptimizer> optimizer = nlopt_wrapper::get_optimizer(_project_type, _task_sizes, prop, _max_phi, _max_param_depth);
#pragma omp for schedule(dynamic)
for(int ff = _start_gen[1]; ff < _phi.size(); ++ff)
{
if(_phi[ff]->n_params() > 0)
{
_phi[ff]->get_parameters(optimizer);
}
}
}
// Reset the stored feature values
int max_store = (_n_rung_store == _max_phi) ? _phi.size() : _start_gen[_n_rung_store + 1];
node_value_arrs::resize_values_arr(_n_rung_store, max_store);
for(int ff = _start_gen[1]; ff < max_store; ++ff)
{
_phi[ff]->set_value();
_phi[ff]->test_value();
}
_mpi_comm->barrier();
if(_mpi_comm->rank() == 0)
{
std::cout << "The time for reparameterization: " << omp_get_wtime() - start_time << " s" << std::endl;
}
_phi_reparam.resize(0);
generate_reparam_feature_set(prop);
}
#endif
// Create output directories if needed
......
......@@ -45,6 +45,10 @@ class FeatureSpace
const std::vector<node_ptr> _phi_0; //!< initial feature space
#ifdef PARAMETERIZE
std::vector<node_ptr> _phi_reparam; //!< The list of nodes used for reparameterization
std::vector<int> _end_no_params; //!< Per-rung marker for where parameterized nodes start; seeded with the size of phi_0 and extended with the number of parameter-free features of each generated rung
std::vector<int> _start_gen_reparam; //!< The size of _phi_reparam recorded for each reparameterization rung (seeded with 0); used as the starting offset into _phi_reparam
std::vector<un_param_op_node_gen> _un_param_operators; //!< list of all parameterized unary operators with free parameters
std::vector<bin_param_op_node_gen> _com_bin_param_operators; //!< list of all parameterized commutable binary operators with free parameters
std::vector<bin_param_op_node_gen> _bin_param_operators; //!< list of all parameterized binary operators with free parameters
......@@ -268,9 +272,28 @@ public:
*/
inline int n_rung_generate() const {return _n_rung_generate;}
/**
* @brief Generate a new set of non-parameterized features from a single feature
* @details Take in the feature and perform all valid algebraic operations on it.
*
* @param feat The feature to spawn new features from
* @param feat_set The feature set to pull features from for combinations
* @param feat_ind starting index for the next feature generated
* @param l_bound lower bound for the absolute value of the feature (defaults to 1e-50)
* @param u_bound upper bound for the absolute value of the feature (defaults to 1e50)
*/
void generate_non_param_feats(
std::vector<node_ptr>::iterator& feat,
std::vector<node_ptr>& feat_set,
unsigned long int& feat_ind,
const double l_bound=1e-50,
const double u_bound=1e50
);
#ifdef PARAMETERIZE
/**
* @brief Generate a new set of features from a single feature
* @brief Generate a new set of parameterized features from a single feature
* @details Take in the feature and perform all valid algebraic operations on it.
*
* @param feat The feature to spawn new features from
......@@ -280,7 +303,7 @@ public:
* @param l_bound lower bound for the absolute value of the feature
* @param u_bound upper bound for the absolute value of the feature
*/
void generate_new_feats(
void generate_param_feats(
std::vector<node_ptr>::iterator& feat,
std::vector<node_ptr>& feat_set,
unsigned long int& feat_ind,
......@@ -288,24 +311,32 @@ public:
const double l_bound=1e-50,
const double u_bound=1e50
);
#else
/**
* @brief Generate a new set of features from a single feature
* @details Take in the feature and perform all valid algebraic operations on it.
* @brief Generate a new set of parameterized features for the residuals
*
* @param feat The feature to spawn new features from
* @param feat_set The feature set to pull features from for combinations
* @param feat_ind starting index for the next feature generated
* @param optimizer The object used to optimize the parameterized features
* @param l_bound lower bound for the absolute value of the feature
* @param u_bound upper bound for the absolute value of the feature
*/
void generate_new_feats(
void generate_reparam_feats(
std::vector<node_ptr>::iterator& feat,
std::vector<node_ptr>& feat_set,
unsigned long int& feat_ind,
std::shared_ptr<NLOptimizer> optimizer,
const double l_bound=1e-50,
const double u_bound=1e50
);
/**
* @brief Generate reparameterized feature set
* @details Loops over the rungs, generating new features from _phi and _phi_reparam,
*          and builds the optimizer used for reparameterization from prop.
*
* @param prop The property to optimize against
* @throws std::logic_error if no features are created during a rung
*/
void generate_reparam_feature_set(const std::vector<double>& prop);
#endif
/**
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment