Commit 1d28781c authored by Thomas Purcell

Update tests to increase coverage

Also includes bug fixes associated with those changes
parent 970ebec8
@@ -378,7 +378,7 @@ void Model::populate_model(const std::string train_filename, const std::string t
split_line = str_utils::split_string_trim(prop_desc_line);
if(split_line.size() > 2)
{
_prop_label = split_line[1];
_prop_label = split_line[1].substr(1, split_line[1].size() - 2);
_prop_unit = Unit(split_line.back());
}
else
......
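The substr in the new line above presumably strips the surrounding quote characters from the parsed property label; a minimal sketch of that behavior, using a hypothetical label token:

    #include <iostream>
    #include <string>

    int main()
    {
        std::string field = "\"Band Gap\"";                    // hypothetical quoted label as read from the file
        std::string label = field.substr(1, field.size() - 2); // drop the leading and trailing quote
        std::cout << label << std::endl;                       // prints: Band Gap
    }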
@@ -230,9 +230,8 @@ void FeatureSpace::generate_param_feats(
const double u_bound
)
{
unsigned long int phi_ind = feat - _phi.begin();
unsigned long int phi_ind = feat - start;
feat_set.reserve(feat_set.size() + _un_param_operators.size() + phi_ind * (_com_bin_param_operators.size() + 2 * _bin_param_operators.size()));
for(auto& op : _un_param_operators)
{
op(feat_set, *feat, feat_ind, l_bound, u_bound, optimizer);
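A note on the offset change above: generate_param_feats is also called on _phi_reparam (see generate_reparam_feature_set below), so the number of earlier features used for the reserve estimate has to be measured from the start of the range actually being expanded rather than unconditionally from _phi.begin(); subtracting iterators that belong to different vectors is undefined behavior. A minimal sketch, with hypothetical stand-in containers:

    #include <vector>

    int main()
    {
        // Hypothetical stand-ins for _phi and _phi_reparam.
        std::vector<int> phi(10), phi_reparam(4);

        auto feat = phi_reparam.begin() + 3;

        // Correct: offset of *feat inside the range that is being expanded.
        unsigned long int phi_ind = feat - phi_reparam.begin(); // == 3

        // feat - phi.begin() would subtract iterators from two different
        // vectors, which is undefined behavior in C++.
        return phi_ind == 3 ? 0 : 1;
    }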
@@ -277,31 +276,37 @@ void FeatureSpace::generate_reparam_feats(
{
for(int rr = 0; rr <= cur_rung; ++rr)
{
for(auto feat_2 = _phi.begin() + _start_rung[rr]; (feat_2 != feat) || (feat_2 != _phi.begin() + _end_no_params[rr]); ++feat_2)
for(auto feat_2 = _phi.begin() + _start_rung[rr]; (feat_2 < feat) || (feat_2 < (_phi.begin() + _end_no_params[rr])); ++feat_2)
{
op(feat_set, *feat, *feat_2, feat_ind, l_bound, u_bound, optimizer);
}
}
for(auto feat_2 = _phi_reparam.begin(); (feat_2 != feat) || (feat_2 != _phi_reparam.end()); ++feat_2)
if(_phi_reparam.size() > 0)
{
op(feat_set, *feat, *feat_2, feat_ind, l_bound, u_bound, optimizer);
for(auto feat_2 = _phi_reparam.begin(); (feat_2 < _phi_reparam.end()); ++feat_2)
{
op(feat_set, *feat, *feat_2, feat_ind, l_bound, u_bound, optimizer);
}
}
}
for(auto& op : _bin_param_operators)
{
for(int rr = 0; rr <= cur_rung; ++rr)
{
for(auto feat_2 = _phi.begin() + _start_rung[rr]; (feat_2 != feat) || (feat_2 != _phi.begin() + _end_no_params[rr]); ++feat_2)
for(auto feat_2 = _phi.begin() + _start_rung[rr]; (feat_2 < feat) || (feat_2 < (_phi.begin() + _end_no_params[rr])); ++feat_2)
{
op(feat_set, *feat, *feat_2, feat_ind, l_bound, u_bound, optimizer);
op(feat_set, *feat_2, *feat, feat_ind, l_bound, u_bound, optimizer);
}
}
for(auto feat_2 = _phi_reparam.begin(); (feat_2 != feat) || (feat_2 != _phi_reparam.end()); ++feat_2)
if(_phi_reparam.size() > 0)
{
op(feat_set, *feat, *feat_2, feat_ind, l_bound, u_bound, optimizer);
op(feat_set, *feat_2, *feat, feat_ind, l_bound, u_bound, optimizer);
for(auto feat_2 = _phi_reparam.begin(); (feat_2 < _phi_reparam.end()); ++feat_2)
{
op(feat_set, *feat, *feat_2, feat_ind, l_bound, u_bound, optimizer);
op(feat_set, *feat_2, *feat, feat_ind, l_bound, u_bound, optimizer);
}
}
}
}
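The loop-condition rewrites above fix a termination bug: a condition of the form (it != a) || (it != b) can only become false when a and b are the same position, so the old loops could walk past the intended range, whereas the < form becomes false once the iterator has passed both bounds. A minimal illustration with plain integers:

    #include <cassert>

    int main()
    {
        const int a = 3, b = 5; // hypothetical stand-ins for feat and the end of the rung

        // Old condition: false only if ii == a && ii == b, which is impossible
        // when a != b, so a loop guarded by it never stops on its own.
        for (int ii = 0; ii < 10; ++ii)
        {
            assert((ii != a) || (ii != b));
        }

        // New condition: false once ii has reached both bounds, so the loop terminates.
        int steps = 0;
        for (int ii = 0; (ii < a) || (ii < b); ++ii)
        {
            ++steps;
        }
        assert(steps == 5); // iterations ii = 0 .. 4
    }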
@@ -333,23 +338,23 @@ void FeatureSpace::generate_reparam_feature_set(const std::vector<double>& prop)
std::shared_ptr<NLOptimizer> optimizer_param = nlopt_wrapper::get_optimizer(_project_type, _task_sizes_train, _prop_train, _max_rung, _max_param_depth);
std::shared_ptr<NLOptimizer> optimizer_reparam = nlopt_wrapper::get_optimizer(_project_type, _task_sizes_train, prop, _max_rung, _max_param_depth);
#ifdef OMP45
#pragma omp for schedule(monotonic: dynamic)
#else
#pragma omp for schedule(dynamic)
#endif
for(auto feat_1 = _phi_reparam.begin() + _start_rung_reparam.back() + _mpi_comm->rank(); feat_1 < _phi_reparam.end(); feat_1 += _mpi_comm->size())
{
generate_non_param_feats(feat_1, next_phi_private, _phi_reparam.begin(), feat_ind, l_bound, u_bound);
generate_param_feats(feat_1, next_phi_private, _phi_reparam.begin(), feat_ind, optimizer_param, l_bound, u_bound);
}
#ifdef OMP45
#pragma omp for schedule(monotonic: dynamic)
#else
#pragma omp for schedule(dynamic)
#endif
for(auto feat_1 = _phi.begin() + _start_rung[nn-1] + _mpi_comm->rank(); feat_1 < _phi.begin() + _end_no_params[nn-1]; feat_1 += _mpi_comm->size())
for(auto feat_1 = _phi.begin() + _start_rung[nn-1] + _mpi_comm->rank(); feat_1 < (_phi.begin() + _end_no_params[nn-1]); feat_1 += _mpi_comm->size())
{
generate_reparam_feats(feat_1, next_phi_private, feat_ind, optimizer_reparam, l_bound, u_bound);
}
@@ -359,7 +364,7 @@ void FeatureSpace::generate_reparam_feature_set(const std::vector<double>& prop)
}
_start_rung_reparam.push_back(_phi_reparam.size());
node_value_arrs::clear_temp_reg();
if(nn < _max_rung)
if((nn < _max_rung) || (nn <= _n_rung_store) || (_mpi_comm->size() == 1))
{
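// Presumably: this branch handles rungs whose features must end up complete on
// every rank (intermediate rungs, rungs whose values stay in memory, or a
// single-rank run); the else branch below instead leaves the rung distributed
// and rebalances it across the ranks.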
int new_phi_size;
int phi_size_start = _phi_reparam.size();
@@ -500,6 +505,150 @@ void FeatureSpace::generate_reparam_feature_set(const std::vector<double>& prop)
}
}
}
else
{
std::vector<size_t> next_phi_sizes(_mpi_comm->size());
if(_mpi_comm->rank() == 0)
{
mpi::gather(*_mpi_comm, next_phi.size(), next_phi_sizes.data(), 0);
mpi::broadcast(*_mpi_comm, next_phi_sizes.data(), next_phi_sizes.size(), 0);
}
else
{
mpi::gather(*_mpi_comm, next_phi.size(), 0);
mpi::broadcast(*_mpi_comm, next_phi_sizes.data(), next_phi_sizes.size(), 0);
}
size_t n_feat = std::accumulate(next_phi_sizes.begin(), next_phi_sizes.end(), 0);
if(n_feat == 0)
{
throw std::logic_error("No features created during this rung (" + std::to_string(nn) + ")");
}
size_t n_feat_rank = n_feat / _mpi_comm->size();
size_t n_feat_below_rank = _mpi_comm->rank() * n_feat_rank;
size_t n_feat_added = 0;
if(_mpi_comm->rank() < n_feat % _mpi_comm->size())
{
++n_feat_rank;
n_feat_below_rank += _mpi_comm->rank();
}
else
{
n_feat_below_rank += n_feat % _mpi_comm->size();
}
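// Worked example of the split above (hypothetical numbers): with n_feat = 10
// and 4 ranks, n_feat / size = 2 with a remainder of 2, so ranks 0 and 1 keep
// n_feat_rank = 3 features starting at offsets 0 and 3, while ranks 2 and 3
// keep 2 features starting at offsets 6 and 8.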
while((n_feat_added < n_feat_rank) && (next_phi.size() > 0))
{
_phi_reparam.push_back(next_phi.back());
next_phi.pop_back();
++n_feat_added;
}
// This could be calculated without an all_gather; the gather is used here to avoid introducing too many changes at one time
std::vector<size_t> next_phi_needed(_mpi_comm->size());
std::vector<size_t> next_phi_excess(_mpi_comm->size());
if(_mpi_comm->rank() == 0)
{
mpi::gather(*_mpi_comm, next_phi.size(), next_phi_excess.data(), 0);
mpi::gather(*_mpi_comm, n_feat_rank - n_feat_added, next_phi_needed.data(), 0);
mpi::broadcast(*_mpi_comm, next_phi_excess.data(), next_phi_excess.size(), 0);
mpi::broadcast(*_mpi_comm, next_phi_needed.data(), next_phi_needed.size(), 0);
}
else
{
mpi::gather(*_mpi_comm, next_phi.size(), 0);
mpi::gather(*_mpi_comm, n_feat_rank - n_feat_added, 0);
mpi::broadcast(*_mpi_comm, next_phi_excess.data(), next_phi_excess.size(), 0);
mpi::broadcast(*_mpi_comm, next_phi_needed.data(), next_phi_needed.size(), 0);
}
std::vector<size_t> send_sizes(next_phi_sizes.size(), 0);
std::vector<size_t> recv_sizes(next_phi_sizes.size(), 0);
// Is this rank sending or receiving?
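// Once every rank knows next_phi_needed and next_phi_excess, ranks holding
// leftover features and ranks that are short walk the same prefix sums, so a
// sender and its matching receiver arrive at identical send_sizes/recv_sizes
// without any further coordination.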
if(next_phi_excess[_mpi_comm->rank()] > 0)
{
size_t total_sent = std::accumulate(next_phi_excess.begin(), next_phi_excess.begin() + _mpi_comm->rank(), 0);
size_t prev_sent_recv = 0;
size_t send_size = 0;
int ind = 0;
while((prev_sent_recv <= total_sent) && (ind < _mpi_comm->size()))
{
prev_sent_recv += next_phi_needed[ind];
++ind;
}
send_size = std::min(next_phi.size(), prev_sent_recv - total_sent);
send_sizes[ind-1] = send_size;
total_sent = send_size;
while((total_sent < next_phi.size()) && (ind < _mpi_comm->size()))
{
send_size = std::min(next_phi.size() - total_sent, next_phi_needed[ind]);
send_sizes[ind] = send_size;
total_sent += send_size;
++ind;
}
total_sent = 0;
for(int pp = 0; pp < send_sizes.size(); ++pp)
{
if(send_sizes[pp] == 0)
continue;
std::vector<node_ptr> to_send(send_sizes[pp]);
std::copy_n(next_phi.begin() + total_sent, send_sizes[pp], to_send.begin());
_mpi_comm->send(pp, _mpi_comm->cantor_tag_gen(_mpi_comm->rank(), pp, 1, 0), to_send);
total_sent += send_sizes[pp];
}
}
else
{
size_t total_recv = std::accumulate(next_phi_needed.begin(), next_phi_needed.begin() + _mpi_comm->rank(), 0);
size_t prev_recv_sent = 0;
size_t recv_size = 0;
int ind = 0;
while((prev_recv_sent <= total_recv) && (ind < _mpi_comm->size()))
{
prev_recv_sent += next_phi_excess[ind];
++ind;
}
recv_size = std::min(n_feat_rank - n_feat_added, prev_recv_sent - total_recv);
recv_sizes[ind-1] = recv_size;
total_recv = recv_size;
while((total_recv < n_feat_rank - n_feat_added) && (ind < _mpi_comm->size()))
{
recv_size = std::min(n_feat_rank - n_feat_added - total_recv, next_phi_excess[ind]);
recv_sizes[ind] = recv_size;
total_recv += recv_size;
++ind;
}
total_recv = 0;
for(int pp = 0; pp < recv_sizes.size(); ++pp)
{
if(recv_sizes[pp] == 0)
{
continue;
}
std::vector<node_ptr> to_recv;
_mpi_comm->recv(pp, _mpi_comm->cantor_tag_gen(pp, _mpi_comm->rank(), 1, 0), to_recv);
for(auto& feat : to_recv)
{
_phi_reparam.push_back(feat);
}
}
}
#pragma omp parallel for
for(int ff = _start_rung.back(); ff < _phi_reparam.size(); ++ff)
{
_phi_reparam[ff]->reindex(ff + n_feat_below_rank, ff);
_phi_reparam[ff]->set_value();
_phi_reparam[ff]->set_test_value();
}
}
}
}
#endif
@@ -566,11 +715,11 @@ void FeatureSpace::generate_feature_space()
std::vector<node_ptr> next_phi_private;
std::shared_ptr<NLOptimizer> optimizer = nlopt_wrapper::get_optimizer(_project_type, _task_sizes_train, _prop_train, _max_rung, _max_param_depth);
#ifdef OMP45
#pragma omp for schedule(monotonic: dynamic)
#else
#pragma omp for schedule(dynamic)
#endif
for(auto feat_1 = _phi.begin() + _start_rung.back() + _mpi_comm->rank(); feat_1 < _phi.end(); feat_1 += _mpi_comm->size())
{
generate_non_param_feats(feat_1, next_phi_private, _phi.begin(), feat_ind, l_bound, u_bound);
@@ -584,11 +733,11 @@ void FeatureSpace::generate_feature_space()
#pragma omp parallel firstprivate(feat_ind, l_bound, u_bound)
{
std::vector<node_ptr> next_phi_private;
#ifdef OMP45
#pragma omp for schedule(monotonic: dynamic)
#else
#pragma omp for schedule(dynamic)
#endif
for(auto feat_1 = _phi.begin() + _start_rung.back() + _mpi_comm->rank(); feat_1 < _phi.end(); feat_1 += _mpi_comm->size())
{
generate_non_param_feats(feat_1, next_phi_private, _phi.begin(), feat_ind, l_bound, u_bound);
@@ -902,7 +1051,7 @@ void FeatureSpace::generate_feature_space()
}
}
#ifdef PARAMETERIZE
// Reorder features based on the number of parameters they have (those with none go first)
std::vector<int> feat_n_params(_phi.size() - _start_rung.back());
std::transform(
@@ -928,9 +1077,11 @@ void FeatureSpace::generate_feature_space()
// Set how many features have no parameters
_end_no_params.push_back(
_start_rung.back() +
std::count_if(feat_n_params.begin(), feat_n_params.end(), [](int n_param){return n_param == 0;})
);
#endif
}
_n_feat = _phi.size();
}
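The reordering elided above presumably behaves like the following sketch: the features of the newest rung are partitioned so that parameter-free ones come first, and _end_no_params records the size of that parameter-free block. The stable partition and the names here are assumptions; only the count_if boundary is shown above.

    #include <algorithm>
    #include <vector>

    int main()
    {
        // Hypothetical per-feature parameter counts for the newest rung.
        std::vector<int> feat_n_params = {2, 0, 1, 0, 0};

        // Stable partition: parameter-free features first, parameterized ones after.
        std::stable_sort(feat_n_params.begin(), feat_n_params.end(),
                         [](int a, int b) { return (a == 0) && (b != 0); });

        // Boundary of the parameter-free block, used like _end_no_params above.
        auto end_no_params = std::count_if(feat_n_params.begin(), feat_n_params.end(),
                                           [](int n_param) { return n_param == 0; });
        return end_no_params == 3 ? 0 : 1;
    }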
@@ -970,11 +1121,11 @@ void FeatureSpace::generate_and_project(std::shared_ptr<LossFunction> loss, std:
}
#endif
#ifdef OMP45
#pragma omp for schedule(monotonic: dynamic)
#else
#pragma omp for schedule(dynamic)
#endif
for(auto feat = _phi.begin() + _start_rung.back() + _mpi_comm->rank(); feat < _phi.end(); feat += _mpi_comm->size())
{
unsigned long int feat_ind = _phi.size() + _n_sis_select * (omp_get_num_threads() + _mpi_comm->size());
@@ -984,16 +1135,16 @@ void FeatureSpace::generate_and_project(std::shared_ptr<LossFunction> loss, std:
bool is_sel = (*feat)->selected();
(*feat)->set_selected(false);
#ifdef PARAMETERIZE
generate_non_param_feats(feat, generated_phi, _phi.begin(), feat_ind, _l_bound, _u_bound);
generate_param_feats(feat, generated_phi, _phi.begin(), feat_ind, optimizer, _l_bound, _u_bound);
if(reparam_optimizer && (feat < _phi.begin() + _end_no_params.back()))
{
generate_reparam_feats(feat, generated_phi, feat_ind, reparam_optimizer, _l_bound, _u_bound);
}
#else
generate_non_param_feats(feat, generated_phi, _phi.begin(), feat_ind, _l_bound, _u_bound);
#endif
(*feat)->set_selected(is_sel);
if(generated_phi.size() == 0)
@@ -1008,7 +1159,7 @@ void FeatureSpace::generate_and_project(std::shared_ptr<LossFunction> loss, std:
std::vector<int> inds = util_funcs::argsort<double>(scores);
int ii = 0;
while((ii < inds.size()) && (scores[inds[ii]] < -1.0))
while((ii < inds.size()) && (scores[inds[ii]] < (-1.0 - 1e-10)))
{
++ii;
}
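// Presumably the widened cutoff keeps scores that dip just below -1.0 through
// floating-point rounding from being skipped, while still passing over entries
// that sit well below the theoretical bound of -1.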
@@ -1094,6 +1245,7 @@ void FeatureSpace::generate_and_project(std::shared_ptr<LossFunction> loss, std:
void FeatureSpace::sis(const std::vector<double>& prop)
{
std::cout << _project_type << std::endl;
sis(
loss_function_util::get_loss_function(
_project_type,
@@ -1108,7 +1260,7 @@ void FeatureSpace::sis(const std::vector<double>& prop)
void FeatureSpace::sis(std::shared_ptr<LossFunction> loss)
{
#ifdef PARAMETERIZE
// Reparameterize for the residuals
if(_reparam_residual && (_phi_selected.size() > 0))
{
@@ -1120,7 +1272,7 @@ void FeatureSpace::sis(std::shared_ptr<LossFunction> loss)
_phi.insert(_phi.end(), _phi_reparam.begin(), _phi_reparam.end());
_scores.resize(_phi.size());
}
#endif
// Create output directories if needed
boost::filesystem::path p(_feature_space_file.c_str());
boost::filesystem::create_directories(p.remove_filename());
@@ -1144,7 +1296,6 @@ void FeatureSpace::sis(std::shared_ptr<LossFunction> loss)
// Get projection scores
double start_time = omp_get_wtime();
project_funcs::project_loss(loss, _phi, _scores.data());
_mpi_comm->barrier();
if(_mpi_comm->rank() == 0)
{
......
@@ -326,142 +326,6 @@ public:
// Python Interface Functions
#ifdef PY_BINDINGS
#ifdef PARAMETERIZE
// DocString: feat_space_init_py_list
/**
* @brief FeatureSpace constructor given a set of primary features and operators
*
* @param phi_0 (list) The set of primary features
* @param allowed_ops (list) The list of allowed operators
* @param allowed_param_ops (list) The list of allowed operators to be used with non-linear optimization
* @param prop (list) List containing the property vector (training data only)
* @param project_type (str) The type of loss function/projection operator to use
* @param max_rung (int) The maximum rung of the feature (Height of the binary expression tree -1)
* @param n_sis_select (int) The number of features to select during each SIS step
* @param n_rung_store (int) The number of rungs whose features' data is always stored in memory
* @param n_rung_generate (int) Either 0 or 1, and is the number of rungs to generate on the fly during SIS
* @param cross_corr_max (double) The maximum allowed cross-correlation value between selected features
* @param min_abs_feat_val (double) The minimum allowed absolute feature value for a feature
* @param max_abs_feat_val (double) The maximum allowed absolute feature value for a feature
* @param max_param_depth (int) The maximum depth in the binary expression tree to set non-linear optimization
* @param reparam_residual (bool) If True then reparameterize features using the residuals of each model
*/
FeatureSpace(
py::list phi_0,
py::list allowed_ops,
py::list allowed_param_ops,
py::list prop,
std::string project_type="regression",
int max_rung=1,
int n_sis_select=1,
int n_rung_store=-1,
int n_rung_generate=0,
double cross_corr_max=1.0,
double min_abs_feat_val=1e-50,
double max_abs_feat_val=1e50,
int max_param_depth = -1,
bool reparam_residual=false
);
// DocString: feat_space_init_np_array
/**
* @brief FeatureSpace constructor given a set of primary features and operators
*
* @param phi_0 (list) The set of primary features
* @param allowed_ops (list) The list of allowed operators
* @param allowed_param_ops (list) The list of allowed operators to be used with non-linear optimization
* @param prop (np.ndarray) Array containing the property vector (training data only)
* @param project_type (str) The type of loss function/projection operator to use
* @param max_rung (int) The maximum rung of the feature (Height of the binary expression tree -1)
* @param n_sis_select (int) The number of features to select during each SIS step
* @param n_rung_store (int) The number of rungs whose features' data is always stored in memory
* @param n_rung_generate (int) Either 0 or 1, and is the number of rungs to generate on the fly during SIS
* @param cross_corr_max (double) The maximum allowed cross-correlation value between selected features
* @param min_abs_feat_val (double) The minimum allowed absolute feature value for a feature
* @param max_abs_feat_val (double) The maximum allowed absolute feature value for a feature
* @param max_param_depth (int) The maximum depth in the binary expression tree to set non-linear optimization
* @param reparam_residual (bool) If True then reparameterize features using the residuals of each model
*/
FeatureSpace(
py::list phi_0,
py::list allowed_ops,
py::list allowed_param_ops,
np::ndarray prop,
std::string project_type="regression",
int max_rung=1,
int n_sis_select=1,
int n_rung_store=-1,
int n_rung_generate=0,
double cross_corr_max=1.0,
double min_abs_feat_val=1e-50,
double max_abs_feat_val=1e50,
int max_param_depth = -1,
bool reparam_residual=false
);
#else
// DocString: feat_space_init_no_param_py_list
/**
* @brief FeatureSpace constructor given a set of primary features and operators
*
* @param phi_0 (list) The set of primary features
* @param allowed_ops (list) The list of allowed operators
* @param prop (list) List containing the property vector (training data only)
* @param project_type (str) The type of loss function/projection operator to use
* @param max_rung (int) The maximum rung of the feature (Height of the binary expression tree -1)
* @param n_sis_select (int) The number of features to select during each SIS step
* @param n_rung_store (int) The number of rungs whose features' data is always stored in memory
* @param n_rung_generate (int) Either 0 or 1, and is the number of rungs to generate on the fly during SIS
* @param cross_corr_max (double) The maximum allowed cross-correlation value between selected features
* @param min_abs_feat_val (double) The minimum allowed absolute feature value for a feature
* @param max_abs_feat_val (double) The maximum allowed absolute feature value for a feature
*/
FeatureSpace(
py::list phi_0,
py::list allowed_ops,
py::list prop,
std::string project_type="regression",
int max_rung=1,
int n_sis_select=1,
int n_rung_store=-1,
int n_rung_generate=0,
double cross_corr_max=1.0,
double min_abs_feat_val=1e-50,
double max_abs_feat_val=1e50
);
// DocString: feat_space_init_no_param_np_array
/**
* @brief FeatureSpace constructor given a set of primary features and operators
*
* @param phi_0 (list) The set of primary features
* @param allowed_ops (list) The list of allowed operators
* @param prop (np.ndarray) Array containing the property vector (training data only)
* @param project_type (str) The type of loss function/projection operator to use
* @param max_rung (int) The maximum rung of the feature (Height of the binary expression tree -1)
* @param n_sis_select (int) The number of features to select during each SIS step
* @param n_rung_store (int) The number of rungs whose features' data is always stored in memory
* @param n_rung_generate (int) Either 0 or 1, and is the number of rungs to generate on the fly during SIS
* @param cross_corr_max (double) The maximum allowed cross-correlation value between selected features
* @param min_abs_feat_val (double) The minimum allowed absolute feature value for a feature
* @param max_abs_feat_val (double) The maximum allowed absolute feature value for a feature
*/
FeatureSpace(
py::list phi_0,
py::list allowed_ops,
np::ndarray prop,
std::string project_type="regression",
int max_rung=1,
int n_sis_select=1,
int n_rung_store=-1,
int n_rung_generate=0,
double cross_corr_max=1.0,
double min_abs_feat_val=1e-50,
double max_abs_feat_val=1e50
);
#endif
// DocString: feat_space_init_file_np_array
/**
@@ -567,6 +431,19 @@ public:
*/
inline py::list allowed_ops_py(){return python_conv_utils::to_list<std::string>(_allowed_ops);}
#ifdef PARAMETERIZE
// DocString: feat_space_allowed_param_ops_py
/**
* @brief The list of allowed operators to be used with non-linear optimization
*/
inline py::list allowed_param_ops_py(){return python_conv_utils::to_list<std::string>(_allowed_param_ops);}
#else
// DocString: feat_space_allowed_param_ops_py
/**
* @brief The list of allowed operators to be used with non-linear optimization (empty when parameterization is disabled)
*/
inline py::list allowed_param_ops_py(){return python_conv_utils::to_list<std::string>({});}
#endif
// DocString: feat_space_start_rung_py
/**
* @brief A list containing the index of the first feature of each rung in the feature space.
......
@@ -407,7 +407,7 @@ public:
* @param depth How far down a given Node is from the root OperatorNode
* @return the number of parameters (_params.size())
*/
inline int n_params(int n_cur=0, int depth = 1) const {return n_cur;};
inline int n_params_possible(int n_cur=0, int depth = 1) const {return n_cur;};
/**
* @brief Set the value of all test samples for the feature inside the central data storage array
......
@@ -84,6 +84,9 @@ ModelNode::ModelNode(node_ptr in_node) :
_rung(in_node->rung()),
_n_leaves(0)
{
_d_mat_ind = in_node->d_mat_ind();
_selected = in_node->selected();
double w_remap_svm_temp = 1.0 / (*std::max_element(_value.begin(), _value.end()) - *std::min_element(_value.begin(), _value.end()));
double b_remap_svm_temp = *std::min_element(_value.begin(), _value.end());
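// Presumably these factors min-max rescale the stored values onto [0, 1] for
// the SVM output: scaled = (value - b_remap_svm_temp) * w_remap_svm_temp.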
......
@@ -425,7 +425,16 @@ public:
* @param depth How far down a given Node is from the root OperatorNode
* @return the number of parameters (_params.size())
*/
virtual int n_params(const int n_cur = 0, const int depth = 1) const = 0;
virtual int n_params_possible(const int n_cur = 0, const int depth = 1) const = 0;
// DocString: node_n_params
/**
* @brief Returns the number of actual parameters for this feature
*/
virtual inline int n_params() const
{
return 0;
}