Commit ec0ecfb0 authored by Thomas Purcell's avatar Thomas Purcell
Browse files

Bug fixes for node distribution

All lower rungs are now stored on the processes; not doing so was causing issues with set_value.
parent 073772ca
......@@ -35,7 +35,7 @@ SISSORegressor::SISSORegressor(std::shared_ptr<FeatureSpace> feat_space, std::ve
/**
 * @brief Fill _a with one block of _n_samp values per entry of inds, then append the contents of _ones
 *
 * @param inds indexes of the features whose values are copied into _a
 */
void SISSORegressor::set_a(std::vector<int>& inds)
{
// Block ii of _a (offset ii * _n_samp) receives the values of feature inds[ii]
for(int ii = 0; ii < inds.size(); ++ii)
// Pre-change line of this diff hunk: values are read through _feat_space->D()
std::copy_n(_feat_space->D(inds[ii]), _n_samp, _a.get() + ii * _n_samp);
// Post-change line of this diff hunk: values are read from node_value_arrs::get_d_matrix_ptr()
std::copy_n(node_value_arrs::get_d_matrix_ptr(inds[ii]), _n_samp, _a.get() + ii * _n_samp);
// The block after the last feature (offset inds.size() * _n_samp) is filled from _ones
std::copy_n(_ones.get(), _n_samp, _a.get() + inds.size() * _n_samp);
}
......
......@@ -36,7 +36,6 @@ FeatureSpace::FeatureSpace(
_max_abs_feat_val(max_abs_feat_val),
_start_gen(1, 0),
_scores(phi_0.size(), 0.0),
_D(0, 0.0),
_allowed_ops(allowed_ops),
_phi_0(phi_0),
_phi(phi_0)
......@@ -155,10 +154,10 @@ void FeatureSpace::generate_feature_space()
std::vector<size_t> next_phi_sizes;
mpi::all_gather(*_mpi_comm, next_phi.size(), next_phi_sizes);
size_t n_feat = std::accumulate(next_phi_sizes.begin(), next_phi_sizes.end(), _phi.size());
size_t n_feat = std::accumulate(next_phi_sizes.begin(), next_phi_sizes.end(), 0);
size_t n_feat_rank = n_feat / _mpi_comm->size();
size_t n_feat_below_rank = _mpi_comm->rank() * n_feat_rank;
size_t n_feat_added = 0;
if(_mpi_comm->rank() < n_feat % _mpi_comm->size())
{
++n_feat_rank;
......@@ -169,32 +168,18 @@ void FeatureSpace::generate_feature_space()
n_feat_below_rank += n_feat % _mpi_comm->size();
}
if(n_feat_below_rank + n_feat_rank <= _phi.size())
{
_phi.erase(_phi.begin(), _phi.begin() + n_feat_below_rank);
_phi.erase(_phi.begin() + n_feat_rank, _phi.end());
}
else if(n_feat_below_rank <= _phi.size())
{
_phi.erase(_phi.begin(), _phi.begin() + n_feat_below_rank);
}
else
while((n_feat_added < n_feat_rank) && (next_phi.size() > 0))
{
_phi = {};
}
while((_phi.size() < n_feat_rank) && (next_phi.size() > 0))
{
next_phi.back()->reindex(_phi.size() + n_feat_below_rank);
_phi.push_back(next_phi.back());
next_phi.pop_back();
++n_feat_added;
}
// This can be calculated without an all_gather, using it to not introduce too many things at one time
std::vector<size_t> next_phi_needed;
std::vector<size_t> next_phi_excess;
mpi::all_gather(*_mpi_comm, next_phi.size(), next_phi_excess);
mpi::all_gather(*_mpi_comm, n_feat_rank - _phi.size(), next_phi_needed);
mpi::all_gather(*_mpi_comm, n_feat_rank - n_feat_added, next_phi_needed);
std::vector<size_t> send_sizes(next_phi_sizes.size(), 0);
std::vector<size_t> recv_sizes(next_phi_sizes.size(), 0);
......@@ -244,12 +229,12 @@ void FeatureSpace::generate_feature_space()
prev_recv_sent += next_phi_excess[ind];
++ind;
}
recv_size = std::min(next_phi.size(), next_phi_excess[ind]);
recv_size = std::min(n_feat_rank - n_feat_added, next_phi_excess[ind]);
recv_sizes[ind] = recv_size;
total_recv = recv_size;
while((_phi.size() < n_feat_rank) && (ind < _mpi_comm->size()))
while((total_recv < n_feat_rank) && (ind < _mpi_comm->size()))
{
recv_size = std::min(_phi.size() + total_recv, next_phi_excess[ind]);
recv_size = std::min(n_feat_added + total_recv, next_phi_excess[ind]);
recv_sizes[ind] = recv_size;
total_recv += recv_size;
++ind;
......@@ -264,10 +249,18 @@ void FeatureSpace::generate_feature_space()
std::vector<node_ptr> to_recv;
_mpi_comm->recv(pp, _mpi_comm->cantorTagGen(_mpi_comm->rank(), pp, 1, 0), to_recv);
for(auto& feat : to_recv)
{
feat->reindex(_phi.size() + n_feat_below_rank);
_phi.push_back(feat);
}
}
}
if(_max_phi <= _n_rung_store)
{
bool use_temp = (_max_phi > _n_rung_store);
node_value_arrs::resize_values_arr(_n_rung_store, _phi.size(), use_temp);
for(int ff = _start_gen.back(); ff < _phi.size(); ++ff)
{
_phi[ff]->reindex(ff + n_feat_below_rank, ff);
_phi[ff]->set_value();
}
}
}
......@@ -284,70 +277,71 @@ void FeatureSpace::project_r(double* prop)
void FeatureSpace::sis(std::vector<double>& prop)
{
int cur_feat = _D.size() / prop.size();
int previous_size = _D.size();
_D.resize(_D.size() + _n_sis_select * prop.size());
_D.reserve(_D.size());
// while(true)
// {}
int cur_feat = node_value_arrs::N_SELECTED;
node_value_arrs::resize_d_matrix_arr(_n_sis_select);
_phi_selected.reserve(_phi_selected.size() + _n_sis_select);
project_r(prop.data());
std::vector<int> inds = util_funcs::argsort(_scores);
std::vector<double> D_selected(_n_sis_select * prop.size());
std::vector<double> scores_selected(_n_sis_select, 0.0);
std::vector<node_ptr> phi_selected(_n_sis_select, nullptr);
std::vector<node_ptr> phi_selected;
std::string expr;
Unit unit;
std::vector<double> value;
phi_selected.reserve(_n_sis_select);
int ii = 0;
int cur_feat_local = 0;
while((cur_feat_local != D_selected.size() / prop.size()) && (ii < _scores.size()))
while((cur_feat_local != _n_sis_select) && (ii < _scores.size()))
{
bool is_valid = true;
for(int dd = 0; dd < cur_feat; ++dd)
{
if(1.0 - std::abs(util_funcs::r(&_D[dd*prop.size()], _phi[inds[ii]]->value_ptr(), prop.size())) < 1e-13)
if(1.0 - std::abs(util_funcs::r(node_value_arrs::get_d_matrix_ptr(dd), _phi[inds[ii]]->value_ptr(), prop.size())) < 1e-13)
{
is_valid = false;
break;
}
}
for(int dd = 0; dd < cur_feat_local; ++dd)
{
if(1.0 - std::abs(util_funcs::r(&D_selected[dd*prop.size()], _phi[inds[ii]]->value_ptr(), prop.size())) < 1e-13)
if(1.0 - std::abs(util_funcs::r(node_value_arrs::get_d_matrix_ptr(cur_feat + dd), _phi[inds[ii]]->value_ptr(), prop.size())) < 1e-13)
{
is_valid = false;
break;
}
}
if(is_valid)
{
std::copy_n(_phi[inds[ii]]->value_ptr(), prop.size(), &D_selected[cur_feat_local * prop.size()]);
scores_selected[cur_feat_local] = _scores[inds[ii]];
phi_selected[cur_feat_local] = _phi[inds[ii]];
expr = _phi[inds[ii]]->expr();
unit = _phi[inds[ii]]->unit();
phi_selected.push_back(std::make_shared<FeatureNode>(cur_feat + cur_feat_local, _phi[inds[ii]]->expr(), _phi[inds[ii]]->value(), _phi[inds[ii]]->unit(), true));
++cur_feat_local;
}
++ii;
}
phi_selected.resize(cur_feat_local);
scores_selected.resize(cur_feat_local);
D_selected.resize(cur_feat_local * prop.size());
if(_mpi_comm->size() > 1)
{
std::vector<std::vector<double>> all_scores;
std::vector<std::vector<double>> all_D;
std::vector<std::vector<node_ptr>> all_phi;
mpi::all_gather(*_mpi_comm, scores_selected, all_scores);
mpi::all_gather(*_mpi_comm, phi_selected, all_phi);
mpi::all_gather(*_mpi_comm, D_selected, all_D);
int iter_start = 0;
scores_selected = {};
D_selected = {};
phi_selected = {};
for(int sv = 0; sv < _mpi_comm->size(); ++sv)
......@@ -355,40 +349,44 @@ void FeatureSpace::sis(std::vector<double>& prop)
int cur_ind = scores_selected.size();
scores_selected.resize(all_scores[sv].size() + scores_selected.size());
phi_selected.resize(all_phi[sv].size() + phi_selected.size());
D_selected.resize(all_D[sv].size() * prop.size() + D_selected.size());
std::copy_n(all_scores[sv].begin(), all_scores[sv].size(), &scores_selected[cur_ind]);
std::copy_n(all_phi[sv].begin(), all_phi[sv].size(), &phi_selected[cur_ind]);
std::copy_n(all_D[sv].begin(), all_D[sv].size(), &D_selected[cur_ind * prop.size()]);
}
}
inds = util_funcs::argsort(scores_selected);
cur_feat_local = 0;
ii = 0;
while((cur_feat != _D.size() / prop.size()) && (ii < scores_selected.size()))
{
bool is_valid = true;
for(int dd = 0; dd < cur_feat_local; ++dd)
inds = util_funcs::argsort(scores_selected);
cur_feat_local = 0;
ii = 0;
while((cur_feat != node_value_arrs::N_SELECTED) && (ii < scores_selected.size()))
{
if(1.0 - std::abs(util_funcs::r(&_D[previous_size + dd * prop.size()], &D_selected[inds[ii] * prop.size()], prop.size())) < 1e-13)
bool is_valid = true;
for(int dd = 0; dd < cur_feat_local; ++dd)
{
is_valid = false;
break;
if(1.0 - std::abs(util_funcs::r(phi_selected[ii]->value().data(), node_value_arrs::get_d_matrix_ptr(cur_feat - cur_feat_local + dd), prop.size())) < 1e-13)
{
is_valid = false;
break;
}
}
}
if(is_valid)
{
std::copy_n(&D_selected[inds[ii] * prop.size()], prop.size(), &_D[cur_feat * prop.size()]);
_phi_selected.push_back(phi_selected[inds[ii]]);
_phi_selected.back()->set_value();
++cur_feat_local;
++cur_feat;
if(is_valid)
{
_phi_selected.push_back(phi_selected[inds[ii]]);
_phi_selected.back()->reindex(cur_feat);
_phi_selected.back()->set_value();
++cur_feat_local;
++cur_feat;
}
++ii;
}
++ii;
}
if(cur_feat != _D.size() / prop.size())
else
{
cur_feat += cur_feat_local;
for(auto& feat : phi_selected)
_phi_selected.push_back(feat);
}
if(cur_feat != node_value_arrs::N_SELECTED)
throw std::logic_error("SIS went through all features and did not select enough.");
}
......@@ -31,7 +31,6 @@ class FeatureSpace
std::vector<double> _prop; //!< property to learn
std::vector<double> _scores; //!< projection scores for each feature
std::vector<double> _D; //!< matrix of selected features
std::vector<std::string> _allowed_ops; //!< list of all allowed operators strings
std::vector<un_op_node_gen> _un_operators; //!< list of all unary operators
......@@ -93,14 +92,6 @@ public:
*/
// Return the shared pointer stored in _mpi_comm
inline std::shared_ptr<MPI_Interface> mpi_comm()
{
    return _mpi_comm;
}
/**
 * @brief Access the values of a selected feature
 * @details Given a feature index, get the address of element ind * _n_samp of _D
 *
 * @param ind index of the selected feature
 * @return pointer into _D at offset ind * _n_samp
 *
 * NOTE(review): the other hunks of this commit drop the _D member, so this accessor
 * appears to be a removed line of the diff -- confirm against the repository.
 */
inline double* D(int ind){return &_D[ind * _n_samp];}
/**
* @brief calculate the projection scores for all features for a given property
* @details Calculate the projection score based on the Pearson correlation
......
......@@ -3,12 +3,14 @@
// Default constructor: empty body and no member initializer list
FeatureNode::FeatureNode()
{}
/**
 * @brief Construct a feature from its index, expression, values and unit
 *
 * @param feat_ind index of the feature; forwarded to the Node constructor
 * @param expr expression of the feature
 * @param value value of the feature for each sample; value.size() is forwarded to Node as the number of samples
 * @param unit unit of the feature
 * @param selected stored in _selected (only present in the post-change signature)
 */
// Pre-change signature of this diff hunk
FeatureNode::FeatureNode(int feat_ind, std::string expr, std::vector<double> value, Unit unit) :
// Post-change signature of this diff hunk (adds the selected flag)
FeatureNode::FeatureNode(int feat_ind, std::string expr, std::vector<double> value, Unit unit, bool selected) :
Node(feat_ind, value.size()),
_selected(selected),
_expr(expr),
// Pre-change initializer: _unit was the last entry of the list
_unit(unit)
// Post-change initializers: _unit is followed by a stored copy of the values
_unit(unit),
_value(value)
{
// Pre-change body: copy the argument directly into the storage returned by value_ptr()
std::copy_n(value.data(), value.size(), value_ptr());
// Post-change body: delegate the copy into value_ptr() storage to set_value()
set_value();
}
FeatureNode::~FeatureNode()
......
......@@ -26,13 +26,17 @@ class FeatureNode: public Node
void serialize(Archive& ar, const unsigned int version)
{
// The Node base sub-object is archived first, then this class's members.
// The order of the statements below is the order the fields are passed to the archive;
// version is not read anywhere in this body.
ar & boost::serialization::base_object<Node>(*this);
ar & _selected;
ar & _expr;
ar & _unit;
ar & _value;
}
protected:
bool _selected; //!< True if the feature was selected
std::string _expr; //!< Expression of the feature
Unit _unit; //!< Unit for the feature
std::vector<double> _value; //!< values for the feature
public:
/**
* @brief Base Constructor
......@@ -48,7 +52,7 @@ public:
* @param value Value of the feature for each sample
* @param unit Unit of the feature
*/
FeatureNode(int feat_ind, std::string expr, std::vector<double> value, Unit unit);
FeatureNode(int feat_ind, std::string expr, std::vector<double> value, Unit unit, bool selected=false);
~FeatureNode();
......@@ -62,10 +66,15 @@ public:
*/
// Return a copy of the stored _unit
inline Unit unit()
{
    return _unit;
}
/**
 * @brief Get the value of the feature
 * @return a copy of the stored _value vector
 */
inline std::vector<double> value()
{
    return _value;
}
/**
 * @brief Set the value for the feature
 * @details Copies _n_samp values into the storage returned by value_ptr().
 *          The offset argument is not used by either version shown here.
 */
// Pre-change line of this diff hunk: the source is node_value_arrs::get_primary_feat_ptr(_feat_ind)
inline void set_value(int offset = -1){std::copy_n(node_value_arrs::get_primary_feat_ptr(_feat_ind), _n_samp, value_ptr());}
// Post-change line of this diff hunk: the source is the node's own _value vector
inline void set_value(int offset = -1){std::copy_n(_value.data(), _n_samp, value_ptr());}
/**
* @brief Check if the feature contains NaN
*/
......@@ -88,13 +97,15 @@ public:
/**
 * @brief Accessor function to the value of the feature
 * @details Returns a pointer obtained from node_value_arrs for this node's index
 *
 * @param offset forwarded to node_value_arrs::get_value_ptr
 */
// Pre-change line of this diff hunk: always get_value_ptr(_feat_ind, offset)
inline double* value_ptr(int offset = 0){return node_value_arrs::get_value_ptr(_feat_ind, offset);}
// Post-change line of this diff hunk: when _selected is true the pointer comes from
// get_d_matrix_ptr(_arr_ind) and offset is ignored; otherwise get_value_ptr(_arr_ind, offset)
inline double* value_ptr(int offset = 0){return _selected ? node_value_arrs::get_d_matrix_ptr(_arr_ind) : node_value_arrs::get_value_ptr(_arr_ind, offset);}
/**
 * @brief Return the rung of the feature
 * @details The argument is handed back unchanged
 *
 * @param cur_rung value to return as the rung (defaults to 0)
 */
inline int rung(int cur_rung = 0)
{
    return cur_rung;
}
// Return an empty list of nodes
inline std::vector<node_ptr> feats()
{
    return std::vector<node_ptr>();
}
/**
* @brief update the dictionary used to check if an Add/Sub node is valid
*
......
......@@ -5,7 +5,8 @@ Node::Node()
/**
 * @brief Construct a node from its feature index and number of samples
 *
 * @param feat_ind index of the feature
 * @param n_samp number of samples in the feature
 */
Node::Node(int feat_ind, int n_samp) :
_n_samp(n_samp),
// Pre-change initializer of this diff hunk: _feat_ind was the last entry of the list
_feat_ind(feat_ind)
// Post-change initializers of this diff hunk: _arr_ind starts out equal to feat_ind
_feat_ind(feat_ind),
_arr_ind(feat_ind)
{}
Node::~Node()
......
......@@ -33,11 +33,13 @@ class Node
{
ar & _n_samp;
ar & _feat_ind;
ar & _arr_ind;
}
protected:
int _n_samp; //!< Number of samples in the feature
int _feat_ind; //!< Index of the feature
int _arr_ind; //!< Index of the feature for the value arrays
public:
/**
......@@ -62,7 +64,16 @@ public:
*
* @param ind the new feature index
*/
// Pre-change line of this diff hunk: only _feat_ind is updated
inline void reindex(int ind){_feat_ind = ind;}
// Post-change line of this diff hunk: _feat_ind and _arr_ind are both set to ind
inline void reindex(int ind){_feat_ind = ind; _arr_ind = ind;}
/**
 * @brief Reindex the feature with separate feature and array indexes
 * @details Re-index the feature to be continuous; _feat_ind is set to ind and _arr_ind is set to arr_ind
 *
 * @param ind the new feature index
 * @param arr_ind the new array index
 */
inline void reindex(int ind, int arr_ind){_feat_ind = ind; _arr_ind = arr_ind;}
/**
* @brief Accessor function to get the number of samples
......@@ -74,6 +85,11 @@ public:
*/
// Return a reference to _feat_ind, so the caller can read or modify it
inline int& feat_ind()
{
    return _feat_ind;
}
/**
 * @brief Accessor function to get the feature array index
 * @return reference to _arr_ind, so the caller can read or modify it
 */
inline int& arr_ind()
{
    return _arr_ind;
}
/**
* @brief Get the expression for the overall descriptor (From head node down)
*/
......@@ -84,6 +100,13 @@ public:
*/
virtual Unit unit() = 0;
/**
* @brief Get the value of the descriptor
*/
virtual std::vector<double> value() = 0;
virtual std::vector<std::shared_ptr<Node>> feats() = 0;
/**
* @brief Set the value for the feature
*/
......
......@@ -14,13 +14,20 @@ OperatorNode::~OperatorNode()
/**
 * @brief Get a pointer to this node's values, recomputing them first when required
 * @details An offset of -1 is replaced by rung(). When rung() exceeds
 *          node_value_arrs::N_RUNGS_STORED and the temp_storage_reg entry for this slot does not
 *          hold this node's index, set_value(offset) is called and the entry is updated.
 *
 * @param offset offset passed to the node_value_arrs lookups (-1 selects rung())
 * @return pointer returned by node_value_arrs::get_value_ptr
 */
double* OperatorNode::value_ptr(int offset)
{
offset = (offset == -1) ? rung() : offset;
// Pre-change condition of this diff hunk: the register is keyed and compared by _feat_ind
if((rung() > node_value_arrs::N_RUNGS_STORED) && (node_value_arrs::temp_storage_reg(_feat_ind, offset) != _feat_ind))
// Post-change condition of this diff hunk: the register is keyed and compared by _arr_ind
if((rung() > node_value_arrs::N_RUNGS_STORED) && (node_value_arrs::temp_storage_reg(_arr_ind, offset) != _arr_ind))
{
set_value(offset);
// Record that the slot now holds this node's values (pre-change line, then post-change line)
node_value_arrs::temp_storage_reg(_feat_ind, offset) = _feat_ind;
node_value_arrs::temp_storage_reg(_arr_ind, offset) = _arr_ind;
}
// Pre-change return, then post-change return
return node_value_arrs::get_value_ptr(_feat_ind, offset);
return node_value_arrs::get_value_ptr(_arr_ind, offset);
}
/**
 * @brief Get a copy of the node's values
 * @details Allocates a vector of _n_samp zeros and fills it from the storage returned by value_ptr()
 *
 * @return vector holding _n_samp values
 */
std::vector<double> OperatorNode::value()
{
    std::vector<double> values_copy(_n_samp, 0.0);
    const double* const first = value_ptr();
    std::copy(first, first + _n_samp, values_copy.begin());
    return values_copy;
}
BOOST_SERIALIZATION_ASSUME_ABSTRACT(OperatorNode)
......@@ -53,6 +53,10 @@ public:
virtual Unit unit() = 0;
std::vector<double> value();
// Return a copy of the stored _feats list
inline std::vector<node_ptr> feats()
{
    return _feats;
}
virtual void set_value(int offset = -1) = 0;
/**
......
......@@ -27,7 +27,7 @@ public:
// Apply allowed_op_funcs::abs_diff to the values of _feats[0] and _feats[1] and write the
// result to the slot returned by node_value_arrs::get_value_ptr. An offset of -1 selects rung().
inline void set_value(int offset = -1)
{
offset = (offset == -1) ? rung() : offset;
// Pre-change line of this diff hunk: the output slot is looked up with _feat_ind
allowed_op_funcs::abs_diff(_n_samp, _feats[0]->value_ptr(offset + 2), _feats[1]->value_ptr(offset + 1), node_value_arrs::get_value_ptr(_feat_ind, offset));
// Post-change line of this diff hunk: the output slot is looked up with _arr_ind
allowed_op_funcs::abs_diff(_n_samp, _feats[0]->value_ptr(offset + 2), _feats[1]->value_ptr(offset + 1), node_value_arrs::get_value_ptr(_arr_ind, offset));
}
/**
......
......@@ -26,7 +26,7 @@ public:
// Apply allowed_op_funcs::abs to the values of _feats[0] and write the result to the slot
// returned by node_value_arrs::get_value_ptr. An offset of -1 selects rung().
inline void set_value(int offset = -1)
{
offset = (offset == -1) ? rung() : offset;
// Pre-change line of this diff hunk: the output slot is looked up with _feat_ind
allowed_op_funcs::abs(_n_samp, _feats[0]->value_ptr(offset + 2), node_value_arrs::get_value_ptr(_feat_ind, offset));
// Post-change line of this diff hunk: the output slot is looked up with _arr_ind
allowed_op_funcs::abs(_n_samp, _feats[0]->value_ptr(offset + 2), node_value_arrs::get_value_ptr(_arr_ind, offset));
}
/**
......
......@@ -26,7 +26,7 @@ public:
// Apply allowed_op_funcs::add to the values of _feats[0] and _feats[1] and write the result
// to the slot returned by node_value_arrs::get_value_ptr. An offset of -1 selects rung().
inline void set_value(int offset = -1)
{
offset = (offset == -1) ? rung() : offset;
// Pre-change line of this diff hunk: the output slot is looked up with _feat_ind
allowed_op_funcs::add(_n_samp, _feats[0]->value_ptr(offset + 2), _feats[1]->value_ptr(offset + 1), node_value_arrs::get_value_ptr(_feat_ind, offset));
// Post-change line of this diff hunk: the output slot is looked up with _arr_ind
allowed_op_funcs::add(_n_samp, _feats[0]->value_ptr(offset + 2), _feats[1]->value_ptr(offset + 1), node_value_arrs::get_value_ptr(_arr_ind, offset));
}
/**
......
......@@ -26,7 +26,7 @@ public:
// Apply allowed_op_funcs::cos to the values of _feats[0] and write the result to the slot
// returned by node_value_arrs::get_value_ptr. An offset of -1 selects rung().
inline void set_value(int offset = -1)
{
offset = (offset == -1) ? rung() : offset;
// Pre-change line of this diff hunk: the output slot is looked up with _feat_ind
allowed_op_funcs::cos(_n_samp, _feats[0]->value_ptr(offset + 2), node_value_arrs::get_value_ptr(_feat_ind, offset));
// Post-change line of this diff hunk: the output slot is looked up with _arr_ind
allowed_op_funcs::cos(_n_samp, _feats[0]->value_ptr(offset + 2), node_value_arrs::get_value_ptr(_arr_ind, offset));
}
/**
......
......@@ -26,7 +26,7 @@ public:
// Apply allowed_op_funcs::cb to the values of _feats[0] and write the result to the slot
// returned by node_value_arrs::get_value_ptr. An offset of -1 selects rung().
inline void set_value(int offset = -1)
{
offset = (offset == -1) ? rung() : offset;
// Pre-change line of this diff hunk: the output slot is looked up with _feat_ind
allowed_op_funcs::cb(_n_samp, _feats[0]->value_ptr(offset + 2), node_value_arrs::get_value_ptr(_feat_ind, offset));
// Post-change line of this diff hunk: the output slot is looked up with _arr_ind
allowed_op_funcs::cb(_n_samp, _feats[0]->value_ptr(offset + 2), node_value_arrs::get_value_ptr(_arr_ind, offset));
}
/**
......
......@@ -26,7 +26,7 @@ public:
// Apply allowed_op_funcs::cbrt to the values of _feats[0] and write the result to the slot
// returned by node_value_arrs::get_value_ptr. An offset of -1 selects rung().
inline void set_value(int offset = -1)
{
offset = (offset == -1) ? rung() : offset;
// Pre-change line of this diff hunk: the output slot is looked up with _feat_ind
allowed_op_funcs::cbrt(_n_samp, _feats[0]->value_ptr(offset + 2), node_value_arrs::get_value_ptr(_feat_ind, offset));
// Post-change line of this diff hunk: the output slot is looked up with _arr_ind
allowed_op_funcs::cbrt(_n_samp, _feats[0]->value_ptr(offset + 2), node_value_arrs::get_value_ptr(_arr_ind, offset));
}
/**
......
......@@ -26,7 +26,7 @@ public:
// Apply allowed_op_funcs::div to the values of _feats[0] and _feats[1] and write the result
// to the slot returned by node_value_arrs::get_value_ptr. An offset of -1 selects rung().
inline void set_value(int offset = -1)
{
offset = (offset == -1) ? rung() : offset;
// Pre-change line of this diff hunk: the output slot is looked up with _feat_ind
allowed_op_funcs::div(_n_samp, _feats[0]->value_ptr(offset + 2), _feats[1]->value_ptr(offset + 1), node_value_arrs::get_value_ptr(_feat_ind, offset));
// Post-change line of this diff hunk: the output slot is looked up with _arr_ind
allowed_op_funcs::div(_n_samp, _feats[0]->value_ptr(offset + 2), _feats[1]->value_ptr(offset + 1), node_value_arrs::get_value_ptr(_arr_ind, offset));
}
/**
......
......@@ -26,7 +26,7 @@ public:
// Apply allowed_op_funcs::exp to the values of _feats[0] and write the result to the slot
// returned by node_value_arrs::get_value_ptr. An offset of -1 selects rung().
inline void set_value(int offset = -1)
{
offset = (offset == -1) ? rung() : offset;
// Pre-change line of this diff hunk: the output slot is looked up with _feat_ind
allowed_op_funcs::exp(_n_samp, _feats[0]->value_ptr(offset + 2), node_value_arrs::get_value_ptr(_feat_ind, offset));
// Post-change line of this diff hunk: the output slot is looked up with _arr_ind
allowed_op_funcs::exp(_n_samp, _feats[0]->value_ptr(offset + 2), node_value_arrs::get_value_ptr(_arr_ind, offset));
}
/**
......
......@@ -27,7 +27,7 @@ public:
// Apply allowed_op_funcs::inv to the values of _feats[0] and write the result to the slot
// returned by node_value_arrs::get_value_ptr. An offset of -1 selects rung().
inline void set_value(int offset = -1)
{
offset = (offset == -1) ? rung() : offset;
// Pre-change line of this diff hunk: the output slot is looked up with _feat_ind
allowed_op_funcs::inv(_n_samp, _feats[0]->value_ptr(offset + 2), node_value_arrs::get_value_ptr(_feat_ind, offset));
// Post-change line of this diff hunk: the output slot is looked up with _arr_ind
allowed_op_funcs::inv(_n_samp, _feats[0]->value_ptr(offset + 2), node_value_arrs::get_value_ptr(_arr_ind, offset));
}
/**
......
......@@ -26,7 +26,7 @@ public:
// Apply allowed_op_funcs::log to the values of _feats[0] and write the result to the slot
// returned by node_value_arrs::get_value_ptr. An offset of -1 selects rung().
inline void set_value(int offset = -1)
{
offset = (offset == -1) ? rung() : offset;
// Pre-change line of this diff hunk: the output slot is looked up with _feat_ind
allowed_op_funcs::log(_n_samp, _feats[0]->value_ptr(offset + 2), node_value_arrs::get_value_ptr(_feat_ind, offset));
// Post-change line of this diff hunk: the output slot is looked up with _arr_ind
allowed_op_funcs::log(_n_samp, _feats[0]->value_ptr(offset + 2), node_value_arrs::get_value_ptr(_arr_ind, offset));
}
/**
......
......@@ -27,7 +27,7 @@ public:
inline void set_value(int offset = -1)
{
offset = (offset == -1) ? rung() :