Commit d8e40283 authored by Thomas Purcell's avatar Thomas Purcell
Browse files

Parallelized Code

Feature Creation, projection scores, and l0 normalization all parallelized
parent fb42bef3
...@@ -4,7 +4,8 @@ AM_CXXFLAGS=-I$(top_srcdir) ...@@ -4,7 +4,8 @@ AM_CXXFLAGS=-I$(top_srcdir)
bin_PROGRAMS = $(top_builddir)/sisso_cpp bin_PROGRAMS = $(top_builddir)/sisso_cpp
__top_builddir__sisso_cpp_SOURCES = \ __top_builddir__sisso_cpp_SOURCES = \
feature_creation/node/value_storage/nodes_value_containers.cpp \ mpi_interface/MPI_Interface.cpp \
feature_creation/node/value_storage/nodes_value_containers.cpp \
feature_creation/units/Unit.cpp \ feature_creation/units/Unit.cpp \
feature_creation/node/Node.cpp \ feature_creation/node/Node.cpp \
feature_creation/node/FeatureNode.cpp \ feature_creation/node/FeatureNode.cpp \
......
#include <descriptor_identifier/SISSORegressor.hpp> #include <descriptor_identifier/SISSORegressor.hpp>
SISSORegressor::SISSORegressor(FeatureSpace feat_space, std::vector<double> prop, int n_dim): SISSORegressor::SISSORegressor(std::shared_ptr<FeatureSpace> feat_space, std::vector<double> prop, int n_dim):
_feat_space(feat_space),
_mpi_comm(feat_space->mpi_comm()),
_n_samp(prop.size()), _n_samp(prop.size()),
_n_dim(n_dim), _n_dim(n_dim),
_lwork(-1), _lwork(-1),
...@@ -12,8 +14,7 @@ SISSORegressor::SISSORegressor(FeatureSpace feat_space, std::vector<double> prop ...@@ -12,8 +14,7 @@ SISSORegressor::SISSORegressor(FeatureSpace feat_space, std::vector<double> prop
_work(nullptr), _work(nullptr),
_s(new double[n_dim + 1]), _s(new double[n_dim + 1]),
_models(), _models(),
_prop(prop), _prop(prop)
_feat_space(feat_space)
{ {
// Initialize a, b, ones, s, and _error arrays // Initialize a, b, ones, s, and _error arrays
...@@ -35,7 +36,7 @@ SISSORegressor::SISSORegressor(FeatureSpace feat_space, std::vector<double> prop ...@@ -35,7 +36,7 @@ SISSORegressor::SISSORegressor(FeatureSpace feat_space, std::vector<double> prop
void SISSORegressor::set_a(std::vector<int>& inds) void SISSORegressor::set_a(std::vector<int>& inds)
{ {
for(int ii = 0; ii < inds.size(); ++ii) for(int ii = 0; ii < inds.size(); ++ii)
std::copy_n(_feat_space.D(inds[ii]), _n_samp, _a.get() + ii * _n_samp); std::copy_n(_feat_space->D(inds[ii]), _n_samp, _a.get() + ii * _n_samp);
std::copy_n(_ones.get(), _n_samp, _a.get() + inds.size() * _n_samp); std::copy_n(_ones.get(), _n_samp, _a.get() + inds.size() * _n_samp);
} }
...@@ -79,14 +80,14 @@ void SISSORegressor::set_error(std::vector<int>& inds, double* coeffs) ...@@ -79,14 +80,14 @@ void SISSORegressor::set_error(std::vector<int>& inds, double* coeffs)
void SISSORegressor::fit() void SISSORegressor::fit()
{ {
std::vector<double> residual(_n_samp); std::vector<double> residual(_n_samp);
_feat_space.sis(_prop); _feat_space->sis(_prop);
std::vector<node_ptr> min_node(1, _feat_space.phi_selected()[0]); std::vector<node_ptr> min_node(1, _feat_space->phi_selected()[0]);
_models.push_back(Model(_prop, min_node)); _models.push_back(Model(_prop, min_node));
_models[_models.size() - 1].copy_error(residual.data()); _models[_models.size() - 1].copy_error(residual.data());
for(int dd = 2; dd <= _n_dim; ++dd) for(int dd = 2; dd <= _n_dim; ++dd)
{ {
_feat_space.sis(residual); _feat_space->sis(residual);
l0_norm(residual, dd); l0_norm(residual, dd);
_models[_models.size() - 1].copy_error(residual.data()); _models[_models.size() - 1].copy_error(residual.data());
} }
...@@ -101,10 +102,21 @@ void SISSORegressor::l0_norm(std::vector<double>& prop, int n_dim) ...@@ -101,10 +102,21 @@ void SISSORegressor::l0_norm(std::vector<double>& prop, int n_dim)
double min_error = util_funcs::norm(prop.data(), _n_samp); double min_error = util_funcs::norm(prop.data(), _n_samp);
int n = _feat_space.phi_selected().size(); int n = _feat_space->phi_selected().size();
int length = n;
for(int nn = n - 1; nn > n - n_dim; --nn)
length *= nn;
length /= n_dim;
std::array<int, 2> start_end = _mpi_comm->get_start_end_for_iterator(length, 0);
std::vector<bool> v(n); std::vector<bool> v(n);
std::fill(v.end() - n_dim, v.end(), true); std::fill(v.end() - n_dim, v.end(), true);
int rr = 0;
do{
++rr;
}while(std::next_permutation(v.begin(), v.end()) && (rr < start_end[0]));
do { do {
int nn = 0; int nn = 0;
int ii = 0; int ii = 0;
...@@ -124,11 +136,22 @@ void SISSORegressor::l0_norm(std::vector<double>& prop, int n_dim) ...@@ -124,11 +136,22 @@ void SISSORegressor::l0_norm(std::vector<double>& prop, int n_dim)
std::copy_n(inds.begin(), inds.size(), inds_min.begin()); std::copy_n(inds.begin(), inds.size(), inds_min.begin());
min_error = util_funcs::norm(_error.get(), _n_samp); min_error = util_funcs::norm(_error.get(), _n_samp);
} }
} while (std::next_permutation(v.begin(), v.end()));
++rr;
} while(std::next_permutation(v.begin(), v.end()) && (rr < start_end[1]));
std::vector<double> all_min_error;
std::vector<std::vector<int>> all_inds_min;
mpi::all_gather(*_mpi_comm, min_error, all_min_error);
mpi::all_gather(*_mpi_comm, inds_min, all_inds_min);
int argmin = std::min_element(all_min_error.begin(), all_min_error.end()) - all_min_error.begin();
std::vector<node_ptr> min_nodes(n_dim); std::vector<node_ptr> min_nodes(n_dim);
for(int ii = 0; ii < n_dim; ++ii) for(int ii = 0; ii < n_dim; ++ii)
min_nodes[ii] = _feat_space.phi_selected()[inds_min[ii]]; min_nodes[ii] = _feat_space->phi_selected()[all_inds_min[argmin][ii]];
_models.push_back(Model(_prop, min_nodes)); _models.push_back(Model(_prop, min_nodes));
} }
...@@ -7,6 +7,9 @@ ...@@ -7,6 +7,9 @@
class SISSORegressor class SISSORegressor
{ {
protected: protected:
std::shared_ptr<FeatureSpace> _feat_space;
std::shared_ptr<MPI_Interface> _mpi_comm;
int _n_samp; int _n_samp;
int _n_dim; int _n_dim;
int _lwork; int _lwork;
...@@ -21,10 +24,9 @@ protected: ...@@ -21,10 +24,9 @@ protected:
std::vector<Model> _models; std::vector<Model> _models;
std::vector<double> _prop; std::vector<double> _prop;
FeatureSpace _feat_space;
public: public:
SISSORegressor(FeatureSpace feat_space, std::vector<double> prop, int n_dim); SISSORegressor(std::shared_ptr<FeatureSpace> feat_space, std::vector<double> prop, int n_dim);
int get_opt_lwork(int n_dim); int get_opt_lwork(int n_dim);
void least_squares(std::vector<int>& inds, double* coeffs); void least_squares(std::vector<int>& inds, double* coeffs);
...@@ -35,7 +37,7 @@ public: ...@@ -35,7 +37,7 @@ public:
void fit(); void fit();
void l0_norm(std::vector<double>& prop, int n_dim); void l0_norm(std::vector<double>& prop, int n_dim);
inline FeatureSpace& feat_space(){return _feat_space;} inline std::shared_ptr<FeatureSpace> feat_space(){return _feat_space;}
inline std::vector<double>& prop(){return _prop;} inline std::vector<double>& prop(){return _prop;}
inline std::vector<Model>& models(){return _models;} inline std::vector<Model>& models(){return _models;}
inline int n_samp(){return _n_samp;} inline int n_samp(){return _n_samp;}
......
#include <feature_creation/feature_space/FeatureSpace.hpp> #include <feature_creation/feature_space/FeatureSpace.hpp>
BOOST_CLASS_EXPORT(FeatureNode)
BOOST_CLASS_EXPORT(AddNode)
BOOST_CLASS_EXPORT(SubNode)
BOOST_CLASS_EXPORT(AbsDiffNode)
BOOST_CLASS_EXPORT(MultNode)
BOOST_CLASS_EXPORT(DivNode)
BOOST_CLASS_EXPORT(SqNode)
BOOST_CLASS_EXPORT(SqrtNode)
BOOST_CLASS_EXPORT(CbNode)
BOOST_CLASS_EXPORT(CbrtNode)
BOOST_CLASS_EXPORT(SixPowNode)
BOOST_CLASS_EXPORT(ExpNode)
BOOST_CLASS_EXPORT(NegExpNode)
BOOST_CLASS_EXPORT(LogNode)
BOOST_CLASS_EXPORT(AbsNode)
BOOST_CLASS_EXPORT(InvNode)
BOOST_CLASS_EXPORT(SinNode)
BOOST_CLASS_EXPORT(CosNode)
FeatureSpace::FeatureSpace( FeatureSpace::FeatureSpace(
std::shared_ptr<MPI_Interface> mpi_comm,
std::vector<node_ptr> phi_0, std::vector<node_ptr> phi_0,
std::vector<std::string> allowed_ops, std::vector<std::string> allowed_ops,
int max_phi, int max_phi,
int n_sis_select, int n_sis_select,
double max_abs_feat_val double max_abs_feat_val
): ):
max_phi_(max_phi), _mpi_comm(mpi_comm),
n_sis_select_(n_sis_select), _max_phi(max_phi),
n_samp_(phi_0[0]->n_samp()), _n_sis_select(n_sis_select),
n_feat_(phi_0.size()), _n_samp(phi_0[0]->n_samp()),
max_abs_feat_val_(max_abs_feat_val), _n_feat(phi_0.size()),
start_gen_(1, 0), _max_abs_feat_val(max_abs_feat_val),
scores_(phi_0.size(), 0.0), _start_gen(1, 0),
D_(0, 0.0), _scores(phi_0.size(), 0.0),
phi_0_(phi_0), _D(0, 0.0),
phi_(phi_0) _phi_0(phi_0),
_phi(phi_0)
{ {
for(auto & op : allowed_ops) for(auto & op : allowed_ops)
{ {
if((op.compare("add") == 0) || (op.compare("sub") == 0) || (op.compare("mult") == 0) || (op.compare("abs_diff") == 0)) if((op.compare("add") == 0) || (op.compare("sub") == 0) || (op.compare("mult") == 0) || (op.compare("abs_diff") == 0))
com_bin_operators_.push_back(allowed_op_maps::binary_operator_map[op]); _com_bin_operators.push_back(allowed_op_maps::binary_operator_map[op]);
else if((op.compare("div") == 0)) else if((op.compare("div") == 0))
bin_operators_.push_back(allowed_op_maps::binary_operator_map[op]); _bin_operators.push_back(allowed_op_maps::binary_operator_map[op]);
else else
un_operators_.push_back(allowed_op_maps::unary_operator_map[op]); _un_operators.push_back(allowed_op_maps::unary_operator_map[op]);
} }
generate_feature_space(); generate_feature_space();
scores_.resize(phi_.size()); _scores.resize(_phi.size());
scores_.reserve(phi_.size()); _scores.reserve(_phi.size());
} }
FeatureSpace::FeatureSpace( FeatureSpace::FeatureSpace(
std::shared_ptr<MPI_Interface> mpi_comm,
std::vector<node_ptr> phi_0, std::vector<node_ptr> phi_0,
int max_phi, int max_phi,
int n_sis_select, int n_sis_select,
double max_abs_feat_val double max_abs_feat_val
): ):
max_phi_(max_phi), _mpi_comm(mpi_comm),
n_sis_select_(n_sis_select), _max_phi(max_phi),
n_samp_(phi_0[0]->n_samp()), _n_sis_select(n_sis_select),
n_feat_(phi_0.size()), _n_samp(phi_0[0]->n_samp()),
max_abs_feat_val_(max_abs_feat_val), _n_feat(phi_0.size()),
start_gen_(1, 0), _max_abs_feat_val(max_abs_feat_val),
scores_(phi_0.size(), 0.0), _start_gen(1, 0),
D_(0, 0.0), _scores(phi_0.size(), 0.0),
phi_0_(phi_0), _D(0, 0.0),
phi_(phi_0) _phi_0(phi_0),
_phi(phi_0)
{ {
for(auto& op : allowed_op_maps::unary_operator_map) for(auto& op : allowed_op_maps::unary_operator_map)
{ {
un_operators_.push_back(op.second); _un_operators.push_back(op.second);
} }
for(auto& op : allowed_op_maps::binary_operator_map) for(auto& op : allowed_op_maps::binary_operator_map)
{ {
if((op.first.compare("div") == 0)) if((op.first.compare("div") == 0))
bin_operators_.push_back(op.second); _bin_operators.push_back(op.second);
else else
com_bin_operators_.push_back(op.second); _com_bin_operators.push_back(op.second);
} }
generate_feature_space(); generate_feature_space();
scores_.resize(phi_.size()); _scores.resize(_phi.size());
scores_.reserve(phi_.size()); _scores.reserve(_phi.size());
} }
FeatureSpace::FeatureSpace(FeatureSpace &o) : FeatureSpace::FeatureSpace(FeatureSpace &o) :
max_phi_(o.max_phi_), _max_phi(o._max_phi),
n_sis_select_(o.n_sis_select_), _n_sis_select(o._n_sis_select),
n_samp_(o.n_samp_), _n_samp(o._n_samp),
n_feat_(o.n_feat_), _n_feat(o._n_feat),
max_abs_feat_val_(o.max_abs_feat_val_), _max_abs_feat_val(o._max_abs_feat_val),
start_gen_(o.start_gen_), _start_gen(o._start_gen),
start_D_ind_(o.start_D_ind_), _start_ind(o._start_ind),
prop_(o.prop_), _prop(o._prop),
scores_(o.scores_), _scores(o._scores),
D_(o.D_), _D(o._D),
un_operators_(o.un_operators_), _un_operators(o._un_operators),
bin_operators_(o.bin_operators_), _bin_operators(o._bin_operators),
com_bin_operators_(o.com_bin_operators_), _com_bin_operators(o._com_bin_operators),
phi_selected_(o.phi_selected_), _phi_selected(o._phi_selected),
phi_(o.phi_), _phi(o._phi),
phi_0_(o.phi_0_) _phi_0(o._phi_0)
{}
FeatureSpace::FeatureSpace(FeatureSpace &&o) :
max_phi_(o.max_phi_),
n_sis_select_(o.n_sis_select_),
n_samp_(o.n_samp_),
n_feat_(o.n_feat_),
max_abs_feat_val_(o.max_abs_feat_val_),
start_gen_(o.start_gen_),
start_D_ind_(o.start_D_ind_),
prop_(o.prop_),
scores_(o.scores_),
D_(o.D_),
un_operators_(o.un_operators_),
bin_operators_(o.bin_operators_),
com_bin_operators_(o.com_bin_operators_),
phi_selected_(o.phi_selected_),
phi_(o.phi_),
phi_0_(o.phi_0_)
{} {}
void FeatureSpace::generate_feature_space() void FeatureSpace::generate_feature_space()
{ {
for(int nn = 1; nn <= max_phi_; ++nn) for(int nn = 1; nn <= _max_phi; ++nn)
{ {
std::vector<node_ptr> next_phi; std::vector<node_ptr> next_phi;
n_feat_ = phi_.size();
int feat_ind = n_feat_; _n_feat = _phi.size();
for(int f1 = start_gen_[start_gen_.size()-1]; f1 < phi_.size(); ++f1) int feat_ind = _n_feat;
std::array<int, 2> start_end = _mpi_comm->get_start_end_for_iterator(_phi.size() - _start_gen[_start_gen.size()-1], _start_gen[_start_gen.size()-1]);
for(int f1 = start_end[0]; f1 < start_end[1]; ++f1)
{ {
for(auto& op : un_operators_) for(auto& op : _un_operators)
{ {
try try
{ {
next_phi.push_back(op(phi_[f1], feat_ind)); next_phi.push_back(op(_phi[f1], feat_ind));
++feat_ind; ++feat_ind;
} }
catch(const InvalidFeatureException& e) catch(const InvalidFeatureException& e)
...@@ -125,13 +131,13 @@ void FeatureSpace::generate_feature_space() ...@@ -125,13 +131,13 @@ void FeatureSpace::generate_feature_space()
} }
} }
for(auto& op : com_bin_operators_) for(auto& op : _com_bin_operators)
{ {
for(int f2 = 0; f2 < f1; ++f2) for(int f2 = 0; f2 < f1; ++f2)
{ {
try try
{ {
next_phi.push_back(op(phi_[f1], phi_[f2], feat_ind)); next_phi.push_back(op(_phi[f1], _phi[f2], feat_ind));
++feat_ind; ++feat_ind;
} }
catch(const InvalidFeatureException& e) catch(const InvalidFeatureException& e)
...@@ -141,13 +147,13 @@ void FeatureSpace::generate_feature_space() ...@@ -141,13 +147,13 @@ void FeatureSpace::generate_feature_space()
} }
} }
for(auto& op : bin_operators_) for(auto& op : _bin_operators)
{ {
for(int f2 = 0; f2 < f1; ++f2) for(int f2 = 0; f2 < f1; ++f2)
{ {
try try
{ {
next_phi.push_back(op(phi_[f1], phi_[f2], feat_ind)); next_phi.push_back(op(_phi[f1], _phi[f2], feat_ind));
++feat_ind; ++feat_ind;
} }
catch(const InvalidFeatureException& e) catch(const InvalidFeatureException& e)
...@@ -156,7 +162,7 @@ void FeatureSpace::generate_feature_space() ...@@ -156,7 +162,7 @@ void FeatureSpace::generate_feature_space()
} }
try try
{ {
next_phi.push_back(op(phi_[f2], phi_[f1], feat_ind)); next_phi.push_back(op(_phi[f2], _phi[f1], feat_ind));
++feat_ind; ++feat_ind;
} }
catch(const InvalidFeatureException& e) catch(const InvalidFeatureException& e)
...@@ -166,40 +172,56 @@ void FeatureSpace::generate_feature_space() ...@@ -166,40 +172,56 @@ void FeatureSpace::generate_feature_space()
} }
} }
} }
_mpi_comm->barrier();
_start_gen.push_back(_phi.size());
start_gen_.push_back(phi_.size()); std::vector<std::vector<node_ptr>> next_phi_gathered;
mpi::all_gather(*_mpi_comm, next_phi, next_phi_gathered);
for(auto& feat : next_phi)
if(!feat->is_nan() && !feat->is_const())
phi_.push_back(feat);
for(auto& next_phi_vec : next_phi_gathered)
for(auto& feat : next_phi_vec)
_phi.push_back(feat);
} }
} }
void FeatureSpace::project_r(std::vector<double>& prop) void FeatureSpace::project_r(std::vector<double>& prop)
{ {
for(int ff = 0; ff < phi_.size(); ++ff) std::array<int, 2> start_end = _mpi_comm->get_start_end_for_iterator(_phi.size(), 0);
scores_[ff] = std::abs(util_funcs::r(prop.data(), phi_[ff]->value_ptr(), prop.size()));
std::vector<double> scores(start_end[1] - start_end[0], 0.0);
for(int ff = start_end[0]; ff < start_end[1]; ++ff)
scores[ff - start_end[0]] = std::abs(util_funcs::r(prop.data(), _phi[ff]->value_ptr(), prop.size()));
std::vector<std::vector<double>> all_scores;
mpi::all_gather(*_mpi_comm, scores, all_scores);
int iter_start = 0;
for(auto& score_vec : all_scores)
{
for(int ii = 0; ii < score_vec.size(); ++ii)
_scores[ii + iter_start] = score_vec[ii];
iter_start += score_vec.size();
}
} }
void FeatureSpace::sis(std::vector<double>& prop) void FeatureSpace::sis(std::vector<double>& prop)
{ {
int cur_feat = D_.size() / prop.size(); int cur_feat = _D.size() / prop.size();
D_.resize(D_.size() + n_sis_select_ * prop.size()); _D.resize(_D.size() + _n_sis_select * prop.size());
D_.reserve(D_.size()); _D.reserve(_D.size());
phi_selected_.reserve(phi_selected_.size()); _phi_selected.reserve(_phi_selected.size());
project_r(prop); project_r(prop);
std::vector<int> inds = util_funcs::argsort(scores_); std::vector<int> inds = util_funcs::argsort(_scores);
int ii = inds.size() - 1; int ii = inds.size() - 1;
while((cur_feat != D_.size() / prop.size()) && (ii >= 0)) while((cur_feat != _D.size() / prop.size()) && (ii >= 0))
{ {
bool is_valid = true; bool is_valid = true;
for(int dd = 0; dd < cur_feat; ++dd) for(int dd = 0; dd < cur_feat; ++dd)
{ {
if(1.0 - std::abs(util_funcs::r(&D_[dd*prop.size()], phi_[inds[ii]]->value_ptr(), prop.size())) < 1e-12) if(1.0 - std::abs(util_funcs::r(&_D[dd*prop.size()], _phi[inds[ii]]->value_ptr(), prop.size())) < 1e-12)
{ {
is_valid = false; is_valid = false;
break; break;
...@@ -207,10 +229,10 @@ void FeatureSpace::sis(std::vector<double>& prop) ...@@ -207,10 +229,10 @@ void FeatureSpace::sis(std::vector<double>& prop)
} }
if(is_valid) if(is_valid)
{ {
std::copy_n(phi_[inds[ii]]->value_ptr(), prop.size(), &D_[cur_feat * prop.size()]); std::copy_n(_phi[inds[ii]]->value_ptr(), prop.size(), &_D[cur_feat * prop.size()]);