Commit d8e40283 authored by Thomas Purcell's avatar Thomas Purcell
Browse files

Parallelized Code

Feature Creation, projection scores, and l0 normalization all parallelized
parent fb42bef3
......@@ -4,7 +4,8 @@ AM_CXXFLAGS=-I$(top_srcdir)
bin_PROGRAMS = $(top_builddir)/sisso_cpp
__top_builddir__sisso_cpp_SOURCES = \
feature_creation/node/value_storage/nodes_value_containers.cpp \
mpi_interface/MPI_Interface.cpp \
feature_creation/node/value_storage/nodes_value_containers.cpp \
feature_creation/units/Unit.cpp \
feature_creation/node/Node.cpp \
feature_creation/node/FeatureNode.cpp \
......
#include <descriptor_identifier/SISSORegressor.hpp>
SISSORegressor::SISSORegressor(FeatureSpace feat_space, std::vector<double> prop, int n_dim):
SISSORegressor::SISSORegressor(std::shared_ptr<FeatureSpace> feat_space, std::vector<double> prop, int n_dim):
_feat_space(feat_space),
_mpi_comm(feat_space->mpi_comm()),
_n_samp(prop.size()),
_n_dim(n_dim),
_lwork(-1),
......@@ -12,8 +14,7 @@ SISSORegressor::SISSORegressor(FeatureSpace feat_space, std::vector<double> prop
_work(nullptr),
_s(new double[n_dim + 1]),
_models(),
_prop(prop),
_feat_space(feat_space)
_prop(prop)
{
// Initialize a, b, ones, s, and _error arrays
......@@ -35,7 +36,7 @@ SISSORegressor::SISSORegressor(FeatureSpace feat_space, std::vector<double> prop
void SISSORegressor::set_a(std::vector<int>& inds)
{
for(int ii = 0; ii < inds.size(); ++ii)
std::copy_n(_feat_space.D(inds[ii]), _n_samp, _a.get() + ii * _n_samp);
std::copy_n(_feat_space->D(inds[ii]), _n_samp, _a.get() + ii * _n_samp);
std::copy_n(_ones.get(), _n_samp, _a.get() + inds.size() * _n_samp);
}
......@@ -79,14 +80,14 @@ void SISSORegressor::set_error(std::vector<int>& inds, double* coeffs)
void SISSORegressor::fit()
{
std::vector<double> residual(_n_samp);
_feat_space.sis(_prop);
std::vector<node_ptr> min_node(1, _feat_space.phi_selected()[0]);
_feat_space->sis(_prop);
std::vector<node_ptr> min_node(1, _feat_space->phi_selected()[0]);
_models.push_back(Model(_prop, min_node));
_models[_models.size() - 1].copy_error(residual.data());
for(int dd = 2; dd <= _n_dim; ++dd)
{
_feat_space.sis(residual);
_feat_space->sis(residual);
l0_norm(residual, dd);
_models[_models.size() - 1].copy_error(residual.data());
}
......@@ -101,10 +102,21 @@ void SISSORegressor::l0_norm(std::vector<double>& prop, int n_dim)
double min_error = util_funcs::norm(prop.data(), _n_samp);
int n = _feat_space.phi_selected().size();
int n = _feat_space->phi_selected().size();
int length = n;
for(int nn = n - 1; nn > n - n_dim; --nn)
length *= nn;
length /= n_dim;
std::array<int, 2> start_end = _mpi_comm->get_start_end_for_iterator(length, 0);
std::vector<bool> v(n);
std::fill(v.end() - n_dim, v.end(), true);
int rr = 0;
do{
++rr;
}while(std::next_permutation(v.begin(), v.end()) && (rr < start_end[0]));
do {
int nn = 0;
int ii = 0;
......@@ -124,11 +136,22 @@ void SISSORegressor::l0_norm(std::vector<double>& prop, int n_dim)
std::copy_n(inds.begin(), inds.size(), inds_min.begin());
min_error = util_funcs::norm(_error.get(), _n_samp);
}
} while (std::next_permutation(v.begin(), v.end()));
++rr;
} while(std::next_permutation(v.begin(), v.end()) && (rr < start_end[1]));
std::vector<double> all_min_error;
std::vector<std::vector<int>> all_inds_min;
mpi::all_gather(*_mpi_comm, min_error, all_min_error);
mpi::all_gather(*_mpi_comm, inds_min, all_inds_min);
int argmin = std::min_element(all_min_error.begin(), all_min_error.end()) - all_min_error.begin();
std::vector<node_ptr> min_nodes(n_dim);
for(int ii = 0; ii < n_dim; ++ii)
min_nodes[ii] = _feat_space.phi_selected()[inds_min[ii]];
min_nodes[ii] = _feat_space->phi_selected()[all_inds_min[argmin][ii]];
_models.push_back(Model(_prop, min_nodes));
}
......@@ -7,6 +7,9 @@
class SISSORegressor
{
protected:
std::shared_ptr<FeatureSpace> _feat_space;
std::shared_ptr<MPI_Interface> _mpi_comm;
int _n_samp;
int _n_dim;
int _lwork;
......@@ -21,10 +24,9 @@ protected:
std::vector<Model> _models;
std::vector<double> _prop;
FeatureSpace _feat_space;
public:
SISSORegressor(FeatureSpace feat_space, std::vector<double> prop, int n_dim);
SISSORegressor(std::shared_ptr<FeatureSpace> feat_space, std::vector<double> prop, int n_dim);
int get_opt_lwork(int n_dim);
void least_squares(std::vector<int>& inds, double* coeffs);
......@@ -35,7 +37,7 @@ public:
void fit();
void l0_norm(std::vector<double>& prop, int n_dim);
inline FeatureSpace& feat_space(){return _feat_space;}
inline std::shared_ptr<FeatureSpace> feat_space(){return _feat_space;}
inline std::vector<double>& prop(){return _prop;}
inline std::vector<Model>& models(){return _models;}
inline int n_samp(){return _n_samp;}
......
#include <feature_creation/feature_space/FeatureSpace.hpp>
BOOST_CLASS_EXPORT(FeatureNode)
BOOST_CLASS_EXPORT(AddNode)
BOOST_CLASS_EXPORT(SubNode)
BOOST_CLASS_EXPORT(AbsDiffNode)
BOOST_CLASS_EXPORT(MultNode)
BOOST_CLASS_EXPORT(DivNode)
BOOST_CLASS_EXPORT(SqNode)
BOOST_CLASS_EXPORT(SqrtNode)
BOOST_CLASS_EXPORT(CbNode)
BOOST_CLASS_EXPORT(CbrtNode)
BOOST_CLASS_EXPORT(SixPowNode)
BOOST_CLASS_EXPORT(ExpNode)
BOOST_CLASS_EXPORT(NegExpNode)
BOOST_CLASS_EXPORT(LogNode)
BOOST_CLASS_EXPORT(AbsNode)
BOOST_CLASS_EXPORT(InvNode)
BOOST_CLASS_EXPORT(SinNode)
BOOST_CLASS_EXPORT(CosNode)
FeatureSpace::FeatureSpace(
std::shared_ptr<MPI_Interface> mpi_comm,
std::vector<node_ptr> phi_0,
std::vector<std::string> allowed_ops,
int max_phi,
int n_sis_select,
double max_abs_feat_val
):
max_phi_(max_phi),
n_sis_select_(n_sis_select),
n_samp_(phi_0[0]->n_samp()),
n_feat_(phi_0.size()),
max_abs_feat_val_(max_abs_feat_val),
start_gen_(1, 0),
scores_(phi_0.size(), 0.0),
D_(0, 0.0),
phi_0_(phi_0),
phi_(phi_0)
_mpi_comm(mpi_comm),
_max_phi(max_phi),
_n_sis_select(n_sis_select),
_n_samp(phi_0[0]->n_samp()),
_n_feat(phi_0.size()),
_max_abs_feat_val(max_abs_feat_val),
_start_gen(1, 0),
_scores(phi_0.size(), 0.0),
_D(0, 0.0),
_phi_0(phi_0),
_phi(phi_0)
{
for(auto & op : allowed_ops)
{
if((op.compare("add") == 0) || (op.compare("sub") == 0) || (op.compare("mult") == 0) || (op.compare("abs_diff") == 0))
com_bin_operators_.push_back(allowed_op_maps::binary_operator_map[op]);
_com_bin_operators.push_back(allowed_op_maps::binary_operator_map[op]);
else if((op.compare("div") == 0))
bin_operators_.push_back(allowed_op_maps::binary_operator_map[op]);
_bin_operators.push_back(allowed_op_maps::binary_operator_map[op]);
else
un_operators_.push_back(allowed_op_maps::unary_operator_map[op]);
_un_operators.push_back(allowed_op_maps::unary_operator_map[op]);
}
generate_feature_space();
scores_.resize(phi_.size());
scores_.reserve(phi_.size());
_scores.resize(_phi.size());
_scores.reserve(_phi.size());
}
FeatureSpace::FeatureSpace(
std::shared_ptr<MPI_Interface> mpi_comm,
std::vector<node_ptr> phi_0,
int max_phi,
int n_sis_select,
double max_abs_feat_val
):
max_phi_(max_phi),
n_sis_select_(n_sis_select),
n_samp_(phi_0[0]->n_samp()),
n_feat_(phi_0.size()),
max_abs_feat_val_(max_abs_feat_val),
start_gen_(1, 0),
scores_(phi_0.size(), 0.0),
D_(0, 0.0),
phi_0_(phi_0),
phi_(phi_0)
_mpi_comm(mpi_comm),
_max_phi(max_phi),
_n_sis_select(n_sis_select),
_n_samp(phi_0[0]->n_samp()),
_n_feat(phi_0.size()),
_max_abs_feat_val(max_abs_feat_val),
_start_gen(1, 0),
_scores(phi_0.size(), 0.0),
_D(0, 0.0),
_phi_0(phi_0),
_phi(phi_0)
{
for(auto& op : allowed_op_maps::unary_operator_map)
{
un_operators_.push_back(op.second);
_un_operators.push_back(op.second);
}
for(auto& op : allowed_op_maps::binary_operator_map)
{
if((op.first.compare("div") == 0))
bin_operators_.push_back(op.second);
_bin_operators.push_back(op.second);
else
com_bin_operators_.push_back(op.second);
_com_bin_operators.push_back(op.second);
}
generate_feature_space();
scores_.resize(phi_.size());
scores_.reserve(phi_.size());
_scores.resize(_phi.size());
_scores.reserve(_phi.size());
}
FeatureSpace::FeatureSpace(FeatureSpace &o) :
max_phi_(o.max_phi_),
n_sis_select_(o.n_sis_select_),
n_samp_(o.n_samp_),
n_feat_(o.n_feat_),
max_abs_feat_val_(o.max_abs_feat_val_),
start_gen_(o.start_gen_),
start_D_ind_(o.start_D_ind_),
prop_(o.prop_),
scores_(o.scores_),
D_(o.D_),
un_operators_(o.un_operators_),
bin_operators_(o.bin_operators_),
com_bin_operators_(o.com_bin_operators_),
phi_selected_(o.phi_selected_),
phi_(o.phi_),
phi_0_(o.phi_0_)
{}
FeatureSpace::FeatureSpace(FeatureSpace &&o) :
max_phi_(o.max_phi_),
n_sis_select_(o.n_sis_select_),
n_samp_(o.n_samp_),
n_feat_(o.n_feat_),
max_abs_feat_val_(o.max_abs_feat_val_),
start_gen_(o.start_gen_),
start_D_ind_(o.start_D_ind_),
prop_(o.prop_),
scores_(o.scores_),
D_(o.D_),
un_operators_(o.un_operators_),
bin_operators_(o.bin_operators_),
com_bin_operators_(o.com_bin_operators_),
phi_selected_(o.phi_selected_),
phi_(o.phi_),
phi_0_(o.phi_0_)
_max_phi(o._max_phi),
_n_sis_select(o._n_sis_select),
_n_samp(o._n_samp),
_n_feat(o._n_feat),
_max_abs_feat_val(o._max_abs_feat_val),
_start_gen(o._start_gen),
_start_ind(o._start_ind),
_prop(o._prop),
_scores(o._scores),
_D(o._D),
_un_operators(o._un_operators),
_bin_operators(o._bin_operators),
_com_bin_operators(o._com_bin_operators),
_phi_selected(o._phi_selected),
_phi(o._phi),
_phi_0(o._phi_0)
{}
void FeatureSpace::generate_feature_space()
{
for(int nn = 1; nn <= max_phi_; ++nn)
for(int nn = 1; nn <= _max_phi; ++nn)
{
std::vector<node_ptr> next_phi;
n_feat_ = phi_.size();
int feat_ind = n_feat_;
for(int f1 = start_gen_[start_gen_.size()-1]; f1 < phi_.size(); ++f1)
_n_feat = _phi.size();
int feat_ind = _n_feat;
std::array<int, 2> start_end = _mpi_comm->get_start_end_for_iterator(_phi.size() - _start_gen[_start_gen.size()-1], _start_gen[_start_gen.size()-1]);
for(int f1 = start_end[0]; f1 < start_end[1]; ++f1)
{
for(auto& op : un_operators_)
for(auto& op : _un_operators)
{
try
{
next_phi.push_back(op(phi_[f1], feat_ind));
next_phi.push_back(op(_phi[f1], feat_ind));
++feat_ind;
}
catch(const InvalidFeatureException& e)
......@@ -125,13 +131,13 @@ void FeatureSpace::generate_feature_space()
}
}
for(auto& op : com_bin_operators_)
for(auto& op : _com_bin_operators)
{
for(int f2 = 0; f2 < f1; ++f2)
{
try
{
next_phi.push_back(op(phi_[f1], phi_[f2], feat_ind));
next_phi.push_back(op(_phi[f1], _phi[f2], feat_ind));
++feat_ind;
}
catch(const InvalidFeatureException& e)
......@@ -141,13 +147,13 @@ void FeatureSpace::generate_feature_space()
}
}
for(auto& op : bin_operators_)
for(auto& op : _bin_operators)
{
for(int f2 = 0; f2 < f1; ++f2)
{
try
{
next_phi.push_back(op(phi_[f1], phi_[f2], feat_ind));
next_phi.push_back(op(_phi[f1], _phi[f2], feat_ind));
++feat_ind;
}
catch(const InvalidFeatureException& e)
......@@ -156,7 +162,7 @@ void FeatureSpace::generate_feature_space()
}
try
{
next_phi.push_back(op(phi_[f2], phi_[f1], feat_ind));
next_phi.push_back(op(_phi[f2], _phi[f1], feat_ind));
++feat_ind;
}
catch(const InvalidFeatureException& e)
......@@ -166,40 +172,56 @@ void FeatureSpace::generate_feature_space()
}
}
}
_mpi_comm->barrier();
_start_gen.push_back(_phi.size());
start_gen_.push_back(phi_.size());
for(auto& feat : next_phi)
if(!feat->is_nan() && !feat->is_const())
phi_.push_back(feat);
std::vector<std::vector<node_ptr>> next_phi_gathered;
mpi::all_gather(*_mpi_comm, next_phi, next_phi_gathered);
for(auto& next_phi_vec : next_phi_gathered)
for(auto& feat : next_phi_vec)
_phi.push_back(feat);
}
}
void FeatureSpace::project_r(std::vector<double>& prop)
{
for(int ff = 0; ff < phi_.size(); ++ff)
scores_[ff] = std::abs(util_funcs::r(prop.data(), phi_[ff]->value_ptr(), prop.size()));
std::array<int, 2> start_end = _mpi_comm->get_start_end_for_iterator(_phi.size(), 0);
std::vector<double> scores(start_end[1] - start_end[0], 0.0);
for(int ff = start_end[0]; ff < start_end[1]; ++ff)
scores[ff - start_end[0]] = std::abs(util_funcs::r(prop.data(), _phi[ff]->value_ptr(), prop.size()));
std::vector<std::vector<double>> all_scores;
mpi::all_gather(*_mpi_comm, scores, all_scores);
int iter_start = 0;
for(auto& score_vec : all_scores)
{
for(int ii = 0; ii < score_vec.size(); ++ii)
_scores[ii + iter_start] = score_vec[ii];
iter_start += score_vec.size();
}
}
void FeatureSpace::sis(std::vector<double>& prop)
{
int cur_feat = D_.size() / prop.size();
D_.resize(D_.size() + n_sis_select_ * prop.size());
D_.reserve(D_.size());
int cur_feat = _D.size() / prop.size();
_D.resize(_D.size() + _n_sis_select * prop.size());
_D.reserve(_D.size());
phi_selected_.reserve(phi_selected_.size());
_phi_selected.reserve(_phi_selected.size());
project_r(prop);
std::vector<int> inds = util_funcs::argsort(scores_);
std::vector<int> inds = util_funcs::argsort(_scores);
int ii = inds.size() - 1;
while((cur_feat != D_.size() / prop.size()) && (ii >= 0))
while((cur_feat != _D.size() / prop.size()) && (ii >= 0))
{
bool is_valid = true;
for(int dd = 0; dd < cur_feat; ++dd)
{
if(1.0 - std::abs(util_funcs::r(&D_[dd*prop.size()], phi_[inds[ii]]->value_ptr(), prop.size())) < 1e-12)
if(1.0 - std::abs(util_funcs::r(&_D[dd*prop.size()], _phi[inds[ii]]->value_ptr(), prop.size())) < 1e-12)
{
is_valid = false;
break;
......@@ -207,10 +229,10 @@ void FeatureSpace::sis(std::vector<double>& prop)
}
if(is_valid)
{
std::copy_n(phi_[inds[ii]]->value_ptr(), prop.size(), &D_[cur_feat * prop.size()]);
std::copy_n(_phi[inds[ii]]->value_ptr(), prop.size(), &_D[cur_feat * prop.size()]);
++cur_feat;
phi_selected_.push_back(phi_[inds[ii]]);
_phi_selected.push_back(_phi[inds[ii]]);
}
--ii;
}
......
#ifndef FEATURE_SPACE
#define FEATURE_SPACE
#include <mpi_interface/MPI_Interface.hpp>
#include <feature_creation/node/FeatureNode.hpp>
#include <feature_creation/node/operator_nodes/allowed_ops.hpp>
#include <boost/serialization/shared_ptr.hpp>
#include <iostream>
// namespace mpi = boost::mpi;
class FeatureSpace
{
int max_phi_;
int n_sis_select_;
int n_samp_;
int n_feat_;
std::shared_ptr<MPI_Interface> _mpi_comm;
int _max_phi;
int _n_sis_select;
int _n_samp;
int _n_feat;
double max_abs_feat_val_;
double _max_abs_feat_val;
std::vector<int> start_gen_;
std::vector<int> start_D_ind_;
std::vector<int> _start_gen;
std::vector<int> _start_ind;
std::vector<double> prop_;
std::vector<double> scores_;
std::vector<double> D_;
std::vector<double> _prop;
std::vector<double> _scores;
std::vector<double> _D;
std::vector<un_op_node_gen> un_operators_;
std::vector<bin_op_node_gen> bin_operators_;
std::vector<bin_op_node_gen> com_bin_operators_;
std::vector<un_op_node_gen> _un_operators;
std::vector<bin_op_node_gen> _bin_operators;
std::vector<bin_op_node_gen> _com_bin_operators;
std::vector<node_ptr> phi_selected_;
std::vector<node_ptr> phi_;
std::vector<node_ptr> phi_0_;
std::vector<node_ptr> _phi_selected;
std::vector<node_ptr> _phi;
std::vector<node_ptr> _phi_0;
public:
FeatureSpace(
std::shared_ptr<MPI_Interface> mpi_comm,
std::vector<node_ptr> phi_0,
std::vector<std::string> allowed_ops,
int max_phi=1,
......@@ -40,6 +47,7 @@ public:
);
FeatureSpace(
std::shared_ptr<MPI_Interface> mpi_comm,
std::vector<node_ptr> phi_0,
int max_phi=1,
int n_sis_select=1,
......@@ -47,15 +55,15 @@ public:
);
FeatureSpace(FeatureSpace &o);
FeatureSpace(FeatureSpace &&o);
void generate_feature_space();
inline std::vector<node_ptr> phi_selected(){return phi_selected_;};
inline std::vector<node_ptr> phi(){return phi_;};
inline std::vector<node_ptr> phi0(){return phi_0_;};
inline std::vector<double>& scores(){return scores_;};
inline double* D(int ind){return &D_[ind * n_samp_];}
inline std::vector<node_ptr> phi_selected(){return _phi_selected;};
inline std::vector<node_ptr> phi(){return _phi;};
inline std::vector<node_ptr> phi0(){return _phi_0;};
inline std::vector<double>& scores(){return _scores;};
inline std::shared_ptr<MPI_Interface> mpi_comm(){return _mpi_comm;}
inline double* D(int ind){return &_D[ind * _n_samp];}
void project_r(std::vector<double>& prop);
......
#include <feature_creation/node/FeatureNode.hpp>
FeatureNode::FeatureNode()
{}
FeatureNode::FeatureNode(int feat_ind, std::string expr, std::vector<double> value, Unit unit) :
Node(feat_ind, expr, value, unit)
{}
......@@ -7,5 +10,7 @@ FeatureNode::FeatureNode(int feat_ind, std::string expr, std::vector<double> val
FeatureNode::FeatureNode(const FeatureNode &o) :
Node(o)
{
std::copy_n(o._value_ptr.get(), o._n_samp, _value_ptr.get());
std::copy_n(o._value_ptr.data(), o._n_samp, _value_ptr.data());
}
// BOOST_CLASS_EXPORT(FeatureNode)
......@@ -5,6 +5,7 @@
#include <utils/enum.hpp>
#include <memory>
#include <boost/serialization/export.hpp>
#include <boost/serialization/base_object.hpp>
#include <feature_creation/node/Node.hpp>
......@@ -14,7 +15,9 @@ typedef std::function<double(double, double)> binary_op_func;
class FeatureNode: public Node
{
friend class boost::serialization::access;
public:
FeatureNode();
FeatureNode(int feat_ind, std::string expr, std::vector<double> value, Unit unit);
FeatureNode(const FeatureNode &o);