Commit fe2eac21 authored by Thomas Purcell's avatar Thomas Purcell
Browse files

Centeralized data into 1D arrays

Features now point to a position in centeral arrays.

Recurisive solutions for all rungs above 2
parent d8e40283
......@@ -21,14 +21,13 @@ SISSORegressor::SISSORegressor(std::shared_ptr<FeatureSpace> feat_space, std::ve
std::vector<double> zeros((_n_dim + 1) * _n_samp, 0.0);
std::copy_n(zeros.begin(), (_n_dim + 1) * _n_samp, _a.get());
std::copy_n(zeros.begin(), _n_samp, _b.get());
std::copy_n(zeros.begin(), _n_dim + 1, _s.get());
std::copy_n(zeros.begin(), _n_samp, _error.get());
std::copy_n(zeros.begin(), _n_dim + 1, _s.get());
std::vector<double> ones(_n_samp, 1.0);
std::copy_n(ones.begin(), ones.size(), _ones.get());
// Get optimal work size
std::vector<double> work(1, 0.0);
// // Get optimal work size
_lwork = get_opt_lwork(_n_dim + 1);
_work = std::unique_ptr<double[]>(new double[_lwork]);
}
......@@ -53,17 +52,16 @@ void SISSORegressor::least_squares(std::vector<int>& inds, double* coeffs)
if(info == 0)
std::copy_n(_b.get(), n_dim, coeffs);
else
{
std::cout << inds[0] << '\t' << inds[1] << std::endl;
throw std::logic_error("least_squares failed.");
}
}
int SISSORegressor::get_opt_lwork(int n_dim)
{
std::vector<double> work(1, 0.0);
int info = 0;
dgelss_(_n_samp, n_dim, 1, _a.get(), _n_samp, _b.get(), _n_samp, _s.get(), 1e-13, &_rank, work.data(), -1, &info);
int info = 0, rank = 0;
dgelss_(_n_samp, n_dim, 1, _a.get(), _n_samp, _b.get(), _n_samp, _s.get(), 0.0, &rank, work.data(), -1, &info);
if(info == 0)
return static_cast<int>(std::round(work[0]));
else
......
......@@ -35,6 +35,7 @@ FeatureSpace::FeatureSpace(
_start_gen(1, 0),
_scores(phi_0.size(), 0.0),
_D(0, 0.0),
_allowed_ops(allowed_ops),
_phi_0(phi_0),
_phi(phi_0)
{
......@@ -52,41 +53,6 @@ FeatureSpace::FeatureSpace(
_scores.reserve(_phi.size());
}
FeatureSpace::FeatureSpace(
std::shared_ptr<MPI_Interface> mpi_comm,
std::vector<node_ptr> phi_0,
int max_phi,
int n_sis_select,
double max_abs_feat_val
):
_mpi_comm(mpi_comm),
_max_phi(max_phi),
_n_sis_select(n_sis_select),
_n_samp(phi_0[0]->n_samp()),
_n_feat(phi_0.size()),
_max_abs_feat_val(max_abs_feat_val),
_start_gen(1, 0),
_scores(phi_0.size(), 0.0),
_D(0, 0.0),
_phi_0(phi_0),
_phi(phi_0)
{
for(auto& op : allowed_op_maps::unary_operator_map)
{
_un_operators.push_back(op.second);
}
for(auto& op : allowed_op_maps::binary_operator_map)
{
if((op.first.compare("div") == 0))
_bin_operators.push_back(op.second);
else
_com_bin_operators.push_back(op.second);
}
generate_feature_space();
_scores.resize(_phi.size());
_scores.reserve(_phi.size());
}
FeatureSpace::FeatureSpace(FeatureSpace &o) :
_max_phi(o._max_phi),
_n_sis_select(o._n_sis_select),
......@@ -98,6 +64,7 @@ FeatureSpace::FeatureSpace(FeatureSpace &o) :
_prop(o._prop),
_scores(o._scores),
_D(o._D),
_allowed_ops(o._allowed_ops),
_un_operators(o._un_operators),
_bin_operators(o._bin_operators),
_com_bin_operators(o._com_bin_operators),
......@@ -113,16 +80,19 @@ void FeatureSpace::generate_feature_space()
std::vector<node_ptr> next_phi;
_n_feat = _phi.size();
int feat_ind = _n_feat;
std::array<int, 2> start_end = _mpi_comm->get_start_end_for_iterator(_phi.size() - _start_gen[_start_gen.size()-1], _start_gen[_start_gen.size()-1]);
for(int f1 = start_end[0]; f1 < start_end[1]; ++f1)
int feat_ind = _n_feat + node_value_arrs::get_max_number_features(_allowed_ops, 1, start_end[0]);
next_phi.reserve(node_value_arrs::get_max_number_features(_allowed_ops, 1, start_end[1] - start_end[0]));
for(auto feat_1 = _phi.begin() + start_end[0]; feat_1 != _phi.begin() + start_end[1]; ++feat_1)
{
for(auto& op : _un_operators)
{
try
{
next_phi.push_back(op(_phi[f1], feat_ind));
next_phi.push_back(op(*feat_1, nn, feat_ind));
++feat_ind;
}
catch(const InvalidFeatureException& e)
......@@ -133,11 +103,11 @@ void FeatureSpace::generate_feature_space()
for(auto& op : _com_bin_operators)
{
for(int f2 = 0; f2 < f1; ++f2)
for(auto feat_2 = _phi.begin(); feat_2 != feat_1; ++feat_2)
{
try
{
next_phi.push_back(op(_phi[f1], _phi[f2], feat_ind));
next_phi.push_back(op(*feat_1, *feat_2, nn, feat_ind));
++feat_ind;
}
catch(const InvalidFeatureException& e)
......@@ -149,11 +119,11 @@ void FeatureSpace::generate_feature_space()
for(auto& op : _bin_operators)
{
for(int f2 = 0; f2 < f1; ++f2)
for(auto feat_2 = _phi.begin(); feat_2 != feat_1; ++feat_2)
{
try
{
next_phi.push_back(op(_phi[f1], _phi[f2], feat_ind));
next_phi.push_back(op(*feat_1, *feat_2, nn, feat_ind));
++feat_ind;
}
catch(const InvalidFeatureException& e)
......@@ -162,7 +132,7 @@ void FeatureSpace::generate_feature_space()
}
try
{
next_phi.push_back(op(_phi[f2], _phi[f1], feat_ind));
next_phi.push_back(op(*feat_2, *feat_1, nn, feat_ind));
++feat_ind;
}
catch(const InvalidFeatureException& e)
......@@ -172,15 +142,36 @@ void FeatureSpace::generate_feature_space()
}
}
}
// for(auto& feat : next_phi)
// {
// feat->set_feat_val_ptrs();
// feat->set_value();
// }
_mpi_comm->barrier();
if(_mpi_comm->rank() == 0)
std::cout << "NEXT_PHI MADE" << std::endl;
else
std::cout << "NNNN NEXT_PHI MADE" << std::endl;
_start_gen.push_back(_phi.size());
std::vector<std::vector<node_ptr>> next_phi_gathered;
mpi::all_gather(*_mpi_comm, next_phi, next_phi_gathered);
if(_mpi_comm->rank() == 0)
std::cout << "all gather" << std::endl;
else
std::cout << "aaaa all_gather" << std::endl;
for(auto& next_phi_vec : next_phi_gathered)
{
for(auto& feat : next_phi_vec)
{
if(nn <= node_value_arrs::N_RUNGS_STORED)
feat->set_value();
_phi.push_back(feat);
}
}
std::cout << "DONE"<< std::endl;
}
}
......@@ -190,7 +181,7 @@ void FeatureSpace::project_r(std::vector<double>& prop)
std::vector<double> scores(start_end[1] - start_end[0], 0.0);
for(int ff = start_end[0]; ff < start_end[1]; ++ff)
scores[ff - start_end[0]] = std::abs(util_funcs::r(prop.data(), _phi[ff]->value_ptr(), prop.size()));
scores[ff - start_end[0]] = -1.0 * std::abs(util_funcs::r(prop.data(), _phi[ff]->value_ptr(), prop.size()));
std::vector<std::vector<double>> all_scores;
mpi::all_gather(*_mpi_comm, scores, all_scores);
......@@ -199,7 +190,9 @@ void FeatureSpace::project_r(std::vector<double>& prop)
for(auto& score_vec : all_scores)
{
for(int ii = 0; ii < score_vec.size(); ++ii)
{
_scores[ii + iter_start] = score_vec[ii];
}
iter_start += score_vec.size();
}
}
......@@ -215,18 +208,19 @@ void FeatureSpace::sis(std::vector<double>& prop)
project_r(prop);
std::vector<int> inds = util_funcs::argsort(_scores);
int ii = inds.size() - 1;
while((cur_feat != _D.size() / prop.size()) && (ii >= 0))
int ii = 0;
while((cur_feat != _D.size() / prop.size()) && (ii < _scores.size()))
{
bool is_valid = true;
for(int dd = 0; dd < cur_feat; ++dd)
{
if(1.0 - std::abs(util_funcs::r(&_D[dd*prop.size()], _phi[inds[ii]]->value_ptr(), prop.size())) < 1e-12)
if(1.0 - std::abs(util_funcs::r(&_D[dd*prop.size()], _phi[inds[ii]]->value_ptr(), prop.size())) < 1e-13)
{
is_valid = false;
break;
}
}
if(is_valid)
{
std::copy_n(_phi[inds[ii]]->value_ptr(), prop.size(), &_D[cur_feat * prop.size()]);
......@@ -234,8 +228,8 @@ void FeatureSpace::sis(std::vector<double>& prop)
_phi_selected.push_back(_phi[inds[ii]]);
}
--ii;
++ii;
}
if(ii < 0)
if(ii >= _scores.size())
throw std::logic_error("SIS went through all features and did not select enough.");
}
......@@ -28,6 +28,7 @@ class FeatureSpace
std::vector<double> _scores;
std::vector<double> _D;
std::vector<std::string> _allowed_ops;
std::vector<un_op_node_gen> _un_operators;
std::vector<bin_op_node_gen> _bin_operators;
std::vector<bin_op_node_gen> _com_bin_operators;
......@@ -46,14 +47,6 @@ public:
double max_abs_feat_val=1e27
);
FeatureSpace(
std::shared_ptr<MPI_Interface> mpi_comm,
std::vector<node_ptr> phi_0,
int max_phi=1,
int n_sis_select=1,
double max_abs_feat_val=1e27
);
FeatureSpace(FeatureSpace &o);
void generate_feature_space();
......
......@@ -4,13 +4,15 @@ FeatureNode::FeatureNode()
{}
FeatureNode::FeatureNode(int feat_ind, std::string expr, std::vector<double> value, Unit unit) :
Node(feat_ind, expr, value, unit)
{}
Node(feat_ind, value.size()),
_expr(expr),
_unit(unit)
{
std::copy_n(value.data(), value.size(), value_ptr());
}
FeatureNode::FeatureNode(const FeatureNode &o) :
Node(o)
{
std::copy_n(o._value_ptr.data(), o._n_samp, _value_ptr.data());
}
{}
// BOOST_CLASS_EXPORT(FeatureNode)
......@@ -16,20 +16,36 @@ typedef std::function<double(double, double)> binary_op_func;
class FeatureNode: public Node
{
friend class boost::serialization::access;
protected:
std::string _expr;
Unit _unit;
public:
FeatureNode();
FeatureNode(int feat_ind, std::string expr, std::vector<double> value, Unit unit);
FeatureNode(const FeatureNode &o);
inline std::string expr(){return _expr;}
inline Unit unit(){return _unit;}
inline void set_value(){return;}
inline int rung(){return 0;}
inline void set_feat_val_ptrs(){return;}
inline std::vector<double*> feat_value_ptrs(){return std::vector<double*>(0);}
inline bool is_nan(){return std::any_of(value_ptr(), value_ptr() + _n_samp, [](double d){return !std::isfinite(d);});}
inline bool is_const()
{
double mean = util_funcs::mean(value_ptr(), _n_samp);
return std::all_of(value_ptr(), value_ptr() + _n_samp, [&mean](double d){return std::abs(d - mean) < 1e-12;});
}
inline double* value_ptr(){return node_value_arrs::get_value_ptr(_feat_ind);}
template <typename Archive>
void serialize(Archive& ar, const unsigned int version)
{
ar & boost::serialization::base_object<Node>(*this);
ar & _expr;
ar & _unit;
}
};
......
......@@ -3,38 +3,15 @@
Node::Node()
{}
Node::Node(int feat_ind, std::string expr, std::vector<double> value, Unit unit, NODE_TYPE tag) :
_n_samp(value.size()),
_tag(tag),
_feat_ind(feat_ind),
_expr(expr),
_unit(unit),
// _value_ptr(nullptr),
_value_ptr(_n_samp)
{
// _value_ptr = node_value_arrs::access_value_arr(feat_ind);
// _value_ptr = _value;
std::copy_n(value.data(), value.size(), value_ptr());
}
Node::Node(int feat_ind, std::string expr, int n_samp, NODE_TYPE tag) :
Node::Node(int feat_ind, int n_samp) :
_n_samp(n_samp),
_tag(tag),
_feat_ind(feat_ind),
_expr(expr),
_unit(),
_value_ptr(_n_samp)
_feat_ind(feat_ind)
{}
Node::Node(const Node &o) :
_n_samp(o._n_samp),
_tag(o._tag),
_feat_ind(o._feat_ind),
_expr(o._expr),
_unit(o._unit),
_value_ptr(o._n_samp)
{
std::copy_n(o._value_ptr.data(), o._n_samp, value_ptr());
}
_feat_ind(o._feat_ind)
{}
BOOST_SERIALIZATION_ASSUME_ABSTRACT(Node)
......@@ -17,60 +17,43 @@
typedef std::function<double(double)> unary_op_func;
typedef std::function<double(double, double)> binary_op_func;
class Node
{
protected:
int _n_samp;
NODE_TYPE _tag;
int _feat_ind;
std::string _expr;
Unit _unit;
// double* _value_ptr;
std::vector<double> _value_ptr;
// std::array<double, _n_samp> _value;
public:
Node();
Node(int feat_ind, std::string expr, std::vector<double> value, Unit unit, NODE_TYPE tag = NODE_TYPE::FEAT);
Node(int feat_ind, std::string expr, int n_samp, NODE_TYPE tag = NODE_TYPE::FEAT);
Node(int feat_ind, int n_samp);
Node(const Node &o);
// bool equal(Node node_2);
inline bool is_nan(){return std::any_of(value_ptr(), value_ptr() + _n_samp, [](double d){return !std::isfinite(d);});}
inline bool is_const()
{
double mean = util_funcs::mean(value_ptr(), _n_samp);
return std::all_of(value_ptr(), value_ptr() + _n_samp, [&mean](double d){return std::abs(d - mean) < 1e-12;});
}
// inline bool operator== (Node node_2){return equal(node_2);}
// inline bool operator!= (Node node_2){return !equal(node_2);}
inline int n_samp(){return _n_samp;}
inline NODE_TYPE tag(){return _tag;}
inline int& feat_ind(){return _feat_ind;}
virtual std::string expr() = 0;
virtual Unit unit() = 0;
inline double* value_ptr(){return _value_ptr.data();}
virtual void set_value() = 0;
virtual int rung() = 0;
virtual void set_feat_val_ptrs() = 0;
virtual std::vector<double*> feat_value_ptrs() = 0;
virtual double* value_ptr() = 0;
virtual bool is_nan() = 0;
virtual bool is_const() = 0;
template <typename Archive>
void serialize(Archive& ar, const unsigned int version)
{
ar & _n_samp;
ar & _tag;
ar & _feat_ind;
ar & _expr;
ar & _unit;
ar & _value_ptr;
}
// inline void set_value_ptr(double* value_ptr){_value_ptr = value_ptr;}
};
typedef std::shared_ptr<Node> node_ptr;
......
......@@ -3,25 +3,19 @@
OperatorNode::OperatorNode()
{}
OperatorNode::OperatorNode(std::vector<node_ptr> feats, int feat_ind, std::string expr, NODE_TYPE tag) :
Node(feat_ind, expr, feats[0]->n_samp(), tag),
OperatorNode::OperatorNode(std::vector<node_ptr> feats, int rung, int feat_ind) :
Node(feat_ind, feats[0]->n_samp()),
_rung_offset(rung),
_feats(feats)
{}
{
set_feat_val_ptrs();
}
OperatorNode::OperatorNode(const OperatorNode &o) :
Node(o),
_feats(o._feats)
_rung_offset(o._rung_offset),
_feats(o._feats),
_feat_val_ptrs(o._feat_val_ptrs)
{}
std::string OperatorNode::expr()
{
std::string expression = _expr + "(";
for(auto& feat : _feats)
expression += feat->expr() + ", ";
return expression.substr(0, expression.size() - 2) + ")";
}
BOOST_SERIALIZATION_ASSUME_ABSTRACT(OperatorNode)
// BOOST_CLASS_EXPORTOperatorNode)
// BOOST_CLASS_EXPORT_GUID(OperatorNode, "OperatorNode")
\ No newline at end of file
......@@ -6,18 +6,22 @@
#include <boost/serialization/base_object.hpp>
#include <boost/serialization/export.hpp>
#include <boost/serialization/vector.hpp>
#include <boost/serialization/shared_ptr.hpp>
#include <boost/serialization/split_member.hpp>
#include <boost/serialization/vector.hpp>
class OperatorNode: public Node
{
friend class boost::serialization::access;
protected:
double* _value_ptr;
int _rung_offset;
std::vector<node_ptr> _feats;
std::vector<double*> _feat_val_ptrs;
public:
OperatorNode();
OperatorNode(std::vector<node_ptr> feats, int feat_ind, std::string expr, NODE_TYPE tag = NODE_TYPE::FXN);
OperatorNode(std::vector<node_ptr> feats, int rung, int feat_ind);
OperatorNode(const OperatorNode &o);
......@@ -27,13 +31,39 @@ public:
virtual void set_value() =0;
template <typename Archive>
void serialize(Archive& ar, const unsigned int version)
{
// ar.template register_type<OperatorNode>();
ar & boost::serialization::base_object<Node>(*this);
ar & _rung_offset;
ar & _feats;
set_feat_val_ptrs();
}
inline std::vector<double*> feat_value_ptrs(){return _feat_val_ptrs;}
inline int rung(){return _rung_offset;}
inline double* value_ptr()
{
if((_rung_offset > node_value_arrs::N_RUNGS_STORED) && (node_value_arrs::temp_storage_reg(_feat_ind) != _feat_ind))
set_value();
return node_value_arrs::get_value_ptr(_feat_ind);
}
inline bool is_nan(){return std::any_of(value_ptr(), value_ptr() + _n_samp, [](double d){return !std::isfinite(d);});}
inline bool is_const()
{
double mean = util_funcs::mean(value_ptr(), _n_samp);
return std::all_of(value_ptr(), value_ptr() + _n_samp, [&mean](double d){return std::abs(d - mean) < 1e-12;});
}
inline void set_feat_val_ptrs()
{
_value_ptr = node_value_arrs::get_value_ptr(_feat_ind, 0);
_feat_val_ptrs = std::vector<double*>(_feats.size());
for(int ff = 0; ff < _feats.size(); ++ff)
_feat_val_ptrs[ff] = node_value_arrs::get_value_ptr(_feats[ff]->feat_ind(), (_rung_offset + 2 - ff) % 3);
}
};
......
......@@ -5,27 +5,37 @@
AbsDiffNode::AbsDiffNode()
{}
AbsDiffNode::AbsDiffNode(std::vector<node_ptr> feats, int feat_ind) :
OperatorNode(feats, feat_ind, "abs_diff", NODE_TYPE::ABS_DIFF)
AbsDiffNode::AbsDiffNode(std::vector<node_ptr> feats, int rung, int feat_ind) :
OperatorNode(feats, rung, feat_ind)
{
if(feats[0]->unit() != feats[1]->unit())
throw InvalidFeatureException();
set_value();
if(is_nan() || is_const())
if(is_nan() || is_const())
throw InvalidFeatureException();
}
}
AbsDiffNode::AbsDiffNode(node_ptr feat_1, node_ptr feat_2, int feat_ind) :
OperatorNode({feat_1, feat_2}, feat_ind, "abs_diff", NODE_TYPE::ABS_DIFF)
AbsDiffNode::AbsDiffNode(node_ptr feat_1, node_ptr feat_2, int rung, int feat_ind) :
OperatorNode({feat_1, feat_2}, rung, feat_ind)
{
if(feat_1->unit() != feat_2->unit())
throw InvalidFeatureException();
set_value();
if(is_nan() || is_const())
if(is_nan() || is_const())
throw InvalidFeatureException();
}
}
// BOOST_CLASS_EXPORTAbsDiffNode)