Commit 603f56ac authored by Thomas Purcell
Browse files

Add bounds to features and reorder some member variables

Check if this helps with memory problems
parent 7ae9e7d2
......@@ -25,20 +25,22 @@ FeatureSpace::FeatureSpace(
int max_phi,
int n_sis_select,
int max_store_rung,
double max_abs_feat_val
double max_abs_feat_val,
double min_abs_feat_val
):
_phi(phi_0),
_phi_0(phi_0),
_allowed_ops(allowed_ops),
_scores(phi_0.size(), 0.0),
_start_gen(1, 0),
_mpi_comm(mpi_comm),
_l_bound(min_abs_feat_val),
_u_bound(max_abs_feat_val),
_max_phi(max_phi),
_n_sis_select(n_sis_select),
_n_samp(phi_0[0]->n_samp()),
_n_feat(phi_0.size()),
_n_rung_store(max_store_rung),
_max_abs_feat_val(max_abs_feat_val),
_start_gen(1, 0),
_scores(phi_0.size(), 0.0),
_allowed_ops(allowed_ops),
_phi_0(phi_0),
_phi(phi_0)
_n_rung_store(max_store_rung)
{
for(auto & op : allowed_ops)
{
......@@ -56,22 +58,30 @@ FeatureSpace::FeatureSpace(
void FeatureSpace::generate_feature_space()
{
double u_bound = 1e50;
double l_bound = 1e-50;
for(int nn = 1; nn <= _max_phi; ++nn)
{
if(nn == _max_phi)
{
u_bound = _u_bound;
l_bound = _l_bound;
}
std::vector<node_ptr> next_phi;
_n_feat = _phi.size();
// std::array<int, 2> start_end = _mpi_comm->get_start_end_for_iterator(_phi.size() - _start_gen[_start_gen.size()-1], _start_gen[_start_gen.size()-1]);
int feat_ind = _phi.size();
// for(auto feat_1 = _phi.begin() + start_end[0]; feat_1 != _phi.begin() + start_end[1]; ++feat_1)
for(auto feat_1 = _phi.begin() + _mpi_comm->rank(); feat_1 < _phi.end(); feat_1 += _mpi_comm->size())
{
int phi_ind = feat_1 - _phi.begin();
next_phi.reserve(_un_operators.size() + phi_ind * (_com_bin_operators.size() + 2 * _bin_operators.size()));
for(auto& op : _un_operators)
{
try
{
next_phi.push_back(op(*feat_1, nn, feat_ind));
next_phi.push_back(op(*feat_1, nn, feat_ind, l_bound, u_bound));
++feat_ind;
}
catch(const InvalidFeatureException& e)
......@@ -86,7 +96,7 @@ void FeatureSpace::generate_feature_space()
{
try
{
next_phi.push_back(op(*feat_1, *feat_2, nn, feat_ind));
next_phi.push_back(op(*feat_1, *feat_2, nn, feat_ind, l_bound, u_bound));
++feat_ind;
}
catch(const InvalidFeatureException& e)
......@@ -102,7 +112,7 @@ void FeatureSpace::generate_feature_space()
{
try
{
next_phi.push_back(op(*feat_1, *feat_2, nn, feat_ind));
next_phi.push_back(op(*feat_1, *feat_2, nn, feat_ind, l_bound, u_bound));
++feat_ind;
}
catch(const InvalidFeatureException& e)
......@@ -111,7 +121,7 @@ void FeatureSpace::generate_feature_space()
}
try
{
next_phi.push_back(op(*feat_2, *feat_1, nn, feat_ind));
next_phi.push_back(op(*feat_2, *feat_1, nn, feat_ind, l_bound, u_bound));
++feat_ind;
}
catch(const InvalidFeatureException& e)
......@@ -333,7 +343,7 @@ void FeatureSpace::sis(std::vector<double>& prop)
dgemv_('T', _n_samp, cur_feat, 1.0 / static_cast<double>(_n_samp), node_value_arrs::D_MATRIX.data(), _n_samp, _phi[inds[ii]]->value_ptr(), 1, 0.0, scores_comp.data(), 1);
if(1.0 - std::abs(*std::max_element(scores_comp.begin(), scores_comp.end(), [](double a, double b){return std::abs(a) < std::abs(b);})) < 1e-13)
if(1.0 - util_funcs::max_abs_val<double>(scores_comp.data(), scores_comp.size()) < 1e-13)
is_valid = false;
std::transform(_phi[inds[ii]]->value_ptr(), _phi[inds[ii]]->value_ptr() + _n_samp, _phi[inds[ii]]->value_ptr(), [&cur_feat_mean, &cur_feat_std](double val){return val * cur_feat_std + cur_feat_mean;});
......@@ -358,14 +368,13 @@ void FeatureSpace::sis(std::vector<double>& prop)
}
++ii;
}
// Unstandardize the description matrix
if(cur_feat > 0)
for(int dd = 0; dd < cur_feat; ++dd)
std::transform(node_value_arrs::get_d_matrix_ptr(dd), node_value_arrs::get_d_matrix_ptr(dd) + _n_samp, node_value_arrs::get_d_matrix_ptr(dd), [&means, &stand_devs, &dd](double val){return val * stand_devs[dd] + means[dd];});
phi_selected.resize(_n_sis_select);
// inds_selected.resize(cur_feat_local);
// scores_selected.resize(cur_feat_local);
// If we are only on one process then phi_selected are the selected features
if(_mpi_comm->size() > 1)
......@@ -404,7 +413,6 @@ void FeatureSpace::sis(std::vector<double>& prop)
}
mpi::broadcast(*_mpi_comm, inds, 0);
// for(auto& feat : phi_selected)
// Erase all scores, features, and indices from the selected arrays that are not needed from those selected locally
for(int ii = _n_sis_select - 1; ii >= 0; --ii)
{
......
......@@ -19,28 +19,31 @@
*/
class FeatureSpace
{
std::vector<node_ptr> _phi; //!< all features
std::vector<node_ptr> _phi_selected; //!< selected features
std::vector<node_ptr> _phi_0; //!< initial feature space
std::vector<std::string> _allowed_ops; //!< list of all allowed operator strings
std::vector<un_op_node_gen> _un_operators; //!< list of all unary operators
std::vector<bin_op_node_gen> _com_bin_operators; //!< list of all commutative binary operators
std::vector<bin_op_node_gen> _bin_operators; //!< list of all binary operators
std::vector<double> _scores; //!< projection scores for each feature
std::vector<double> _prop; //!< property to learn
std::vector<int> _start_gen; //!< list of the starting index of each generation
std::shared_ptr<MPI_Interface> _mpi_comm; //!< MPI communicator
double _l_bound; //!< lower bound on a feature's largest absolute value (passed to the operators only when generating rung _max_phi)
double _u_bound; //!< upper bound on a feature's largest absolute value (passed to the operators only when generating rung _max_phi)
int _max_phi; //!< Maximum rung for the feature creation
int _n_sis_select; //!< Number of features to select for each dimension
int _n_samp; //!< Number of samples
int _n_feat; //!< Total number of features
int _n_rung_store; //!< Total rungs stored
double _max_abs_feat_val; //!< Maximum absolute value for any feature
std::vector<int> _start_gen; //!< list of the starting index of each generation
std::vector<double> _prop; //!< property to learn
std::vector<double> _scores; //!< projection scores for each feature
std::vector<std::string> _allowed_ops; //!< list of all allowed operator strings
std::vector<un_op_node_gen> _un_operators; //!< list of all unary operators
std::vector<bin_op_node_gen> _bin_operators; //!< list of all binary operators
std::vector<bin_op_node_gen> _com_bin_operators; //!< list of all commutative binary operators
std::vector<node_ptr> _phi_selected; //!< selected features
std::vector<node_ptr> _phi; //!< all features
std::vector<node_ptr> _phi_0; //!< initial feature space
public:
/**
* @brief Constructor for the feature space
......@@ -59,7 +62,8 @@ public:
int max_phi=1,
int n_sis_select=1,
int max_store_rung=2,
double max_abs_feat_val=1e27
double max_abs_feat_val=1e50,
double min_abs_feat_val=1e-50
);
/**
......
#include <feature_creation/node/operator_nodes/OperatorNode.hpp>
// OperatorNode::OperatorNode()
// {}
// OperatorNode::OperatorNode(std::array<node_ptr, 1> feats, int rung, int feat_ind) :
// Node(feat_ind, feats[0]->n_samp(), feats[0]->n_test_samp()),
// _feats(feats)
// {}
// OperatorNode::~OperatorNode()
// {}
// double* OperatorNode::value_ptr(int offset)
// {
// offset = (offset == -1) ? rung() : offset;
// if((rung() > node_value_arrs::N_RUNGS_STORED) && (node_value_arrs::temp_storage_reg(_arr_ind, offset) != _arr_ind))
// {
// set_value(offset);
// node_value_arrs::temp_storage_reg(_arr_ind, offset) = _arr_ind;
// }
// return node_value_arrs::get_value_ptr(_arr_ind, offset);
// }
// double* OperatorNode::test_value_ptr(int offset)
// {
// offset = (offset == -1) ? rung() : offset;
// if((rung() > node_value_arrs::N_RUNGS_STORED) && (node_value_arrs::temp_storage_test_reg(_arr_ind, offset) != _arr_ind))
// {
// set_test_value(offset);
// node_value_arrs::temp_storage_test_reg(_arr_ind, offset) = _arr_ind;
// }
// return node_value_arrs::get_test_value_ptr(_arr_ind, offset);
// }
// std::vector<double> OperatorNode::value()
// {
// std::vector<double> val(_n_samp, 0.0);
// std::copy_n(value_ptr(), _n_samp, val.data());
// return val;
// }
// std::vector<double> OperatorNode::test_value()
// {
// std::vector<double> val(_n_test_samp, 0.0);
// std::copy_n(test_value_ptr(), _n_test_samp, val.data());
// return val;
// }
BOOST_SERIALIZATION_ASSUME_ABSTRACT(OperatorNode)
......@@ -3,6 +3,7 @@
#include <feature_creation/node/Node.hpp>
#include <feature_creation/node/operator_nodes/functions.hpp>
#include <utils/math_funcs.hpp>
#include <boost/serialization/base_object.hpp>
#include <boost/serialization/export.hpp>
......
......@@ -5,7 +5,7 @@
AbsDiffNode::AbsDiffNode()
{}
AbsDiffNode::AbsDiffNode(std::array<node_ptr, 2> feats, int rung, int feat_ind) :
AbsDiffNode::AbsDiffNode(std::array<node_ptr, 2> feats, int rung, int feat_ind, double l_bound, double u_bound):
OperatorNode(feats, rung, feat_ind)
{
if(feats[0]->unit() != feats[1]->unit())
......@@ -27,13 +27,13 @@ AbsDiffNode::AbsDiffNode(std::array<node_ptr, 2> feats, int rung, int feat_ind)
throw InvalidFeatureException();
set_value();
if(is_nan() || is_const())
if(is_nan() || is_const() || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) > u_bound) || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) < l_bound))
throw InvalidFeatureException();
set_test_value();
}
AbsDiffNode::AbsDiffNode(node_ptr feat_1, node_ptr feat_2, int rung, int feat_ind) :
AbsDiffNode::AbsDiffNode(node_ptr feat_1, node_ptr feat_2, int rung, int feat_ind, double l_bound, double u_bound):
OperatorNode({feat_1, feat_2}, rung, feat_ind)
{
if(feat_1->unit() != feat_2->unit())
......@@ -55,7 +55,7 @@ AbsDiffNode::AbsDiffNode(node_ptr feat_1, node_ptr feat_2, int rung, int feat_in
throw InvalidFeatureException();
set_value();
if(is_nan() || is_const())
if(is_nan() || is_const() || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) > u_bound) || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) < l_bound))
throw InvalidFeatureException();
set_test_value();
......
......@@ -16,9 +16,9 @@ class AbsDiffNode: public OperatorNode<2>
public:
AbsDiffNode();
AbsDiffNode(std::array<node_ptr, 2> feats, int rung, int feat_ind);
AbsDiffNode(std::array<node_ptr, 2> feats, int rung, int feat_ind, double l_bound = 1e-50, double u_bound = 1e50);
AbsDiffNode(node_ptr feat_1, node_ptr feat_2, int rung, int feat_ind);
AbsDiffNode(node_ptr feat_1, node_ptr feat_2, int rung, int feat_ind, double l_bound = 1e-50, double u_bound = 1e50);
inline Unit unit(){return _feats[0]->unit();}
......
......@@ -5,21 +5,21 @@
AbsNode::AbsNode()
{}
AbsNode::AbsNode(std::array<node_ptr, 1> feats, int rung, int feat_ind):
AbsNode::AbsNode(std::array<node_ptr, 1> feats, int rung, int feat_ind, double l_bound, double u_bound):
OperatorNode(feats, rung, feat_ind)
{
set_value();
if(is_nan() || is_const())
if(is_nan() || is_const() || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) > u_bound) || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) < l_bound))
throw InvalidFeatureException();
set_test_value();
}
AbsNode::AbsNode(node_ptr feat, int rung, int feat_ind):
AbsNode::AbsNode(node_ptr feat, int rung, int feat_ind, double l_bound, double u_bound):
OperatorNode({feat}, rung, feat_ind)
{
set_value();
if(is_nan() || is_const())
if(is_nan() || is_const() || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) > u_bound) || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) < l_bound))
throw InvalidFeatureException();
set_test_value();
......
......@@ -15,9 +15,9 @@ class AbsNode: public OperatorNode<1>
public:
AbsNode();
AbsNode(std::array<node_ptr, 1> feats, int rung, int feat_ind);
AbsNode(std::array<node_ptr, 1> feats, int rung, int feat_ind, double l_bound = 1e-50, double u_bound = 1e50);
AbsNode(node_ptr feat, int rung, int feat_ind);
AbsNode(node_ptr feat, int rung, int feat_ind, double l_bound = 1e-50, double u_bound = 1e50);
inline Unit unit(){return _feats[0]->unit();}
......
......@@ -3,7 +3,7 @@
AddNode::AddNode()
{}
AddNode::AddNode(std::array<node_ptr, 2> feats, int rung, int feat_ind):
AddNode::AddNode(std::array<node_ptr, 2> feats, int rung, int feat_ind, double l_bound, double u_bound):
OperatorNode(feats, rung, feat_ind)
{
if(feats[0]->unit() != feats[1]->unit())
......@@ -25,13 +25,13 @@ AddNode::AddNode(std::array<node_ptr, 2> feats, int rung, int feat_ind):
throw InvalidFeatureException();
set_value();
if(is_nan() || is_const())
if(is_nan() || is_const() || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) > u_bound) || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) < l_bound))
throw InvalidFeatureException();
set_test_value();
}
AddNode::AddNode(node_ptr feat_1, node_ptr feat_2, int rung, int feat_ind):
AddNode::AddNode(node_ptr feat_1, node_ptr feat_2, int rung, int feat_ind, double l_bound, double u_bound):
OperatorNode({feat_1, feat_2}, rung, feat_ind)
{
if(feat_1->unit() != feat_2->unit())
......@@ -53,7 +53,7 @@ AddNode::AddNode(node_ptr feat_1, node_ptr feat_2, int rung, int feat_ind):
throw InvalidFeatureException();
set_value();
if(is_nan() || is_const())
if(is_nan() || is_const() || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) > u_bound) || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) < l_bound))
throw InvalidFeatureException();
set_test_value();
......
......@@ -15,9 +15,9 @@ class AddNode: public OperatorNode<2>
public:
AddNode();
AddNode(std::array<node_ptr, 2> feats, int rung, int feat_ind);
AddNode(std::array<node_ptr, 2> feats, int rung, int feat_ind, double l_bound = 1e-50, double u_bound = 1e50);
AddNode(node_ptr feat_1, node_ptr feat_2, int rung, int feat_ind);
AddNode(node_ptr feat_1, node_ptr feat_2, int rung, int feat_ind, double l_bound = 1e-50, double u_bound = 1e50);
inline Unit unit(){return _feats[0]->unit();}
......
......@@ -3,7 +3,7 @@
CosNode::CosNode()
{}
CosNode::CosNode(std::array<node_ptr, 1> feats, int rung, int feat_ind):
CosNode::CosNode(std::array<node_ptr, 1> feats, int rung, int feat_ind, double l_bound, double u_bound):
OperatorNode(feats, rung, feat_ind)
{
if(feats[0]->unit() != Unit())
......@@ -13,13 +13,13 @@ CosNode::CosNode(std::array<node_ptr, 1> feats, int rung, int feat_ind):
throw InvalidFeatureException();
set_value();
if(is_nan() || is_const())
if(is_nan() || is_const() || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) > u_bound) || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) < l_bound))
throw InvalidFeatureException();
set_test_value();
}
CosNode::CosNode(node_ptr feat, int rung, int feat_ind):
CosNode::CosNode(node_ptr feat, int rung, int feat_ind, double l_bound, double u_bound):
OperatorNode({feat}, rung, feat_ind)
{
if(feat->unit() != Unit())
......@@ -29,7 +29,7 @@ CosNode::CosNode(node_ptr feat, int rung, int feat_ind):
throw InvalidFeatureException();
set_value();
if(is_nan() || is_const())
if(is_nan() || is_const() || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) > u_bound) || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) < l_bound))
throw InvalidFeatureException();
set_test_value();
......
......@@ -15,9 +15,9 @@ class CosNode: public OperatorNode<1>
public:
CosNode();
CosNode(std::array<node_ptr, 1> feats, int rung, int feat_ind);
CosNode(std::array<node_ptr, 1> feats, int rung, int feat_ind, double l_bound = 1e-50, double u_bound = 1e50);
CosNode(node_ptr feat, int rung, int feat_ind);
CosNode(node_ptr feat, int rung, int feat_ind, double l_bound = 1e-50, double u_bound = 1e50);
inline Unit unit(){return Unit();}
......
......@@ -3,27 +3,27 @@
CbNode::CbNode()
{}
CbNode::CbNode(std::array<node_ptr, 1> feats, int rung, int feat_ind):
CbNode::CbNode(std::array<node_ptr, 1> feats, int rung, int feat_ind, double l_bound, double u_bound):
OperatorNode(feats, rung, feat_ind)
{
if(feats[0]->type() == NODE_TYPE::CBRT)
throw InvalidFeatureException();
set_value();
if(is_nan() || is_const())
if(is_nan() || is_const() || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) > u_bound) || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) < l_bound))
throw InvalidFeatureException();
set_test_value();
}
CbNode::CbNode(node_ptr feat, int rung, int feat_ind):
CbNode::CbNode(node_ptr feat, int rung, int feat_ind, double l_bound, double u_bound):
OperatorNode({feat}, rung, feat_ind)
{
if(feat->type() == NODE_TYPE::CBRT)
throw InvalidFeatureException();
set_value();
if(is_nan() || is_const())
if(is_nan() || is_const() || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) > u_bound) || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) < l_bound))
throw InvalidFeatureException();
set_test_value();
......
......@@ -15,9 +15,9 @@ class CbNode: public OperatorNode<1>
public:
CbNode();
CbNode(std::array<node_ptr, 1> feats, int rung, int feat_ind);
CbNode(std::array<node_ptr, 1> feats, int rung, int feat_ind, double l_bound = 1e-50, double u_bound = 1e50);
CbNode(node_ptr feat, int rung, int feat_ind);
CbNode(node_ptr feat, int rung, int feat_ind, double l_bound = 1e-50, double u_bound = 1e50);
inline Unit unit(){return _feats[0]->unit()^(3.0);}
......
......@@ -3,27 +3,27 @@
CbrtNode::CbrtNode()
{}
CbrtNode::CbrtNode(std::array<node_ptr, 1> feats, int rung, int feat_ind):
CbrtNode::CbrtNode(std::array<node_ptr, 1> feats, int rung, int feat_ind, double l_bound, double u_bound):
OperatorNode(feats, rung, feat_ind)
{
if(feats[0]->type() == NODE_TYPE::CB)
throw InvalidFeatureException();
set_value();
if(is_nan() || is_const())
if(is_nan() || is_const() || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) > u_bound) || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) < l_bound))
throw InvalidFeatureException();
set_test_value();
}
CbrtNode::CbrtNode(node_ptr feat, int rung, int feat_ind):
CbrtNode::CbrtNode(node_ptr feat, int rung, int feat_ind, double l_bound, double u_bound):
OperatorNode({feat}, rung, feat_ind)
{
if(feat->type() == NODE_TYPE::CB)
throw InvalidFeatureException();
set_value();
if(is_nan() || is_const())
if(is_nan() || is_const() || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) > u_bound) || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) < l_bound))
throw InvalidFeatureException();
set_test_value();
......
......@@ -15,9 +15,9 @@ class CbrtNode: public OperatorNode<1>
public:
CbrtNode();
CbrtNode(std::array<node_ptr, 1> feats, int rung, int feat_ind);
CbrtNode(std::array<node_ptr, 1> feats, int rung, int feat_ind, double l_bound = 1e-50, double u_bound = 1e50);
CbrtNode(node_ptr feat, int rung, int feat_ind);
CbrtNode(node_ptr feat, int rung, int feat_ind, double l_bound = 1e-50, double u_bound = 1e50);
inline Unit unit(){return _feats[0]->unit()^(1.0 / 3.0);}
......
......@@ -3,7 +3,7 @@
DivNode::DivNode()
{}
DivNode::DivNode(std::array<node_ptr, 2> feats, int rung, int feat_ind):
DivNode::DivNode(std::array<node_ptr, 2> feats, int rung, int feat_ind, double l_bound, double u_bound):
OperatorNode(feats, rung, feat_ind)
{
if((feats[0]->type() == NODE_TYPE::INV) || (feats[1]->type() == NODE_TYPE::INV))
......@@ -25,13 +25,13 @@ DivNode::DivNode(std::array<node_ptr, 2> feats, int rung, int feat_ind):
throw InvalidFeatureException();
set_value();
if(is_nan() || is_const())
if(is_nan() || is_const() || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) > u_bound) || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) < l_bound))
throw InvalidFeatureException();
set_test_value();
}
DivNode::DivNode(node_ptr feat_1, node_ptr feat_2, int rung, int feat_ind):
DivNode::DivNode(node_ptr feat_1, node_ptr feat_2, int rung, int feat_ind, double l_bound, double u_bound):
OperatorNode({feat_1, feat_2}, rung, feat_ind)
{
if((feat_1->type() == NODE_TYPE::INV) || (feat_2->type() == NODE_TYPE::INV))
......@@ -53,7 +53,7 @@ DivNode::DivNode(node_ptr feat_1, node_ptr feat_2, int rung, int feat_ind):
throw InvalidFeatureException();
set_value();
if(is_nan() || is_const())
if(is_nan() || is_const() || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) > u_bound) || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) < l_bound))
throw InvalidFeatureException();
set_test_value();
......