Commit f3bfec00 authored by Thomas Purcell's avatar Thomas Purcell
Browse files

interface is working, but with some issues

parent 4e9e72b4
......@@ -28,6 +28,62 @@ SISSORegressor::SISSORegressor(std::shared_ptr<FeatureSpace> feat_space, std::ve
_work = std::vector<double>(_lwork, 0.0);
}
// Python-facing constructor: the property vectors arrive as numpy ndarrays
// and the per-task sample counts as python lists; everything is converted to
// std::vector members before any numerical work is done.
SISSORegressor::SISSORegressor(std::shared_ptr<FeatureSpace> feat_space, np::ndarray prop, np::ndarray prop_test, python::list task_sizes_train, python::list task_sizes_test, int n_dim, int n_residual) :
_prop(python_conv_utils::from_ndarray<double>(prop)),
_prop_test(python_conv_utils::from_ndarray<double>(prop_test)),
_a((n_dim + 1) * prop.shape(0)),
_b(prop.shape(0)),
_error(prop.shape(0), 0.0),
_s(n_dim + 1),
_task_sizes_train(python_conv_utils::from_list<int>(task_sizes_train)),
_task_sizes_test(python_conv_utils::from_list<int>(task_sizes_test)),
_feat_space(feat_space),
_mpi_comm(feat_space->mpi_comm()),
_n_samp(prop.shape(0)),
_n_dim(n_dim),
_n_residual(n_residual),
_lwork(-1),
_rank(0)
{
// Initialize a, b, ones, s, and _error arrays
std::fill_n(_a.data(), (_n_dim + 1) * _n_samp, 0.0);
std::fill_n(_b.data(), _n_samp, 0.0);
std::fill_n(_s.data(), _n_dim + 1, 0.0);
// Query the optimal work-array size for an (n_dim + 1)-column solve
// (presumably a LAPACK lwork query -- confirm), then allocate the work array.
_lwork = get_opt_lwork(_n_dim + 1);
_work = std::vector<double>(_lwork, 0.0);
}
// Python-facing constructor: the property vectors and task sizes all arrive
// as python lists; sizes are taken from boost::python::len(prop).
SISSORegressor::SISSORegressor(std::shared_ptr<FeatureSpace> feat_space, python::list prop, python::list prop_test, python::list task_sizes_train, python::list task_sizes_test, int n_dim, int n_residual) :
_prop(python_conv_utils::from_list<double>(prop)),
_prop_test(python_conv_utils::from_list<double>(prop_test)),
_a((n_dim + 1) * boost::python::len(prop)),
_b(boost::python::len(prop)),
_error(boost::python::len(prop), 0.0),
_s(n_dim + 1),
_task_sizes_train(python_conv_utils::from_list<int>(task_sizes_train)),
_task_sizes_test(python_conv_utils::from_list<int>(task_sizes_test)),
_feat_space(feat_space),
_mpi_comm(feat_space->mpi_comm()),
_n_samp(boost::python::len(prop)),
_n_dim(n_dim),
_n_residual(n_residual),
_lwork(-1),
_rank(0)
{
// Initialize a, b, ones, s, and _error arrays
std::fill_n(_a.data(), (_n_dim + 1) * _n_samp, 0.0);
std::fill_n(_b.data(), _n_samp, 0.0);
std::fill_n(_s.data(), _n_dim + 1, 0.0);
// Query the optimal work-array size via get_opt_lwork, then allocate it.
_lwork = get_opt_lwork(_n_dim + 1);
_work = std::vector<double>(_lwork, 0.0);
}
void SISSORegressor::set_a(std::vector<int>& inds, int start, int n_samp)
{
for(int ii = 0; ii < inds.size(); ++ii)
......@@ -194,20 +250,30 @@ void SISSORegressor::l0_norm(std::vector<double>& prop, int n_dim)
_models.push_back(models);
}
// Convert the stored models (_models holds one vector of Model per
// descriptor dimension) into a nested python list for the python interface.
python::list SISSORegressor::models_py()
{
    python::list result;
    for(std::vector<Model>& dim_models : _models)
    {
        result.append<python::list>(python_conv_utils::to_list<Model>(dim_models));
    }
    return result;
}
void SISSORegressor::register_python()
{
using namespace boost::python;
class_<SISSORegressor>("SISSORegressor", init<std::shared_ptr<FeatureSpace>, std::vector<double>, std::vector<double>, std::vector<int>, std::vector<int>, int, int>())
class_<SISSORegressor>("SISSORegressor", init<std::shared_ptr<FeatureSpace>, np::ndarray, np::ndarray, python::list, python::list, int, int>())
.def(init<std::shared_ptr<FeatureSpace>, python::list, python::list, python::list, python::list, int, int>())
.def("fit", &SISSORegressor::fit)
.add_property("prop", &SISSORegressor::prop)
.add_property("prop_test", &SISSORegressor::prop_test)
.add_property("models", &SISSORegressor::models)
.add_property("prop", &SISSORegressor::prop_py)
.add_property("prop_test", &SISSORegressor::prop_test_py)
.add_property("models", &SISSORegressor::models_py)
.add_property("n_samp", &SISSORegressor::n_samp)
.add_property("n_dim", &SISSORegressor::n_dim)
.add_property("n_residual", &SISSORegressor::n_residual)
.add_property("feat_space", &SISSORegressor::feat_space)
.add_property("error", &SISSORegressor::error)
.def_readonly("_task_sizes_train", &SISSORegressor::_task_sizes_train)
.def_readonly("_task_sizes_test", &SISSORegressor::_task_sizes_test)
.def_readonly("_mpi_comm", &SISSORegressor::_mpi_comm);
.add_property("error", &SISSORegressor::error_py)
.add_property("task_sizes_train", &SISSORegressor::task_sizes_train)
.add_property("task_sizes_test", &SISSORegressor::task_sizes_test)
;
}
\ No newline at end of file
......@@ -5,6 +5,8 @@
#include <descriptor_identifier/Model/Model.hpp>
#include <ctime>
namespace python = boost::python;
namespace np = boost::python::numpy;
/**
* @brief SISSO Regressor class, to find the best models, and store them
*
......@@ -45,6 +47,10 @@ public:
*/
SISSORegressor(std::shared_ptr<FeatureSpace> feat_space, std::vector<double> prop, std::vector<double> prop_test, std::vector<int> task_sizes_train, std::vector<int> task_sizes_test, int n_dim, int n_residual);
SISSORegressor(std::shared_ptr<FeatureSpace> feat_space, np::ndarray prop, np::ndarray prop_test, python::list task_sizes_train, python::list task_sizes_test, int n_dim, int n_residual);
SISSORegressor(std::shared_ptr<FeatureSpace> feat_space, python::list prop, python::list prop_test, python::list task_sizes_train, python::list task_sizes_test, int n_dim, int n_residual);
/**
* @brief Get the optimal size of the working array
*
......@@ -99,16 +105,22 @@ public:
*/
inline std::vector<double> prop(){return _prop;}

/**
* @brief Accessor for _prop as a numpy ndarray (python interface)
*/
inline np::ndarray prop_py(){return python_conv_utils::to_ndarray<double>(_prop);}

/**
* @brief Accessor function for _prop_test
*/
inline std::vector<double> prop_test(){return _prop_test;}

/**
* @brief Accessor for _prop_test as a numpy ndarray (python interface)
*/
inline np::ndarray prop_test_py(){return python_conv_utils::to_ndarray<double>(_prop_test);}

/**
* @brief Accessor function for _models
*/
inline std::vector<std::vector<Model>> models(){return _models;}

/**
* @brief Accessor for _models as a nested python list (defined in the .cpp)
*/
python::list models_py();
/**
* @brief Acessor function for n_samp
*/
......@@ -124,11 +136,16 @@ public:
*/
inline int n_residual(){return _n_residual;}
// Accessor for the per-task training sample counts as a python list.
// BUG FIX: the original bodies were missing `return` -- a non-void function
// flowing off the end is undefined behavior, so python would receive garbage.
inline python::list task_sizes_train(){return python_conv_utils::to_list<int>(_task_sizes_train);}

// Accessor for the per-task test sample counts as a python list.
inline python::list task_sizes_test(){return python_conv_utils::to_list<int>(_task_sizes_test);}
/**
* @brief Accessor function for _error (the model error vector)
*/
inline std::vector<double> error(){return _error;}

/**
* @brief Accessor for _error as a numpy ndarray (python interface)
*/
inline np::ndarray error_py(){return python_conv_utils::to_ndarray<double>(_error);}
static void register_python();
};
......
......@@ -51,6 +51,86 @@ FeatureSpace::FeatureSpace(
_max_temp_store(max_temp_store)
{
initialize_fs(prop);
}
// Python-facing constructor with the property given as a python list.
// Converts all python containers to std::vectors and delegates the shared
// setup to initialize_fs().
FeatureSpace::FeatureSpace(
    python::list phi_0,
    python::list allowed_ops,
    python::list prop,
    python::list task_sizes,
    int max_phi,
    int n_sis_select,
    int max_store_rung,
    int n_rung_generate,
    int max_temp_store,
    double min_abs_feat_val,
    double max_abs_feat_val
):
    _phi(python_conv_utils::shared_ptr_vec_from_list<Node, FeatureNode>(phi_0)),
    _phi_0(_phi),
    _allowed_ops(python_conv_utils::from_list<std::string>(allowed_ops)),
    _scores(python::len(phi_0), 0.0),
    // BUG FIX: _task_sizes must be converted from task_sizes, not allowed_ops
    // (copy-paste error; the ndarray overload already did this correctly).
    _task_sizes(python_conv_utils::from_list<int>(task_sizes)),
    _start_gen(1, 0),
    _feature_space_file("feature_space/selected_features.txt"),
    _mpi_comm(mpi_setup::comm),
    _l_bound(min_abs_feat_val),
    _u_bound(max_abs_feat_val),
    _max_phi(max_phi),
    _n_sis_select(n_sis_select),
    _n_feat(python::len(phi_0)),
    _n_rung_store(max_store_rung),
    _n_rung_generate(n_rung_generate),
    _max_temp_store(max_temp_store)
{
    // The sample count comes from the first initial feature
    _n_samp = _phi_0[0]->n_samp();
    initialize_fs(python_conv_utils::from_list<double>(prop));
}
// Python-facing constructor with the property given as a numpy ndarray.
// Converts all python containers to std::vectors and delegates the shared
// setup to initialize_fs().
FeatureSpace::FeatureSpace(
python::list phi_0,
python::list allowed_ops,
np::ndarray prop,
python::list task_sizes,
int max_phi,
int n_sis_select,
int max_store_rung,
int n_rung_generate,
int max_temp_store,
double min_abs_feat_val,
double max_abs_feat_val
):
_phi(python_conv_utils::shared_ptr_vec_from_list<Node, FeatureNode>(phi_0)),
_phi_0(_phi),
_allowed_ops(python_conv_utils::from_list<std::string>(allowed_ops)),
_scores(python::len(phi_0), 0.0),
_task_sizes(python_conv_utils::from_list<int>(task_sizes)),
_start_gen(1, 0),
_feature_space_file("feature_space/selected_features.txt"),
_mpi_comm(mpi_setup::comm),
_l_bound(min_abs_feat_val),
_u_bound(max_abs_feat_val),
_max_phi(max_phi),
_n_sis_select(n_sis_select),
_n_feat(python::len(phi_0)),
_n_rung_store(max_store_rung),
_n_rung_generate(n_rung_generate),
_max_temp_store(max_temp_store)
{
// The sample count comes from the first initial feature
_n_samp = _phi_0[0]->n_samp();
initialize_fs(python_conv_utils::from_ndarray<double>(prop));
}
void FeatureSpace::initialize_fs(std::vector<double> prop)
{
if(_n_rung_store == -1)
_n_rung_store = _max_phi - 1;
if(_n_rung_generate > 1)
throw std::logic_error("A maximum of one rung can be generated on the fly.");
else if(_max_phi - _n_rung_generate < _n_rung_store)
throw std::logic_error("Requesting to store more rungs than what can be pre-generated.");
if(_mpi_comm->rank() == 0)
{
std::ofstream out_file_stream = std::ofstream();
......@@ -58,17 +138,13 @@ FeatureSpace::FeatureSpace(
out_file_stream << std::setw(14) <<std::left << "# FEAT_ID" << std::setw(24) << std::left << "Score" << "Feature Expression" << std::endl;
out_file_stream.close();
}
if(_max_temp_store != -1)
_max_temp_store /= 3;
_project = project_funcs::project_r;
if(_n_rung_generate > 1)
throw std::logic_error("A maximum of one rung can be generated on the fly.");
else if(_max_phi - _n_rung_generate < _n_rung_store)
throw std::logic_error("Requesting to store more rungs than what can be pre-generated.");
for(auto & op : allowed_ops)
for(auto & op : _allowed_ops)
{
if((op.compare("add") == 0) || (op.compare("sub") == 0) || (op.compare("mult") == 0) || (op.compare("abs_diff") == 0))
_com_bin_operators.push_back(allowed_op_maps::binary_operator_map[op]);
......@@ -77,11 +153,12 @@ FeatureSpace::FeatureSpace(
else
_un_operators.push_back(allowed_op_maps::unary_operator_map[op]);
}
generate_feature_space(prop);
_scores.reserve(_phi.size());
_scores.resize(_phi.size());
}
}
void FeatureSpace::generate_new_feats(std::vector<node_ptr>::iterator& feat, std::vector<node_ptr>& feat_set, int& feat_ind, double l_bound, double u_bound)
{
......@@ -735,24 +812,27 @@ void FeatureSpace::sis(std::vector<double>& prop)
}
if(_mpi_comm->rank() == 0)
out_file_stream.close();
}
void FeatureSpace::register_python()
{
using namespace boost::python;
class_<FeatureSpace>("FeatureSpace", init<std::shared_ptr<MPI_Interface>, std::vector<node_ptr>, std::vector<std::string>, std::vector<double>, std::vector<int>, optional<int, int, int, int, int, double, double>>())
.def("sis", &FeatureSpace::sis)
void (FeatureSpace::*sis_list)(list) = &FeatureSpace::sis;
void (FeatureSpace::*sis_ndarray)(np::ndarray) = &FeatureSpace::sis;
class_<FeatureSpace>("FeatureSpace", init<list, list, np::ndarray, list, optional<int, int, int, int, int, double, double>>())
.def(init<list, list, list, list, optional<int, int, int, int, int, double, double>>())
.def("sis", sis_list)
.def("sis", sis_ndarray)
.def("feat_in_phi", &FeatureSpace::feat_in_phi)
.add_property("phi_selected", &FeatureSpace::phi_selected)
.add_property("phi", &FeatureSpace::phi)
.add_property("phi0", &FeatureSpace::phi0)
.add_property("scores", &FeatureSpace::scores)
.add_property("task_sizes", &FeatureSpace::task_sizes)
.def_readonly("_allowed_ops", &FeatureSpace::_allowed_ops)
.def_readonly("_un_operators", &FeatureSpace::_un_operators)
.def_readonly("_com_bin_operators", &FeatureSpace::_com_bin_operators)
.def_readonly("_bin_operators", &FeatureSpace::_bin_operators)
.def_readonly("_start_gen", &FeatureSpace::_start_gen)
.add_property("phi_selected", &FeatureSpace::phi_selected_py)
.add_property("phi", &FeatureSpace::phi_py)
.add_property("phi0", &FeatureSpace::phi0_py)
.add_property("scores", &FeatureSpace::scores_py)
.add_property("task_sizes", &FeatureSpace::task_sizes_py)
.add_property("allowed_ops", &FeatureSpace::allowed_ops_py)
.add_property("start_gen", &FeatureSpace::start_gen_py)
.def_readonly("_feature_space_file", &FeatureSpace::_feature_space_file)
.def_readonly("_l_bound", &FeatureSpace::_l_bound)
.def_readonly("_u_bound", &FeatureSpace::_u_bound)
......@@ -762,7 +842,7 @@ void FeatureSpace::register_python()
.def_readonly("_n_feat", &FeatureSpace::_n_feat)
.def_readonly("_n_rung_store", &FeatureSpace::_n_rung_store)
.def_readonly("_n_rung_generate", &FeatureSpace::_n_rung_generate)
.def_readonly("_mpi_comm", &FeatureSpace::_mpi_comm);
;
}
......
......@@ -14,7 +14,9 @@
#include <iostream>
#include <iomanip>
// namespace mpi = boost::mpi;
namespace python = boost::python;
namespace np = boost::python::numpy;
/**
* @brief Feature Space for SISSO calculations
* @details Stores and performs all feature calculations for SIS
......@@ -70,13 +72,63 @@ public:
std::vector<int> task_sizes,
int max_phi=1,
int n_sis_select=1,
int max_store_rung=2,
int max_store_rung=-1,
int n_rung_generate=0,
int max_temp_store=-1,
double min_abs_feat_val=1e-50,
double max_abs_feat_val=1e50
);
/**
* @brief Constructor for the feature space (python interface, list property)
* @details constructs the feature space from an initial set of features and a list of allowed operators
*
* @param phi_0 python list of initial features (FeatureNodes)
* @param allowed_ops python list of allowed operator names
* @param prop property vector to fit against, as a python list
* @param task_sizes python list of the number of samples per task
* @param max_phi highest rung value for the calculation
* @param n_sis_select number of features to select during each SIS step
* @param max_store_rung highest rung whose features are stored (-1: defaults to max_phi - 1)
* @param n_rung_generate number of rungs generated on the fly (at most 1)
* @param max_temp_store temporary storage limit; -1 is a sentinel that disables it (exact semantics set in initialize_fs -- confirm)
* @param min_abs_feat_val minimum absolute feature value
* @param max_abs_feat_val maximum absolute feature value
*/
FeatureSpace(
python::list phi_0,
python::list allowed_ops,
python::list prop,
python::list task_sizes,
int max_phi=1,
int n_sis_select=1,
int max_store_rung=-1,
int n_rung_generate=0,
int max_temp_store=-1,
double min_abs_feat_val=1e-50,
double max_abs_feat_val=1e50
);
/**
* @brief Constructor for the feature space (python interface, ndarray property)
* @details constructs the feature space from an initial set of features and a list of allowed operators
*
* @param phi_0 python list of initial features (FeatureNodes)
* @param allowed_ops python list of allowed operator names
* @param prop property vector to fit against, as a numpy ndarray
* @param task_sizes python list of the number of samples per task
* @param max_phi highest rung value for the calculation
* @param n_sis_select number of features to select during each SIS step
* @param max_store_rung highest rung whose features are stored (-1: defaults to max_phi - 1)
* @param n_rung_generate number of rungs generated on the fly (at most 1)
* @param max_temp_store temporary storage limit; -1 is a sentinel that disables it (exact semantics set in initialize_fs -- confirm)
* @param min_abs_feat_val minimum absolute feature value
* @param max_abs_feat_val maximum absolute feature value
*/
FeatureSpace(
python::list phi_0,
python::list allowed_ops,
np::ndarray prop,
python::list task_sizes,
int max_phi=1,
int n_sis_select=1,
int max_store_rung=-1,
int n_rung_generate=0,
int max_temp_store=-1,
double min_abs_feat_val=1e-50,
double max_abs_feat_val=1e50
);
void initialize_fs(std::vector<double> prop);
/**
* @brief Generate the full feature set from the allowed operators and initial feature set
* @details populates phi with all features from an initial set and the allowed operators
......@@ -88,20 +140,28 @@ public:
*/
inline std::vector<node_ptr> phi_selected(){return _phi_selected;};
inline boost::python::list phi_selected_py(){return python_conv_utils::to_list<node_ptr>(_phi_selected);};
/**
* @brief Accessor function for _phi
*/
inline std::vector<node_ptr> phi(){return _phi;};
inline boost::python::list phi_py(){return python_conv_utils::to_list<node_ptr>(_phi);};
/**
* @brief Accessor function for _phi_0
*/
inline std::vector<node_ptr> phi0(){return _phi_0;};
inline boost::python::list phi0_py(){return python_conv_utils::to_list<node_ptr>(_phi_0);};
/**
* @brief Accessor function for _scores
*/
// Accessor for the feature scores.
// BUG FIX: the block contained two identical definitions of scores() (diff
// residue of old + new lines), which is a redefinition error; keep one.
inline std::vector<double> scores(){return _scores;}

// Python-facing accessor: scores as a numpy ndarray.
inline np::ndarray scores_py(){return python_conv_utils::to_ndarray<double>(_scores);};
/**
* @brief Accessor function for _mpi_comm
......@@ -110,6 +170,12 @@ public:
inline std::vector<int> task_sizes(){return _task_sizes;}
inline boost::python::list task_sizes_py(){return python_conv_utils::to_list<int>(_task_sizes);};
inline boost::python::list allowed_ops_py(){return python_conv_utils::to_list<std::string>(_allowed_ops);}
inline boost::python::list start_gen_py(){return python_conv_utils::to_list<int>(_start_gen);}
void generate_new_feats(std::vector<node_ptr>::iterator& feat, std::vector<node_ptr>& feat_set, int& feat_ind, double l_bound=1e-50, double u_bound=1e50);
void project_generated(double* prop, int size, std::vector<node_ptr>& phi_selected, std::vector<double>& scores_selected, std::vector<double>& scores_comp);
......@@ -125,6 +191,18 @@ public:
*/
void sis(std::vector<double>& prop);
/**
* @brief SIS overload taking the property as a numpy ndarray (python interface)
*/
inline void sis(np::ndarray prop)
{
// Convert to a named std::vector: sis takes the property by non-const
// reference, so a temporary cannot be passed directly.
std::vector<double> prop_vec = python_conv_utils::from_ndarray<double>(prop);
sis(prop_vec);
}

/**
* @brief SIS overload taking the property as a python list (python interface)
*/
inline void sis(python::list prop)
{
std::vector<double> prop_vec = python_conv_utils::from_list<double>(prop);
sis(prop_vec);
}
/**
* @brief Is a feature in this process' _phi?
*
......
......@@ -14,6 +14,45 @@ FeatureNode::FeatureNode(int feat_ind, std::string expr, std::vector<double> val
set_test_value();
}
// Python-facing constructor: value/test_value arrive as numpy ndarrays.
// Sample counts are taken from the array shapes; the global value-storage
// arrays in node_value_arrs are (re)sized to accommodate this feature.
FeatureNode::FeatureNode(int feat_ind, std::string expr, np::ndarray value, np::ndarray test_value, Unit unit) :
Node(feat_ind, value.shape(0), test_value.shape(0)),
_value(python_conv_utils::from_ndarray<double>(value)),
_test_value(python_conv_utils::from_ndarray<double>(test_value)),
_unit(unit),
_expr(expr)
{
// Automatically resize the storage arrays
if(node_value_arrs::N_STORE_FEATURES == 0)
node_value_arrs::initialize_values_arr(_n_samp, _n_test_samp, 1);
else if((_n_samp != node_value_arrs::N_SAMPLES) || (_n_test_samp != node_value_arrs::N_SAMPLES_TEST))
throw std::logic_error("Number of samples in current feature is not the same as the others, (" + std::to_string(_n_samp) + " and " + std::to_string(_n_test_samp) + " vs. " + std::to_string(node_value_arrs::N_SAMPLES) + " and " + std::to_string(node_value_arrs::N_SAMPLES_TEST) + ")");
else if(feat_ind >= node_value_arrs::N_STORE_FEATURES)
node_value_arrs::resize_values_arr(0, node_value_arrs::N_STORE_FEATURES + 1, node_value_arrs::N_STORE_FEATURES + 1, true);
// Copy the converted data into the shared storage arrays
set_value();
set_test_value();
}
// Python-facing constructor: value/test_value arrive as python lists.
// Sample counts are taken from the list lengths; the global value-storage
// arrays in node_value_arrs are (re)sized to accommodate this feature.
FeatureNode::FeatureNode(int feat_ind, std::string expr, python::list value, python::list test_value, Unit unit) :
Node(feat_ind, python::len(value), python::len(test_value)),
_value(python_conv_utils::from_list<double>(value)),
_test_value(python_conv_utils::from_list<double>(test_value)),
_unit(unit),
_expr(expr)
{
// Automatically resize the storage arrays
if(node_value_arrs::N_STORE_FEATURES == 0)
node_value_arrs::initialize_values_arr(_n_samp, _n_test_samp, 1);
else if((_n_samp != node_value_arrs::N_SAMPLES) || (_n_test_samp != node_value_arrs::N_SAMPLES_TEST))
throw std::logic_error("Number of samples in current feature is not the same as the others, (" + std::to_string(_n_samp) + " and " + std::to_string(_n_test_samp) + " vs. " + std::to_string(node_value_arrs::N_SAMPLES) + " and " + std::to_string(node_value_arrs::N_SAMPLES_TEST) + ")");
else if(feat_ind >= node_value_arrs::N_STORE_FEATURES)
node_value_arrs::resize_values_arr(0, node_value_arrs::N_STORE_FEATURES + 1, node_value_arrs::N_STORE_FEATURES + 1, true);
// Copy the converted data into the shared storage arrays
set_value();
set_test_value();
}
// FeatureNode has no manually-managed resources; default destruction suffices.
FeatureNode::~FeatureNode() = default;
......@@ -43,17 +82,15 @@ void FeatureNode::register_python()
std::string (FeatureNode::*expr_const)() const = &FeatureNode::expr;
using namespace boost::python;
class_<FeatureNode, bases<Node>>("FeatureNode", init<int, std::string, std::vector<double>, std::vector<double>, Unit>())
class_<FeatureNode, bases<Node>>("FeatureNode", init<int, std::string, np::ndarray, np::ndarray, Unit>())
.def(init<int, std::string, python::list, python::list, Unit>())
.def("is_nan", &FeatureNode::is_nan)
.def("is_const", &FeatureNode::is_const)
.def("set_value", &FeatureNode::set_value)
.def("set_test_value", &FeatureNode::set_test_value)
.add_property("value", &FeatureNode::value)
.add_property("test_value", &FeatureNode::test_value)
.add_property("expr", expr_1)
.add_property("expr", expr_const)
.add_property("unit", &FeatureNode::unit)
.add_property("type", &FeatureNode::type)
.add_property("rung", &FeatureNode::rung)
;
}
......
#ifndef FEATURE_NODE
#define FEATURE_NODE
#include <python/conversion_utils.hpp>
#include <utils/math_funcs.hpp>
#include <utils/enum.hpp>
#include <memory>
#include <boost/serialization/export.hpp>
#include <boost/serialization/base_object.hpp>
#include <boost/python/numpy.hpp>
#include <feature_creation/node/Node.hpp>
namespace np = boost::python::numpy;
namespace python = boost::python;
/**
* @brief Node that describe the leaves of the operator graph (Initial features in Phi_0)
*/
......@@ -55,6 +61,8 @@ public:
* @param unit Unit of the feature
*/
FeatureNode(int feat_ind, std::string expr, std::vector<double> value, std::vector<double> test_value, Unit unit);
FeatureNode(int feat_ind, std::string expr, np::ndarray value, np::ndarray test_value, Unit unit);
FeatureNode(int feat_ind, std::string expr, python::list value, python::list test_value, Unit unit);
FeatureNode(const FeatureNode&) = default;
FeatureNode(FeatureNode&&) = default;
......
......@@ -42,7 +42,6 @@ void ModelNode::register_python()
.def("is_const", &ModelNode::is_const)
.def("set_value", &ModelNode::set_value)
.def("set_test_value", &ModelNode::set_test_value)
.add_property("type", &ModelNode::type)
.add_property("rung", &ModelNode::rung)
;
}
......@@ -103,15 +103,14 @@ void Node::register_python()
.add_property("arr_ind", &Node::arr_ind)
.add_property("selected", &Node::selected, &Node::set_selected)
.add_property("d_mat_ind", &Node::d_mat_ind, &Node::set_d_mat_ind)