Commit c46482b3 authored by Thomas Purcell
Browse files

Merge branch 'master' of gitlab.mpcdf.mpg.de:tpurcell/cpp_sisso into joss

parents f1b0b5ac 4229d2dc
......@@ -356,6 +356,7 @@ public:
* @param project_type (str) The type of loss function/projection operator to use
* @param n_sis_select (int) The number of features to select during each SIS step
* @param cross_corr_max (double) The maximum allowed cross-correlation value between selected features
* @param excluded_inds (list) The list of primary feature indexes to not include in any features
*/
FeatureSpace(
std::string feature_file,
......@@ -364,7 +365,8 @@ public:
py::list task_sizes_train,
std::string project_type="regression",
int n_sis_select=1,
double cross_corr_max=1.0
double cross_corr_max=1.0,
py::list excluded_inds = py::list()
);
// DocString: feat_space_init_file_py_list
......@@ -378,6 +380,7 @@ public:
* @param project_type (str) The type of loss function/projection operator to use
* @param n_sis_select (int) The number of features to select during each SIS step
* @param cross_corr_max (double) The maximum allowed cross-correlation value between selected features
* @param excluded_inds (list) The list of primary feature indexes to not include in any features
*/
FeatureSpace(
std::string feature_file,
......@@ -386,7 +389,8 @@ public:
py::list task_sizes_train,
std::string project_type="regression",
int n_sis_select=1,
double cross_corr_max=1.0
double cross_corr_max=1.0,
py::list excluded_inds = py::list()
);
// DocString: feat_space_sis_arr
......
......@@ -30,13 +30,22 @@ void str2node::set_parameters(node_ptr feat, const std::vector<std::string> op_t
}
#endif
node_ptr str2node::postfix2node(const std::string postfix_expr, const std::vector<node_ptr>& phi_0, unsigned long int& feat_ind)
node_ptr str2node::postfix2node(const std::string postfix_expr, const std::vector<node_ptr>& phi_0, unsigned long int& feat_ind, const std::vector<int>& excluded_inds)
{
std::vector<node_ptr> stack;
std::vector<std::string> postfix_split = str_utils::split_string_trim(postfix_expr, "|");
if(postfix_split.size() == 1)
{
    // A single-term expression is only a primary feature index: parse it once and reuse it.
    const int prim_ind = std::stoi(postfix_split[0]);

    // Same int-vs-size() comparison as before; only the repeated parsing was removed.
    if(prim_ind >= phi_0.size())
    {
        throw std::logic_error("Accessing feature outside of phi_0");
    }

    // A primary feature listed in excluded_inds must not be returned.
    // (The former `return nullptr;` after the throw was unreachable and has been dropped.)
    if(std::find(excluded_inds.begin(), excluded_inds.end(), prim_ind) != excluded_inds.end())
    {
        throw InvalidFeatureException();
    }

    return phi_0[prim_ind];
}
......@@ -45,6 +54,17 @@ node_ptr str2node::postfix2node(const std::string postfix_expr, const std::vecto
std::string term = postfix_split[ff];
if(term.find_first_not_of("0123456789") == std::string::npos)
{
    // term consists only of digits, so it is used as an index into phi_0.
    const int prim_ind = std::stoi(term);

    if(prim_ind >= phi_0.size())
    {
        throw std::logic_error("Accessing feature outside of phi_0");
    }

    // Test the index of the *current* term against the exclusion list. The previous code
    // looked at postfix_split[0], so only the first term of the expression was ever checked.
    // (The former `return nullptr;` after the throw was unreachable and has been dropped.)
    if(std::find(excluded_inds.begin(), excluded_inds.end(), prim_ind) != excluded_inds.end())
    {
        throw InvalidFeatureException();
    }

    stack.push_back(phi_0[prim_ind]);
    ++feat_ind;
}
......@@ -345,7 +365,7 @@ node_ptr str2node::postfix2node(const std::string postfix_expr, const std::vecto
return stack[0];
}
std::vector<node_ptr> str2node::phi_selected_from_file(const std::string filename, const std::vector<node_ptr> phi_0)
std::vector<node_ptr> str2node::phi_selected_from_file(const std::string filename, const std::vector<node_ptr> phi_0, const std::vector<int>& excluded_inds)
{
node_value_arrs::resize_values_arr(0, phi_0.size());
node_value_arrs::initialize_d_matrix_arr();
......@@ -371,7 +391,7 @@ std::vector<node_ptr> str2node::phi_selected_from_file(const std::string filenam
node_value_arrs::resize_d_matrix_arr(1);
boost::algorithm::split(split_line, line, boost::algorithm::is_any_of("\t "), boost::token_compress_on);
node_ptr new_feat = postfix2node(split_line[1], phi_0, feat_ind);
node_ptr new_feat = postfix2node(split_line[1], phi_0, feat_ind, excluded_inds);
new_feat->set_selected(true);
new_feat->set_d_mat_ind(feat_sel);
......@@ -384,7 +404,7 @@ std::vector<node_ptr> str2node::phi_selected_from_file(const std::string filenam
return phi_selected;
}
std::vector<node_ptr> str2node::phi_from_file(const std::string filename, const std::vector<node_ptr> phi_0)
std::vector<node_ptr> str2node::phi_from_file(const std::string filename, const std::vector<node_ptr> phi_0, const std::vector<int>& excluded_inds)
{
node_value_arrs::resize_values_arr(0, phi_0.size());
......@@ -408,10 +428,9 @@ std::vector<node_ptr> str2node::phi_from_file(const std::string filename, const
}
try
{
node_ptr feat = postfix2node(line, phi_0, feat_ind);
node_ptr feat = postfix2node(line, phi_0, feat_ind, excluded_inds);
if(feat->type() == NODE_TYPE::FEAT)
{
++n_prim_feat;
continue;
}
phi.push_back(feat);
......
......@@ -36,9 +36,11 @@ namespace str2node
* @param postfix_expr The postfix expression of the feature node
* @param phi_0 The primary feature space
* @param feat_ind The index of the new feature
* @param excluded_inds The list of primary feature indexes to not include in any features
*
* @return The feature node described by the postfix expression
*/
node_ptr postfix2node(const std::string postfix_expr, const std::vector<node_ptr>& phi_0, unsigned long int& feat_ind);
node_ptr postfix2node(const std::string postfix_expr, const std::vector<node_ptr>& phi_0, unsigned long int& feat_ind, const std::vector<int>& excluded_inds);
/**
* @brief Convert a feature_space/selected_features.txt into a phi_selected;
......@@ -46,10 +48,11 @@ node_ptr postfix2node(const std::string postfix_expr, const std::vector<node_ptr
*
* @param filename The name of the feature_space/selected_features.txt file
* @param phi_0 The initial feature space
* @param excluded_inds The list of primary feature indexes to not include in any features
*
* @return The selected feature set from the file
*/
std::vector<node_ptr> phi_selected_from_file(const std::string filename, const std::vector<node_ptr> phi_0);
std::vector<node_ptr> phi_selected_from_file(const std::string filename, const std::vector<node_ptr> phi_0, const std::vector<int>& excluded_inds);
/**
* @brief Convert a text file containing postfix expressions of features into the feature space
......@@ -57,10 +60,11 @@ std::vector<node_ptr> phi_selected_from_file(const std::string filename, const s
*
* @param filename The name of the file storing all the features
* @param phi_0 The initial feature space
* @param excluded_inds The list of primary feature indexes to not include in any features
*
* @return The feature set defined from the file
*/
std::vector<node_ptr> phi_from_file(const std::string filename, const std::vector<node_ptr> phi_0);
std::vector<node_ptr> phi_from_file(const std::string filename, const std::vector<node_ptr> phi_0, const std::vector<int>& excluded_inds);
#ifdef PARAMETERIZE
/**
......
......@@ -62,7 +62,7 @@ void sisso::register_all()
def(
"phi_selected_from_file",
&str2node::phi_selected_from_file_py,
(arg("filename"), arg("phi_0")),
(arg("filename"), arg("phi_0"), arg("excluded_inds")),
"@DocString_node_utils_phi_sel_from_file@"
);
......@@ -252,7 +252,7 @@ void sisso::feature_creation::registerFeatureSpace()
)
)
.def(
init<std::string, list, list, list, optional<std::string, int, double>>(
init<std::string, list, list, list, optional<std::string, int, double, py::list>>(
(
arg("self"),
arg("feature_file"),
......@@ -261,13 +261,14 @@ void sisso::feature_creation::registerFeatureSpace()
arg("task_sizes"),
arg("project_type")="regression",
arg("n_sis_select")=1,
arg("cross_corr_max")=1.0
arg("cross_corr_max")=1.0,
arg("excluded_inds")=py::list()
),
"@DocString_feat_space_init_file_py_list@"
)
)
.def(
init<std::string, list, np::ndarray, list, optional<std::string, int, double>>(
init<std::string, list, np::ndarray, list, optional<std::string, int, double, py::list>>(
(
arg("self"),
arg("feature_file"),
......@@ -276,7 +277,8 @@ void sisso::feature_creation::registerFeatureSpace()
arg("task_sizes"),
arg("project_type")="regression",
arg("n_sis_select")=1,
arg("cross_corr_max")=1.0
arg("cross_corr_max")=1.0,
arg("excluded_inds")=py::list()
),
"@DocString_feat_space_init_file_np_array@"
)
......
......@@ -28,7 +28,8 @@ FeatureSpace::FeatureSpace(
py::list task_sizes,
std::string project_type,
int n_sis_select,
double cross_corr_max
double cross_corr_max,
py::list excluded_inds
):
_phi_0(python_conv_utils::shared_ptr_vec_from_list<Node, FeatureNode>(phi_0)),
_prop_train(python_conv_utils::from_ndarray<double>(prop)),
......@@ -48,6 +49,7 @@ FeatureSpace::FeatureSpace(
_n_rung_generate(0),
_n_samp_train(_phi_0[0]->n_samp())
{
std::vector<int> ef = python_conv_utils::from_list<int>(excluded_inds);
#ifdef PARAMETERIZE
_max_param_depth = -1;
_reparam_residual = false;
......@@ -55,8 +57,7 @@ FeatureSpace::FeatureSpace(
comp_feats::set_is_valid_fxn(project_type, _cross_cor_max, _n_samp_train, _is_valid, _is_valid_feat_list);
mpi_reduce_op::set_op(_project_type, _cross_cor_max, _n_sis_select);
std::vector<node_ptr> phi_temp = str2node::phi_from_file(feature_file, _phi_0);
std::vector<node_ptr> phi_temp = str2node::phi_from_file(feature_file, _phi_0, ef);
phi_temp.insert(phi_temp.begin(), _phi_0.begin(), _phi_0.end());
_n_feat = phi_temp.size();
......@@ -211,7 +212,8 @@ FeatureSpace::FeatureSpace(
py::list task_sizes,
std::string project_type,
int n_sis_select,
double cross_corr_max
double cross_corr_max,
py::list excluded_inds
):
_phi_0(python_conv_utils::shared_ptr_vec_from_list<Node, FeatureNode>(phi_0)),
_prop_train(python_conv_utils::from_list<double>(prop)),
......@@ -231,6 +233,7 @@ FeatureSpace::FeatureSpace(
_n_rung_generate(0),
_n_samp_train(_phi_0[0]->n_samp())
{
std::vector<int> ef = python_conv_utils::from_list<int>(excluded_inds);
#ifdef PARAMETERIZE
_max_param_depth = -1;
_reparam_residual = false;
......@@ -238,7 +241,7 @@ FeatureSpace::FeatureSpace(
comp_feats::set_is_valid_fxn(project_type, _cross_cor_max, _n_samp_train, _is_valid, _is_valid_feat_list);
mpi_reduce_op::set_op(_project_type, _cross_cor_max, _n_sis_select);
std::vector<node_ptr> phi_temp = str2node::phi_from_file(feature_file, _phi_0);
std::vector<node_ptr> phi_temp = str2node::phi_from_file(feature_file, _phi_0, ef);
phi_temp.insert(phi_temp.begin(), _phi_0.begin(), _phi_0.end());
_n_feat = phi_temp.size();
......
......@@ -21,9 +21,9 @@
#include "python/py_binding_cpp_def/feature_creation/node_utils.hpp"
py::list str2node::phi_selected_from_file_py(std::string filename, py::list phi_0)
py::list str2node::phi_selected_from_file_py(std::string filename, py::list phi_0, py::list excluded_inds)
{
std::vector<node_ptr> phi_selected = phi_selected_from_file(filename, python_conv_utils::shared_ptr_vec_from_list<Node, FeatureNode>(phi_0));
std::vector<node_ptr> phi_selected = phi_selected_from_file(filename, python_conv_utils::shared_ptr_vec_from_list<Node, FeatureNode>(phi_0), python_conv_utils::from_list<int>(excluded_inds));
py::list feat_lst;
for(auto& feat : phi_selected)
{
......
......@@ -35,10 +35,11 @@ namespace str2node
*
* @param filename The name of the feature_space/selected_features.txt file
* @param phi_0 The initial feature space
* @param excluded_inds (list) The list of primary feature indexes to not include in any features
*
* @return The selected feature set from the file as a python list
*/
py::list phi_selected_from_file_py(std::string filename, py::list phi_0);
py::list phi_selected_from_file_py(std::string filename, py::list phi_0, py::list excluded_inds = py::list());
}
#endif
......@@ -52,19 +52,22 @@ namespace
TEST_F(FeatCreationUtilsTest, TestPostfix2Node)
{
EXPECT_THROW(str2node::postfix2node("0|asdf", _phi0, _feat_ind), std::logic_error);
EXPECT_THROW(str2node::postfix2node("1|0|sq", _phi0, _feat_ind), std::logic_error);
std::vector<int> excluded_inds;
node_ptr test = str2node::postfix2node("0|2|div|exp|1|add", _phi0, _feat_ind);
EXPECT_THROW(str2node::postfix2node("0|asdf", _phi0, _feat_ind, excluded_inds), std::logic_error);
EXPECT_THROW(str2node::postfix2node("1|0|sq", _phi0, _feat_ind, excluded_inds), std::logic_error);
node_ptr test = str2node::postfix2node("0|2|div|exp|1|add", _phi0, _feat_ind, excluded_inds);
EXPECT_EQ(test->type(), NODE_TYPE::ADD);
EXPECT_EQ(test->rung(), 3);
EXPECT_LT(abs(test->value()[1] - (std::exp(2.0) + 2.0)), 1e-10);
EXPECT_STREQ(test->expr().c_str(), "(exp((A / C)) + B)");
}
TEST_F(FeatCreationUtilsTest, TestPhiSelFromFile)
{
std::vector<int> excluded_inds;
std::ofstream out_file_stream = std::ofstream();
out_file_stream.open("phi_sel.txt");
out_file_stream << std::setw(14) <<std::left << "# FEAT_ID" << "Feature Postfix Expression (RPN)" << std::endl;
......@@ -88,7 +91,7 @@ namespace
out_file_stream << std::endl;
out_file_stream.close();
std::vector<node_ptr> phi_sel = str2node::phi_selected_from_file("phi_sel.txt", _phi0);
std::vector<node_ptr> phi_sel = str2node::phi_selected_from_file("phi_sel.txt", _phi0, excluded_inds);
ASSERT_EQ(phi_sel.size(), 10);
EXPECT_EQ(phi_sel[0]->type(), NODE_TYPE::MODEL_FEATURE);
......@@ -105,6 +108,8 @@ namespace
TEST_F(FeatCreationUtilsTest, TestPhiFromFile)
{
std::vector<int> excluded_inds;
std::ofstream out_file_stream = std::ofstream();
out_file_stream.open("phi.txt");
out_file_stream << "0|2|div|exp|1|add" << std::endl;
......@@ -123,8 +128,8 @@ namespace
out_file_stream << std::endl;
out_file_stream.close();
EXPECT_THROW(str2node::phi_from_file("not_phi_file.txt", _phi0), std::logic_error);
std::vector<node_ptr> phi = str2node::phi_from_file("phi.txt", _phi0);
EXPECT_THROW(str2node::phi_from_file("not_phi_file.txt", _phi0, excluded_inds), std::logic_error);
std::vector<node_ptr> phi = str2node::phi_from_file("phi.txt", _phi0, excluded_inds);
ASSERT_EQ(phi.size(), 10);
EXPECT_EQ(phi[0]->type(), NODE_TYPE::ADD);
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment