Commit 4229d2dc authored by Thomas Purcell
Browse files

Allow for excluded_inds to be used when reading in a phi.txt

This is needed when some primary features have to be excluded from the feature space that is read in.
parent 53f9718b
......@@ -349,6 +349,7 @@ public:
* @param project_type (str) The type of loss function/projection operator to use
* @param n_sis_select (int) The number of features to select during each SIS step
* @param cross_corr_max (double) The maximum allowed cross-correlation value between selected features
* @param excluded_inds (list) The list of primary feature indexes to not include in any features
*/
FeatureSpace(
std::string feature_file,
......@@ -357,7 +358,8 @@ public:
py::list task_sizes_train,
std::string project_type="regression",
int n_sis_select=1,
double cross_corr_max=1.0
double cross_corr_max=1.0,
py::list excluded_inds = py::list()
);
// DocString: feat_space_init_file_py_list
......@@ -371,6 +373,7 @@ public:
* @param project_type (str) The type of loss function/projection operator to use
* @param n_sis_select (int) The number of features to select during each SIS step
* @param cross_corr_max (double) The maximum allowed cross-correlation value between selected features
* @param excluded_inds (list) The list of primary feature indexes to not include in any features
*/
FeatureSpace(
std::string feature_file,
......@@ -379,7 +382,8 @@ public:
py::list task_sizes_train,
std::string project_type="regression",
int n_sis_select=1,
double cross_corr_max=1.0
double cross_corr_max=1.0,
py::list excluded_inds = py::list()
);
// DocString: feat_space_sis_arr
......
......@@ -30,13 +30,22 @@ void str2node::set_parameters(node_ptr feat, const std::vector<std::string> op_t
}
#endif
node_ptr str2node::postfix2node(const std::string postfix_expr, const std::vector<node_ptr>& phi_0, unsigned long int& feat_ind)
node_ptr str2node::postfix2node(const std::string postfix_expr, const std::vector<node_ptr>& phi_0, unsigned long int& feat_ind, const std::vector<int>& excluded_inds)
{
std::vector<node_ptr> stack;
std::vector<std::string> postfix_split = str_utils::split_string_trim(postfix_expr, "|");
if(postfix_split.size() == 1)
{
if(std::stoi(postfix_split[0]) >= phi_0.size())
{
throw std::logic_error("Accessing feature outside of phi_0");
}
if(std::find(excluded_inds.begin(), excluded_inds.end(), std::stoi(postfix_split[0])) != excluded_inds.end())
{
throw InvalidFeatureException();
return nullptr;
}
return phi_0[std::stoi(postfix_split[0])];
}
......@@ -49,6 +58,13 @@ node_ptr str2node::postfix2node(const std::string postfix_expr, const std::vecto
{
throw std::logic_error("Accessing feature outside of phi_0");
}
if(std::find(excluded_inds.begin(), excluded_inds.end(), std::stoi(postfix_split[0])) != excluded_inds.end())
{
throw InvalidFeatureException();
return nullptr;
}
stack.push_back(phi_0[std::stoi(term)]);
++feat_ind;
}
......@@ -349,7 +365,7 @@ node_ptr str2node::postfix2node(const std::string postfix_expr, const std::vecto
return stack[0];
}
std::vector<node_ptr> str2node::phi_selected_from_file(const std::string filename, const std::vector<node_ptr> phi_0)
std::vector<node_ptr> str2node::phi_selected_from_file(const std::string filename, const std::vector<node_ptr> phi_0, const std::vector<int>& excluded_inds)
{
node_value_arrs::resize_values_arr(0, phi_0.size());
node_value_arrs::initialize_d_matrix_arr();
......@@ -375,7 +391,7 @@ std::vector<node_ptr> str2node::phi_selected_from_file(const std::string filenam
node_value_arrs::resize_d_matrix_arr(1);
boost::algorithm::split(split_line, line, boost::algorithm::is_any_of("\t "), boost::token_compress_on);
node_ptr new_feat = postfix2node(split_line[1], phi_0, feat_ind);
node_ptr new_feat = postfix2node(split_line[1], phi_0, feat_ind, excluded_inds);
new_feat->set_selected(true);
new_feat->set_d_mat_ind(feat_sel);
......@@ -388,7 +404,7 @@ std::vector<node_ptr> str2node::phi_selected_from_file(const std::string filenam
return phi_selected;
}
std::vector<node_ptr> str2node::phi_from_file(const std::string filename, const std::vector<node_ptr> phi_0)
std::vector<node_ptr> str2node::phi_from_file(const std::string filename, const std::vector<node_ptr> phi_0, const std::vector<int>& excluded_inds)
{
node_value_arrs::resize_values_arr(0, phi_0.size());
......@@ -410,7 +426,7 @@ std::vector<node_ptr> str2node::phi_from_file(const std::string filename, const
}
try
{
node_ptr feat = postfix2node(line, phi_0, feat_ind);
node_ptr feat = postfix2node(line, phi_0, feat_ind, excluded_inds);
if(feat->type() == NODE_TYPE::FEAT)
{
continue;
......
......@@ -36,9 +36,11 @@ namespace str2node
* @param postfix_expr The postfix expression of the feature node
* @param phi_0 The primary feature space
* @param feat_ind The index of the new feature
* @param excluded_inds The list of primary feature indexes to not include in any features
*
* @return The feature node described by the postfix expression
*/
node_ptr postfix2node(const std::string postfix_expr, const std::vector<node_ptr>& phi_0, unsigned long int& feat_ind);
node_ptr postfix2node(const std::string postfix_expr, const std::vector<node_ptr>& phi_0, unsigned long int& feat_ind, const std::vector<int>& excluded_inds);
/**
* @brief Convert a feature_space/selected_features.txt file into the selected feature set (phi_selected)
......@@ -46,10 +48,11 @@ node_ptr postfix2node(const std::string postfix_expr, const std::vector<node_ptr
*
* @param filename The name of the feature_space/selected_features.txt file
* @param phi_0 The initial feature space
* @param excluded_inds The list of primary feature indexes to not include in any features
*
* @return The selected feature set from the file
*/
std::vector<node_ptr> phi_selected_from_file(const std::string filename, const std::vector<node_ptr> phi_0);
std::vector<node_ptr> phi_selected_from_file(const std::string filename, const std::vector<node_ptr> phi_0, const std::vector<int>& excluded_inds);
/**
* @brief Convert a text file containing postfix expressions of features into the feature space
......@@ -57,10 +60,11 @@ std::vector<node_ptr> phi_selected_from_file(const std::string filename, const s
*
* @param filename The name of the file storing all the features
* @param phi_0 The initial feature space
* @param excluded_inds The list of primary feature indexes to not include in any features
*
* @return The feature set defined from the file
*/
std::vector<node_ptr> phi_from_file(const std::string filename, const std::vector<node_ptr> phi_0);
std::vector<node_ptr> phi_from_file(const std::string filename, const std::vector<node_ptr> phi_0, const std::vector<int>& excluded_inds);
#ifdef PARAMETERIZE
/**
......
......@@ -62,7 +62,7 @@ void sisso::register_all()
def(
"phi_selected_from_file",
&str2node::phi_selected_from_file_py,
(arg("filename"), arg("phi_0")),
(arg("filename"), arg("phi_0"), arg("excluded_inds")),
"@DocString_node_utils_phi_sel_from_file@"
);
......@@ -251,7 +251,7 @@ void sisso::feature_creation::registerFeatureSpace()
)
)
.def(
init<std::string, list, list, list, optional<std::string, int, double>>(
init<std::string, list, list, list, optional<std::string, int, double, py::list>>(
(
arg("self"),
arg("feature_file"),
......@@ -260,13 +260,14 @@ void sisso::feature_creation::registerFeatureSpace()
arg("task_sizes"),
arg("project_type")="regression",
arg("n_sis_select")=1,
arg("cross_corr_max")=1.0
arg("cross_corr_max")=1.0,
arg("excluded_inds")=py::list()
),
"@DocString_feat_space_init_file_py_list@"
)
)
.def(
init<std::string, list, np::ndarray, list, optional<std::string, int, double>>(
init<std::string, list, np::ndarray, list, optional<std::string, int, double, py::list>>(
(
arg("self"),
arg("feature_file"),
......@@ -275,7 +276,8 @@ void sisso::feature_creation::registerFeatureSpace()
arg("task_sizes"),
arg("project_type")="regression",
arg("n_sis_select")=1,
arg("cross_corr_max")=1.0
arg("cross_corr_max")=1.0,
arg("excluded_inds")=py::list()
),
"@DocString_feat_space_init_file_np_array@"
)
......
......@@ -28,7 +28,8 @@ FeatureSpace::FeatureSpace(
py::list task_sizes,
std::string project_type,
int n_sis_select,
double cross_corr_max
double cross_corr_max,
py::list excluded_inds
):
_phi_0(python_conv_utils::shared_ptr_vec_from_list<Node, FeatureNode>(phi_0)),
_prop_train(python_conv_utils::from_ndarray<double>(prop)),
......@@ -48,6 +49,7 @@ FeatureSpace::FeatureSpace(
_n_rung_generate(0),
_n_samp_train(_phi_0[0]->n_samp())
{
std::vector<int> ef = python_conv_utils::from_list<int>(excluded_inds);
#ifdef PARAMETERIZE
_max_param_depth = -1;
_reparam_residual = false;
......@@ -55,7 +57,7 @@ FeatureSpace::FeatureSpace(
comp_feats::set_is_valid_fxn(project_type, _cross_cor_max, _n_samp_train, _is_valid, _is_valid_feat_list);
mpi_reduce_op::set_op(_project_type, _cross_cor_max, _n_sis_select);
std::vector<node_ptr> phi_temp = str2node::phi_from_file(feature_file, _phi_0);
std::vector<node_ptr> phi_temp = str2node::phi_from_file(feature_file, _phi_0, ef);
phi_temp.insert(phi_temp.begin(), _phi_0.begin(), _phi_0.end());
_n_feat = phi_temp.size();
......@@ -210,7 +212,8 @@ FeatureSpace::FeatureSpace(
py::list task_sizes,
std::string project_type,
int n_sis_select,
double cross_corr_max
double cross_corr_max,
py::list excluded_inds
):
_phi_0(python_conv_utils::shared_ptr_vec_from_list<Node, FeatureNode>(phi_0)),
_prop_train(python_conv_utils::from_list<double>(prop)),
......@@ -230,6 +233,7 @@ FeatureSpace::FeatureSpace(
_n_rung_generate(0),
_n_samp_train(_phi_0[0]->n_samp())
{
std::vector<int> ef = python_conv_utils::from_list<int>(excluded_inds);
#ifdef PARAMETERIZE
_max_param_depth = -1;
_reparam_residual = false;
......@@ -237,7 +241,7 @@ FeatureSpace::FeatureSpace(
comp_feats::set_is_valid_fxn(project_type, _cross_cor_max, _n_samp_train, _is_valid, _is_valid_feat_list);
mpi_reduce_op::set_op(_project_type, _cross_cor_max, _n_sis_select);
std::vector<node_ptr> phi_temp = str2node::phi_from_file(feature_file, _phi_0);
std::vector<node_ptr> phi_temp = str2node::phi_from_file(feature_file, _phi_0, ef);
phi_temp.insert(phi_temp.begin(), _phi_0.begin(), _phi_0.end());
_n_feat = phi_temp.size();
......
......@@ -21,9 +21,9 @@
#include "python/py_binding_cpp_def/feature_creation/node_utils.hpp"
py::list str2node::phi_selected_from_file_py(std::string filename, py::list phi_0)
py::list str2node::phi_selected_from_file_py(std::string filename, py::list phi_0, py::list excluded_inds)
{
std::vector<node_ptr> phi_selected = phi_selected_from_file(filename, python_conv_utils::shared_ptr_vec_from_list<Node, FeatureNode>(phi_0));
std::vector<node_ptr> phi_selected = phi_selected_from_file(filename, python_conv_utils::shared_ptr_vec_from_list<Node, FeatureNode>(phi_0), python_conv_utils::from_list<int>(excluded_inds));
py::list feat_lst;
for(auto& feat : phi_selected)
{
......
......@@ -35,10 +35,11 @@ namespace str2node
*
* @param filename The name of the feature_space/selected_features.txt file
* @param phi_0 The initial feature space
* @param excluded_inds (list) The list of primary feature indexes to not include in any features
*
* @return The selected feature set from the file as a python list
*/
py::list phi_selected_from_file_py(std::string filename, py::list phi_0);
py::list phi_selected_from_file_py(std::string filename, py::list phi_0, py::list excluded_inds = py::list());
}
#endif
......@@ -52,19 +52,22 @@ namespace
// Exercises str2node::postfix2node: two malformed postfix expressions are expected to throw
// std::logic_error, and one valid expression is expected to produce a node whose type, rung,
// value and expression string match the checks at the end of the test.
TEST_F(FeatCreationUtilsTest, TestPostfix2Node)
{
// NOTE(review): this listing looks like a rendered diff, so the three-argument postfix2node calls are
// presumably the pre-change lines and the four-argument calls (passing excluded_inds) their
// replacements -- confirm against the actual test file before relying on either set.
EXPECT_THROW(str2node::postfix2node("0|asdf", _phi0, _feat_ind), std::logic_error);
EXPECT_THROW(str2node::postfix2node("1|0|sq", _phi0, _feat_ind), std::logic_error);
// Left empty, so the excluded-index lookup in postfix2node never matches a primary feature here.
std::vector<int> excluded_inds;
node_ptr test = str2node::postfix2node("0|2|div|exp|1|add", _phi0, _feat_ind);
EXPECT_THROW(str2node::postfix2node("0|asdf", _phi0, _feat_ind, excluded_inds), std::logic_error);
EXPECT_THROW(str2node::postfix2node("1|0|sq", _phi0, _feat_ind, excluded_inds), std::logic_error);
node_ptr test = str2node::postfix2node("0|2|div|exp|1|add", _phi0, _feat_ind, excluded_inds);
// The postfix expression ends in "add", so the resulting node is expected to be an ADD node of rung 3.
EXPECT_EQ(test->type(), NODE_TYPE::ADD);
EXPECT_EQ(test->rung(), 3);
// Entry 1 of the node's values is compared against exp(2.0) + 2.0 with a 1e-10 tolerance.
// NOTE(review): abs is unqualified; confirm it resolves to the floating-point overload (std::abs) here.
EXPECT_LT(abs(test->value()[1] - (std::exp(2.0) + 2.0)), 1e-10);
// Expected infix rendering of the postfix input "0|2|div|exp|1|add".
EXPECT_STREQ(test->expr().c_str(), "(exp((A / C)) + B)");
}
TEST_F(FeatCreationUtilsTest, TestPhiSelFromFile)
{
std::vector<int> excluded_inds;
std::ofstream out_file_stream = std::ofstream();
out_file_stream.open("phi_sel.txt");
out_file_stream << std::setw(14) <<std::left << "# FEAT_ID" << "Feature Postfix Expression (RPN)" << std::endl;
......@@ -88,7 +91,7 @@ namespace
out_file_stream << std::endl;
out_file_stream.close();
std::vector<node_ptr> phi_sel = str2node::phi_selected_from_file("phi_sel.txt", _phi0);
std::vector<node_ptr> phi_sel = str2node::phi_selected_from_file("phi_sel.txt", _phi0, excluded_inds);
ASSERT_EQ(phi_sel.size(), 10);
EXPECT_EQ(phi_sel[0]->type(), NODE_TYPE::MODEL_FEATURE);
......@@ -105,6 +108,8 @@ namespace
TEST_F(FeatCreationUtilsTest, TestPhiFromFile)
{
std::vector<int> excluded_inds;
std::ofstream out_file_stream = std::ofstream();
out_file_stream.open("phi.txt");
out_file_stream << "0|2|div|exp|1|add" << std::endl;
......@@ -123,8 +128,8 @@ namespace
out_file_stream << std::endl;
out_file_stream.close();
EXPECT_THROW(str2node::phi_from_file("not_phi_file.txt", _phi0), std::logic_error);
std::vector<node_ptr> phi = str2node::phi_from_file("phi.txt", _phi0);
EXPECT_THROW(str2node::phi_from_file("not_phi_file.txt", _phi0, excluded_inds), std::logic_error);
std::vector<node_ptr> phi = str2node::phi_from_file("phi.txt", _phi0, excluded_inds);
ASSERT_EQ(phi.size(), 10);
EXPECT_EQ(phi[0]->type(), NODE_TYPE::ADD);
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment