diff --git a/src/.removed_features/log_fit_c_regressor/SISSOLogRegressorFitC.cpp b/src/.removed_features/log_fit_c_regressor/SISSOLogRegressorFitC.cpp index 57bdad0459c3977d4c58a2113d34504c51e6925b..ad5ad1acbd6dfe014b0656a0c3df066c8ba46e03 100644 --- a/src/.removed_features/log_fit_c_regressor/SISSOLogRegressorFitC.cpp +++ b/src/.removed_features/log_fit_c_regressor/SISSOLogRegressorFitC.cpp @@ -3,22 +3,24 @@ SISSOLogRegressorFitC::SISSOLogRegressorFitC( std::shared_ptr<FeatureSpace> feat_space, Unit prop_unit, - std::vector<double> prop, - std::vector<double> prop_test, - std::vector<int> task_sizes_train, - std::vector<int> task_sizes_test, - std::vector<int> leave_out_inds, + np::ndarray prop, + np::ndarray prop_test, + py::list task_sizes_train, + py::list task_sizes_test, + py::list leave_out_inds, int n_dim, int n_residual, int n_models_store, bool fix_intercept -): +) : SISSOLogRegressor(feat_space, prop_unit, prop, prop_test, task_sizes_train, task_sizes_test, leave_out_inds, n_dim, n_residual, n_models_store, fix_intercept) { // fix intercept has to be true since its done via nlopt _fix_intercept = true; - std::copy_n(prop.data(), prop.size(), _prop.data()); + std::vector<double> prop_vec = python_conv_utils::from_ndarray<double>(prop); + + std::copy_n(prop_vec.data(), prop_vec.size(), _prop.data()); _ub_nl_opt.resize(_task_sizes_train.size() * 2, HUGE_VAL); int start = 0; for(int tt = 0; tt< _task_sizes_train.size(); ++tt) @@ -28,196 +30,45 @@ SISSOLogRegressorFitC::SISSOLogRegressorFitC( } } -void SISSOLogRegressorFitC::add_model(std::vector<int> indexes) -{ - if(_models.size() < indexes.size()) - _models.push_back({}); - std::vector<model_node_ptr> min_nodes(indexes.size()); - for(int ii = 0; ii < indexes.size(); ++ii) - { - int index = indexes[ii]; - min_nodes[ii] = std::make_shared<ModelNode>(_feat_space->phi_selected()[index]->arr_ind(), _feat_space->phi_selected()[index]->rung(), _feat_space->phi_selected()[index]->expr(), _feat_space->phi_selected()[index]->postfix_expr(), _feat_space->phi_selected()[index]->value(), _feat_space->phi_selected()[index]->test_value(), _feat_space->phi_selected()[index]->domain(), _feat_space->phi_selected()[index]->unit()); - } - ModelLogRegressorFitC model(_prop_unit, _prop, _prop_test, min_nodes, _task_sizes_train, _task_sizes_test, _fix_intercept); - _models.back().push_back(model); -} - -void SISSOLogRegressorFitC::output_models(std::vector<double>& residual) -{ - if(_mpi_comm->rank() == 0) - { - for(int rr = 0; rr < _n_models_store; ++rr) - { - _models.back()[rr].to_file("models/train_dim_" + std::to_string(_models.size()) + "_model_" + std::to_string(rr) + ".dat"); - if(_leave_out_inds.size() > 0) - _models.back()[rr].to_file("models/test_dim_" + std::to_string(_models.size()) + "_model_" + std::to_string(rr) + ".dat", false, _leave_out_inds); - } - } - for(int rr = 0; rr < _n_residual; ++rr) - _models.back()[rr].copy_error(&residual[rr * _n_samp]); -} - -void SISSOLogRegressorFitC::set_error(std::vector<int>& inds, double* coefs, double* c_coefs, double* error) -{ - std::fill_n(error, _n_samp, 0.0); - for(int ii = 0; ii < inds.size(); ++ii) - std::transform(node_value_arrs::get_d_matrix_ptr(inds[ii]), node_value_arrs::get_d_matrix_ptr(inds[ii]) + _n_samp, error, error, [&coefs, &ii](double fv, double e){return e + coefs[ii] * std::log(fv);}); - - int start = 0; - for(int tt = 0; tt < _task_sizes_train.size(); ++tt) - { - std::transform(error + start, error + start + _task_sizes_train[tt], _prop.data() + start, error + start, [&c_coefs, &tt](double e, double p){return p - (c_coefs[2*tt] + c_coefs[2*tt+1] * std::exp(e));}); - start += _task_sizes_train[tt]; - } -} - -void SISSOLogRegressorFitC::least_squares(std::vector<int>& inds, double* coefs, std::vector<double>& c_coefs, double* a, double*b, double* work) +SISSOLogRegressorFitC::SISSOLogRegressorFitC( + std::shared_ptr<FeatureSpace> feat_space, + Unit prop_unit, + py::list prop, + py::list prop_test, + py::list task_sizes_train, + py::list task_sizes_test, + py::list leave_out_inds, + int n_dim, + int n_residual, + int n_models_store, + bool fix_intercept +) : + SISSOLogRegressor(feat_space, prop_unit, prop, prop_test, task_sizes_train, task_sizes_test, leave_out_inds, n_dim, n_residual, n_models_store, fix_intercept) { - set_a(inds, 0, _n_samp, a); - - nlopt_wrapper::l0_data d; - d._prop = _prop.data(); - d._a = a; - d._n_feat = inds.size(); - - std::fill_n(c_coefs.data(), c_coefs.size(), 1.0); - for(int ub = 0; ub < _ub_nl_opt.size(); ub += 2) - c_coefs[ub] = std::min(0.0, _ub_nl_opt[ub] - 0.01); - - double minf; - - nlopt::opt opt(nlopt::LN_SBPLX, c_coefs.size()); - opt.set_min_objective(nlopt_wrapper::objective_log_least_sq, &d); - opt.set_maxeval(1000); - opt.set_xtol_rel(1e-6); - opt.set_upper_bounds(_ub_nl_opt); + // fix intercept has to be true since its done via nlopt + _fix_intercept = true; - try - { - nlopt::result result = opt.optimize(c_coefs, minf); - } - catch(std::exception &e) - { - std::fill_n(c_coefs.data(), _task_sizes_train.size(), 1.0); - dcopy_(_task_sizes_train.size(), nlopt_wrapper::_zeros.data(), 1, c_coefs.data(), 2); - for(int ub = 0; ub < _ub_nl_opt.size(); ub += 2) - c_coefs[ub] = std::min(c_coefs[ub], _ub_nl_opt[ub] - 0.01); - } + std::vector<double> prop_vec = python_conv_utils::from_list<double>(prop); + std::copy_n(prop_vec.data(), prop_vec.size(), _prop.data()); + _ub_nl_opt.resize(_task_sizes_train.size() * 2, HUGE_VAL); int start = 0; - for(int tt = 0; tt < _task_sizes_train.size(); ++tt) + for(int tt = 0; tt< _task_sizes_train.size(); ++tt) { - std::transform(_prop.begin() + start, _prop.begin() + start + _task_sizes_train[tt], b + start, [&c_coefs, &tt](double p){return std::log((p - c_coefs[tt*2]) / c_coefs[tt*2 + 1]);}); + _ub_nl_opt[2*tt] = *std::min_element(_prop.begin() + start, _prop.begin() + start + _task_sizes_train[tt]); start += _task_sizes_train[tt]; } - - int info = 0; - int n_dim = inds.size(); - dgels_('N', _n_samp, n_dim, 1, a, _n_samp, b, _n_samp, work, _lwork, &info); - if(info == 0) - { - std::copy_n(b, n_dim, coefs); - } - else if(info > 0) - { - std::string err_msg = "Descriptor matrix is not full-rank. The features that caused this are: "; - for(auto& ind : inds) - err_msg += std::to_string(ind) + ": " + _feat_space->phi_selected()[ind]->expr() + ", "; - std::cerr << err_msg.substr(0, err_msg.size() - 2) << "." << std::endl; - std::fill_n(coefs, n_dim, std::numeric_limits<double>::infinity()); - } - else - { - std::fill_n(coefs, inds.size(), 0.0); - } } -void SISSOLogRegressorFitC::l0_norm(std::vector<double>& prop, int n_dim) +py::list SISSOLogRegressorFitC::models_py() { - int n_get_models = std::max(_n_residual, _n_models_store); - std::vector<double> coefs(n_dim, 0.0); - std::vector<double> c_coefs(_task_sizes_train.size() * 2, 0.0); - - std::vector<int> inds(n_dim, 0); - std::vector<int> min_inds(n_get_models * n_dim, -1); - - std::vector<double> min_errors(n_get_models, util_funcs::norm(_prop.data(), _n_samp)); - - unsigned long long int n_iterations = 1; - int n_dim_fact = 1; - for(int rr = 0; rr < n_dim; ++rr) + py::list model_list; + for(auto& mod_list : _models) { - inds[rr] = _feat_space->phi_selected().size() - 1 - rr; - n_iterations *= inds[rr] + 1; - n_dim_fact *= (rr + 1); - } - n_iterations /= n_dim_fact; - - if(inds.back() >= 0) - { - #pragma omp parallel shared(min_inds, min_errors, n_iterations, n_get_models, n_dim) firstprivate(inds, coefs, c_coefs) - { - int max_error_ind = 0; - std::vector<int> min_inds_private(min_inds); - std::vector<double> min_errors_private(min_errors); - - std::vector<double> error(_prop.size()); - std::vector<double> a(n_dim * _prop.size()); - std::vector<double> b(_prop.size()); - - _lwork = get_opt_lwork(n_dim, a.data(), b.data()); - std::vector<double> work(_lwork, 0.0); - - unsigned long long int ii_prev = 0; - - #pragma omp for schedule(monotonic: dynamic) - for(unsigned long long int ii = _mpi_comm->rank(); ii < n_iterations; ii += static_cast<unsigned long long int>(_mpi_comm->size())) - { - util_funcs::iterate(inds, inds.size(), ii - ii_prev); - least_squares(inds, coefs.data(), c_coefs, a.data(), b.data(), work.data()); - set_error(inds, coefs.data(), c_coefs.data(), error.data()); - - double rmse = util_funcs::norm(error) / std::sqrt(_n_samp); - if(rmse < min_errors_private[max_error_ind]) - { - min_errors_private[max_error_ind] = rmse; - std::copy_n(inds.begin(), inds.size(), min_inds_private.begin() + max_error_ind * n_dim); - max_error_ind = std::max_element(min_errors_private.begin(), min_errors_private.end()) - min_errors_private.begin(); - } - - ii_prev = ii; - } - - #pragma omp critical - { - max_error_ind = std::max_element(min_errors.begin(), min_errors.end()) - min_errors.begin(); - for(int ee = 0; ee < n_get_models; ++ee) - { - if(min_errors_private[ee] < min_errors[max_error_ind]) - { - min_errors[max_error_ind] = min_errors_private[ee]; - std::copy_n(min_inds_private.begin() + ee * n_dim, n_dim, min_inds.begin() + max_error_ind * n_dim); - max_error_ind = std::max_element(min_errors.begin(), min_errors.end()) - min_errors.begin(); - } - } - } - } - } - std::vector<double> all_min_error(_mpi_comm->size() * n_get_models); - std::vector<int> all_min_inds(_mpi_comm->size() * n_get_models * n_dim); - - mpi::all_gather(*_mpi_comm, min_errors.data(), n_get_models, all_min_error); - mpi::all_gather(*_mpi_comm, min_inds.data(), n_get_models * n_dim, all_min_inds); - - inds = util_funcs::argsort(all_min_error); - - for(int rr = 0; rr < n_get_models; ++rr) - { - std::vector<int> indexes(n_dim); - node_value_arrs::clear_temp_test_reg(); - for(int ii = 0; ii < n_dim; ++ii) - indexes[ii] = all_min_inds[inds[rr] * n_dim + ii]; - - add_model(indexes); + py::list lst; + for(auto& mod : mod_list) + lst.append<ModelLogRegressorFitC>(mod); + model_list.append<py::list>(lst); } + return model_list; } diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 17a3929b835cb5d972789c5549b793d9e00dc459..aaffe0263d8475ee59bdc8816297a59a3692b283 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,4 +1,3 @@ -include_directories(${CMAKE_CURRENT_LIST_DIR}/feature_creation/domain/) include_directories(${CMAKE_CURRENT_LIST_DIR}) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-deprecated-declarations") diff --git a/src/feature_creation/feature_space/FeatureSpace.cpp b/src/feature_creation/feature_space/FeatureSpace.cpp index eed693bae40bb880bc150e3133ddbf9c3358e196..bd43dfd04402dd7344487f5df09bb70d69459df1 100644 --- a/src/feature_creation/feature_space/FeatureSpace.cpp +++ b/src/feature_creation/feature_space/FeatureSpace.cpp @@ -129,11 +129,6 @@ void FeatureSpace::initialize_fs(std::string project_type) _project = project_funcs::project_log_r2; _project_no_omp = project_funcs::project_log_r2_no_omp; } - else if(project_type.compare("log_regression_fit_c") == 0) - { - _project = project_funcs::project_log_r2_fit_c; - _project_no_omp = project_funcs::project_log_r2_fit_c_no_omp; - } else throw std::logic_error("Wrong projection type passed to FeatureSpace constructor."); diff --git a/src/feature_creation/node/FeatureNode.cpp b/src/feature_creation/node/FeatureNode.cpp index 2b66f2158c5ee4060e252d28424922df14f7b666..6236b6ea9acc75a4c64ef47cd391159fbd506fcc 100644 --- a/src/feature_creation/node/FeatureNode.cpp +++ b/src/feature_creation/node/FeatureNode.cpp @@ -3,21 +3,13 @@ FeatureNode::FeatureNode() {} -FeatureNode::FeatureNode(int feat_ind, std::string expr, std::vector<double> value, std::vector<double> test_value, Domain domain, Unit unit, bool set_val) : +FeatureNode::FeatureNode(int feat_ind, std::string expr, std::vector<double> value, std::vector<double> test_value, Unit unit, bool set_val) : Node(feat_ind, value.size(), test_value.size()), _value(value), _test_value(test_value), _unit(unit), - _domain(domain), _expr(expr) { - // if((*std::max_element(value.begin(), value.end()) > _domain + 1e-5) || (*std::min_element(value.begin(), value.end()) < _domain - 1e-5)) - // { - // double max_val = *std::max_element(value.begin(), value.end()); - // double min_val = *std::min_element(value.begin(), value.end()); - // throw std::logic_error("The feature " + expr + " has values outside of the domain of " + _domain.toString() + " (min: " + std::to_string(min_val) + ", max: " + std::to_string(max_val) + ")"); - // } - if(set_val) { set_value(); diff --git a/src/feature_creation/node/FeatureNode.hpp b/src/feature_creation/node/FeatureNode.hpp index 1f88aad5819980e90e3aff7cc4617c199cdf67cf..374e66e1f35225ede8032deb961dbb33236ec5de 100644 --- a/src/feature_creation/node/FeatureNode.hpp +++ b/src/feature_creation/node/FeatureNode.hpp @@ -43,7 +43,6 @@ class FeatureNode: public Node { ar & boost::serialization::base_object<Node>(*this); ar & _expr; - ar & _domain; ar & _unit; ar & _value; ar & _test_value; @@ -53,7 +52,6 @@ protected: std::vector<double> _value; //!< values for the feature std::vector<double> _test_value; //!< test values for the feature Unit _unit; //!< Unit for the feature - Domain _domain; //!< The domain of the feature std::string _expr; //!< Expression of the feature public: @@ -70,10 +68,9 @@ public: * @param expr Expression for the feature * @param value Value of the feature for each sample * @param value Value of the feature for each test sample - * @param domain the min and max of the feature as a domain object * @param unit Unit of the feature */ - FeatureNode(int feat_ind, std::string expr, std::vector<double> value, std::vector<double> test_value, Domain domain, Unit unit, bool set_val = true); + FeatureNode(int feat_ind, std::string expr, std::vector<double> value, std::vector<double> test_value, Unit unit, bool set_val = true); #ifdef PY_BINDINGS /** * @brief Constructs a feature node using numpy arrays (cpp definition in <python/feature_creation/FeatureNode.cpp) @@ -82,10 +79,9 @@ public: * @param expr Expression for the feature * @param value Value of the feature for each sample * @param value Value of the feature for each test sample - * @param domain the min and max of the feature as a domain object * @param unit Unit of the feature */ - FeatureNode(int feat_ind, std::string expr, np::ndarray value, np::ndarray test_value, Domain domain, Unit unit); + FeatureNode(int feat_ind, std::string expr, np::ndarray value, np::ndarray test_value, Unit unit); /** * @brief Constructs a feature node using Python lists (cpp definition in <python/feature_creation/FeatureNode.cpp) @@ -94,10 +90,9 @@ public: * @param expr Expression for the feature * @param value Value of the feature for each sample * @param value Value of the feature for each test sample - * @param domain the min and max of the feature as a domain object * @param unit Unit of the feature */ - FeatureNode(int feat_ind, std::string expr, py::list value, py::list test_value, Domain domain, Unit unit); + FeatureNode(int feat_ind, std::string expr, py::list value, py::list test_value, Unit unit); #endif /** @@ -225,12 +220,6 @@ public: */ inline int rung(int cur_rung = 0){return cur_rung;} - // DocString: feat_node_domain - /** - * @brief The domain for the feature (min/max values) - */ - inline Domain domain(){return _domain;} - /** * @brief Get the primary feature decomposition of a feature * @return A map representing the primary feature comprising a feature @@ -349,14 +338,6 @@ public: */ inline double* test_value_ptr(const double* params, int offset = -1, int depth = 0){return test_value_ptr(offset);}; - /** - * @brief The domain for the feature (min/max values) - * - * @param params parameter values for non-linear operations - * @return the domain - */ - Domain domain(double* params, int depth = 1){return _domain;}; - /** * @brief The expression of the feature * diff --git a/src/feature_creation/node/Node.hpp b/src/feature_creation/node/Node.hpp index 8ae193b8dd1610d2e6ba9e26e4a5d0bfc9f628de..26ce6f9470996c17960c06a111a7f3a6a77c3afa 100644 --- a/src/feature_creation/node/Node.hpp +++ b/src/feature_creation/node/Node.hpp @@ -11,7 +11,6 @@ #ifndef NODE #define NODE -#include <feature_creation/domain/Domain.hpp> #include <feature_creation/node/value_storage/nodes_value_containers.hpp> #include <feature_creation/units/Unit.hpp> #ifdef PY_BINDINGS @@ -274,12 +273,6 @@ public: */ virtual NODE_TYPE type() = 0; - // DocString: node_domain - /** - * @brief The domain for the feature (min/max values) - */ - virtual Domain domain() = 0; - // DocString: node_rung /** * @brief return the rung of the feature @@ -394,14 +387,6 @@ public: */ virtual double* test_value_ptr(const double* params, int offset = -1, int depth = 0) = 0; - /** - * @brief The domain for the feature (min/max values) - * - * @param params parameter values for non-linear operations - * @return the domain - */ - virtual Domain domain(double* params, int depth = 1) = 0; - /** * @brief The expression of the feature * diff --git a/src/feature_creation/node/operator_nodes/OperatorNode.hpp b/src/feature_creation/node/operator_nodes/OperatorNode.hpp index debebf35c06b290400e12b99fd8789e38db6e048..c982c056013e26efdc6d5f19d7662f4133a27ffd 100644 --- a/src/feature_creation/node/operator_nodes/OperatorNode.hpp +++ b/src/feature_creation/node/operator_nodes/OperatorNode.hpp @@ -24,16 +24,6 @@ #ifdef PARAMETERIZE #include<nl_opt/NLOptWrapper.hpp> - -// #include "ceres/ceres.h" -// #include "glog/logging.h" - -// using ceres::AutoDiffCostFunction; -// using ceres::CauchyLoss; -// using ceres::CostFunction; -// using ceres::Problem; -// using ceres::Solver; -// using ceres::Solve; #endif #ifdef PY_BINDINGS @@ -241,12 +231,6 @@ public: */ virtual NODE_TYPE type() = 0; - // DocString: op_node_domain - /** - * @brief The domain for the feature (min/max values) - */ - virtual Domain domain() = 0; - /** * @brief Get the primary feature decomposition of a feature * @return A map representing the primary feature comprising a feature diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/abs/absolute_value.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/abs/absolute_value.cpp index 6e92d46954af5bc89525539bd1fe06de884daf4f..050a9bc28d2e7106da7bcfef4748ed46a2931fe8 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/abs/absolute_value.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/abs/absolute_value.cpp @@ -1,9 +1,5 @@ #include <feature_creation/node/operator_nodes/allowed_operator_nodes/abs/absolute_value.hpp> -<<<<<<< HEAD:src/feature_creation/node/operator_nodes/allowed_operator_nodes/abs/absolute_value.cpp - BOOST_SERIALIZATION_ASSUME_ABSTRACT(AbsNode) -======= ->>>>>>> 6e647d77e712fc63a3c941fa25f0b7491a60f5a2:src/feature_creation/node/operator_nodes/allowed_operator_nodes/abs/absolute_value.cpp void generateAbsNode(std::vector<node_ptr>& feat_list, node_ptr feat, int& feat_ind, double l_bound, double u_bound) { diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/abs/absolute_value.hpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/abs/absolute_value.hpp index eca203c6abb0135b498865bfc3e56d68adc38236..be8b4e1527b155d850a952cfeb613cbcc694a524 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/abs/absolute_value.hpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/abs/absolute_value.hpp @@ -10,6 +10,7 @@ #define ABS_VAL_NODE #include <feature_creation/node/operator_nodes/OperatorNode.hpp> +#include <fmt/core.h> // DocString: cls_abs_node /** @@ -69,7 +70,13 @@ public: /** * @brief Get the expression for the overall feature (From root node down) */ - inline std::string expr(){return "|" + _feats[0]->expr() + "|";} + inline std::string expr() + { + return fmt::format( + "|{}|", + _feats[0]->expr() + ); + } // DocString: abs_node_latex_expr /** @@ -86,7 +93,7 @@ public: * * @param offset(int) Key to determine which part of the temporary storage array to look into */ - void set_value(int offset = -1); + virtual void set_value(int offset = -1); // DocString: abs_node_set_test_value /** @@ -94,7 +101,7 @@ public: * * @param offset(int) Key to determine which part of the temporary storage array to look into */ - void set_test_value(int offset = -1); + virtual void set_test_value(int offset = -1); // DocString: abs_node_rung /** @@ -133,6 +140,78 @@ public: * @param expected_abs_tot The expected absolute sum of all values in div_mult_leaves */ void update_div_mult_leaves(std::map<std::string, double>& div_mult_leaves, double fact, double& expected_abs_tot); + + #ifdef PARAMETERIZE + /** + * @brief The parameters used for introducing more non linearity in the operators + */ + virtual std::vector<double> parameters(){return {};} + + /** + * @brief Solve the non-linear optimization to set the parameters + * @details Fits the data points from _feats->value_ptr and prop to get the parameters for the feature + * + * @param prop property to fit to get the parameters + */ + virtual void get_parameters(std::vector<double>& prop){return;} + + /** + * @brief Set the non-linear parameters + */ + virtual void set_parameters(std::vector<double>){return;} + + /** + * @brief Set the values of the training data for the feature inside of the value storage arrays + * + * @param offset(int) Key to determine which part of the temporary storage array to look into + * @param params pointer to the parameter values + */ + void set_value(const double* params, int offset=-1, int depth=1); + + /** + * @brief Set the values of the test data for the feature inside of the value storage arrays + * + * @param offset(int) Key to determine which part of the temporary storage array to look into + * @param params pointer to the parameter values + */ + void set_test_value(const double* params, int offset=-1, int depth=1); + + /** + * @brief The expression of the feature + * + * @param params parameter values for non-linear operations + * @return feature expression + */ + inline std::string expr(double* params, int depth=1) + { + return fmt::format( + "|{:.10e} * {} {:+15.10e}|", + params[0], + (depth < nlopt_wrapper::_max_param_depth ? _feats[0]->expr(params + 2, depth + 1) : _feats[0]->expr()), + params[1] + ); + } + + /** + * @brief Set the bounds for the nl parameterization + * + * @param lb pointer to the lower bounds data + * @param ub pointer to the upper bounds data + */ + virtual void set_bounds(double* lb, double* ub, int from_parent=2, int depth = 1); + + /** + * @brief Calculates the derivative of an operation with respect to the parameters for a given sample + * + * @param params pointer to the parameters + * @param dfdp pointer to where the feature derivative pointers are located + */ + inline void param_derivative(const double* params, double* dfdp) + { + double* val_ptr = _feats[0]->value_ptr(params); + std::transform(val_ptr, val_ptr + _n_samp, dfdp, [params](double vp){return util_funcs::sign(params[0] * vp + params[1]);}); + } + #endif }; void generateAbsNode(std::vector<node_ptr>& feat_list, node_ptr feat, int& feat_ind, double l_bound, double u_bound); diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/abs/parameterized_absolute_value.hpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/abs/parameterized_absolute_value.hpp index 1e3b33cf2ce3c841e291e61c4ca1d8ff5f1e2617..89780b231e4367fa55cfa1e010726f6bcf8a035a 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/abs/parameterized_absolute_value.hpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/abs/parameterized_absolute_value.hpp @@ -39,7 +39,6 @@ protected: using AbsNode::set_test_value; using AbsNode::value_ptr; using AbsNode::test_value_ptr; - using AbsNode::domain; using AbsNode::expr; std::vector<double> _params; //!< The parameters vector double _sign_alpha; //!< 1 if alpha is positive, -1 if alpha is negative @@ -128,12 +127,6 @@ public: */ inline std::string expr(){return expr(_params.data());} - // DocString: abs_param_node_domain - /** - * @brief The domain for the feature (min/max values) - */ - inline Domain domain(){return domain(_params.data());} - /** * @brief The parameters used for introducing more non linearity in the operators */ diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/abs_diff/absolute_difference.hpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/abs_diff/absolute_difference.hpp index 57f9bc0bb43e73538c75da4ee9d1f9e2a01a9686..4a0d9f00f8001f210cdba7af83e7351a41f1f872 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/abs_diff/absolute_difference.hpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/abs_diff/absolute_difference.hpp @@ -10,6 +10,7 @@ #define ABS_DIFF_NODE #include <feature_creation/node/operator_nodes/OperatorNode.hpp> +#include <fmt/core.h> // DocString: cls_abs_diff_node /** @@ -72,7 +73,14 @@ public: /** * @brief Get the expression for the overall feature (From root node down) */ - inline std::string expr(){return "|" + _feats[0]->expr() + " - (" + _feats[1]->expr() + ")|";} + inline std::string expr() + { + return fmt::format( + "|({}) - ({})|", + _feats[0]->expr(), + _feats[1]->expr() + ); + } // DocString: abs_diff_node_latex_expr /** @@ -89,7 +97,7 @@ public: * * @param offset(int) Key to determine which part of the temporary storage array to look into */ - void set_value(int offset = -1); + virtual void set_value(int offset = -1); // DocString: abs_diff_node_set_test_value /** @@ -97,7 +105,7 @@ public: * * @param offset(int) Key to determine which part of the temporary storage array to look into */ - void set_test_value(int offset = -1); + virtual void set_test_value(int offset = -1); // DocString: abs_diff_node_rung /** @@ -118,6 +126,12 @@ public: */ inline std::string get_postfix_term(){return "abd";} + /** + * @brief Check if the feature will be valid, if it is then set the value + * @return True if the feature is valid + */ + void check_feats(); + /** * @brief update the dictionary used to check if an Add/Sub node is valid * @@ -136,6 +150,80 @@ public: * @param expected_abs_tot The expected absolute sum of all values in div_mult_leaves */ void update_div_mult_leaves(std::map<std::string, double>& div_mult_leaves, double fact, double& expected_abs_tot); + + #ifdef PARAMETERIZE + /** + * @brief The parameters used for introducing more non linearity in the operators + */ + virtual std::vector<double> parameters(){return {};} + + /** + * @brief Solve the non-linear optimization to set the parameters + * @details Fits the data points from _feats->value_ptr and prop to get the parameters for the feature + * + * @param prop property to fit to get the parameters + */ + virtual void get_parameters(std::vector<double>& prop){return;} + + /** + * @brief Set the non-linear parameters + */ + virtual void set_parameters(std::vector<double>){return;} + + /** + * @brief Set the values of the training data for the feature inside of the value storage arrays + * + * @param offset(int) Key to determine which part of the temporary storage array to look into + * @param params pointer to the parameter values + */ + void set_value(const double* params, int offset=-1, int depth=1); + + /** + * @brief Set the values of the test data for the feature inside of the value storage arrays + * + * @param offset(int) Key to determine which part of the temporary storage array to look into + * @param params pointer to the parameter values + */ + void set_test_value(const double* params, int offset=-1, int depth=1); + + /** + * @brief The expression of the feature + * + * @param params parameter values for non-linear operations + * @return feature expression + */ + inline std::string expr(double* params, int depth=1) + { + return fmt::format( + "|({}) - ({:.10e} * {} {:+15.10e})|", + (depth < nlopt_wrapper::_max_param_depth ? _feats[0]->expr(params + _feats[1]->n_params() + 2, depth + 1) : _feats[0]->expr()), + params[0], + (depth < nlopt_wrapper::_max_param_depth ? _feats[1]->expr(params + 2, depth + 1) : _feats[1]->expr()), + params[1] + ); + } + + /** + * @brief Set the bounds for the nl parameterization + * + * @param lb pointer to the lower bounds data + * @param ub pointer to the upper bounds data + */ + void set_bounds(double* lb, double* ub, int from_parent=2, int depth = 1); + + /** + * @brief Calculates the derivative of an operation with respect to the parameters for a given sample + * + * @param params pointer to the parameters + * @param dfdp pointer to where the feature derivative pointers are located + */ + inline void param_derivative(const double* params, double* dfdp) + { + double* val_ptr_1 = _feats[0]->value_ptr(params, 2); + double* val_ptr_2 = _feats[1]->value_ptr(params, 1); + std::transform(val_ptr_1, val_ptr_1 + _n_samp, val_ptr_2, dfdp, [params](double vp_1, double vp_2){return -1.0 * util_funcs::sign(vp_1 - (params[0] * vp_2 + params[1]));}); + } + #endif }; void generateAbsDiffNode(std::vector<node_ptr>& feat_list, node_ptr feat_1, node_ptr feat_2, int& feat_ind, double l_bound, double u_bound); diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/abs_diff/parameterized_absolute_difference.hpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/abs_diff/parameterized_absolute_difference.hpp index 059ca89a67ba8d8532dfabe00f4dba1fb226a4e1..5d65654bbec46f1ddfa0c2112aa015274491e811 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/abs_diff/parameterized_absolute_difference.hpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/abs_diff/parameterized_absolute_difference.hpp @@ -37,7 +37,6 @@ protected: using AbsDiffNode::set_test_value; using AbsDiffNode::value_ptr; using AbsDiffNode::test_value_ptr; - using AbsDiffNode::domain; using AbsDiffNode::expr; std::vector<double> _params; @@ -127,12 +126,6 @@ public: */ inline std::string expr(){return expr(_params.data());} - // DocString: abs_diff_param_node_domain - /** - * @brief The domain for the feature (min/max values) - */ - inline Domain domain(){return domain(_params.data());} - /** * @brief The parameters used for introducing more non linearity in the operators */ diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/add/add.hpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/add/add.hpp index cbddcf2245bf145722d71e99a014094aa6fab99d..f656e96ef957bf30d2a7091b758adf3906b96748 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/add/add.hpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/add/add.hpp @@ -10,6 +10,7 @@ #define ADD_NODE #include <feature_creation/node/operator_nodes/OperatorNode.hpp> +#include <fmt/core.h> // DocString: cls_add_node /** @@ -69,7 +70,14 @@ public: /** * @brief Get the expression for the overall feature (From root node down) */ - inline std::string expr(){return "(" + _feats[0]->expr() + " + " + _feats[1]->expr() + ")";} + inline std::string expr() + { + return fmt::format( + "[({}) + ({})]", + _feats[0]->expr(), + _feats[1]->expr() + ); + } // DocString: add_node_latex_expr /** @@ -86,7 +94,7 @@ public: * * @param offset(int) Key to determine which part of the temporary storage array to look into */ - void set_value(int offset = -1); + virtual void set_value(int offset = -1); // DocString: add_node_set_test_value /** @@ -94,7 +102,7 @@ public: * * @param offset(int) Key to determine which part of the temporary storage array to look into */ - void set_test_value(int offset = -1); + virtual void set_test_value(int offset = -1); // DocString: add_node_rung /** @@ -115,6 +123,12 @@ public: */ inline std::string get_postfix_term(){return "add";} + /** + * @brief Check if the feature will be valid, if it is then set the value + * @return True if the feature is valid + */ + void check_feats(); + /** * @brief update the dictionary used to check if an Add/Sub node is valid * @@ -133,6 +147,75 @@ public: * @param expected_abs_tot The expected absolute sum of all values in div_mult_leaves */ void update_div_mult_leaves(std::map<std::string, double>& div_mult_leaves, double fact, double& expected_abs_tot); + + #ifdef PARAMETERIZE + /** + * @brief The parameters used for introducing more non linearity in the operators + */ + virtual std::vector<double> parameters(){return {};} + + /** + * @brief Solve the non-linear optimization to set the parameters + * @details Fits the data points from _feats->value_ptr and prop to get the parameters for the feature + * + * @param prop property to fit to get the parameters + */ + virtual void get_parameters(std::vector<double>& prop){return;} + + /** + * @brief Set the non-linear parameters + */ + virtual void set_parameters(std::vector<double>){return;} + + /** + * @brief Set the values of the training data for the feature inside of the value storage arrays + * + * @param offset(int) Key to determine which part of the temporary storage array to look into + * @param params pointer to the parameter values + */ + void set_value(const double* params, int offset=-1, int depth=1); + + /** + * @brief Set the values of the test data for the feature inside of the value storage arrays + * + * @param offset(int) Key to determine which part of the temporary storage array to look into + * @param params pointer to the parameter values + */ + void set_test_value(const double* params, int offset=-1, int depth=1); + + /** + * @brief The expression of the feature + * + * @param params parameter values for non-linear operations + * @return feature expression + */ + inline std::string expr(double* params, int depth=1) + { + return fmt::format( + "[({}) + ({:.10e} * {} {:+15.10e})]", + (depth < nlopt_wrapper::_max_param_depth ? _feats[0]->expr(params + _feats[1]->n_params() + 2, depth + 1) : _feats[0]->expr()), + params[0], + (depth < nlopt_wrapper::_max_param_depth ? _feats[1]->expr(params + 2, depth + 1) : _feats[1]->expr()), + params[1] + ); + } + + /** + * @brief Set the bounds for the nl parameterization + * + * @param lb pointer to the lower bounds data + * @param ub pointer to the upper bounds data + */ + void set_bounds(double* lb, double* ub, int from_parent=2, int depth = 1); + + /** + * @brief Calculates the derivative of an operation with respect to the parameters for a given sample + * + * @param params pointer to the parameters + * @param dfdp pointer to where the feature derivative pointers are located + */ + inline void param_derivative(const double* params, double* dfdp){std::fill_n(dfdp, _n_samp, 1.0);} + #endif }; void generateAddNode(std::vector<node_ptr>& feat_list, node_ptr feat_1, node_ptr feat_2, int& feat_ind, double l_bound, double u_bound); diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/add/parameterized_add.hpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/add/parameterized_add.hpp index f5b0d7b133a2d3e2d86bbbc76e4efd81c8dfc876..ddcc2e0dea9128b3867dd80fcf2d0183a5a21637 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/add/parameterized_add.hpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/add/parameterized_add.hpp @@ -38,7 +38,6 @@ protected: using AddNode::set_test_value; using AddNode::value_ptr; using AddNode::test_value_ptr; - using AddNode::domain; using AddNode::expr; std::vector<double> _params; @@ -128,12 +127,6 @@ public: */ inline std::string expr(){return expr(_params.data());} - // DocString: add_param_node_domain - /** - * @brief The domain for the feature (min/max values) - */ - inline Domain domain(){return domain(_params.data());} - /** * @brief The parameters used for introducing more non linearity in the operators */ diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cb/cube.hpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cb/cube.hpp index 42b3cbe3ed7517f3a6142b8feb1dace67bb0a7f1..243899bb34664140c648954a3d4ed0af1647f3df 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cb/cube.hpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cb/cube.hpp @@ -114,12 +114,6 @@ public: */ inline NODE_TYPE type(){return NODE_TYPE::CB;} - // DocString: cb_node_domain - /** - * @brief The domain for the feature (min/max values) - */ - Domain domain(){return _feats[0]->domain().pow(3.0, 1.0, 0.0);} - /** * @brief Get the string character representation of the node for the postfix expression * @return the string representation of the node for the postfix expression @@ -180,21 +174,6 @@ public: */ void set_test_value(const double* params, int offset=-1, int depth=1); - /** - * @brief The domain for the feature (min/max values) - * - * @param params parameter values for non-linear operations - * @return the domain - */ - inline Domain domain(double* params, int depth=1) - { - if(depth < nlopt_wrapper::_max_param_depth) - return _feats[0]->domain(params + 2, depth + 1).pow(3.0, params[0], params[1]); - else - return _feats[0]->domain().pow(3.0, params[0], params[1]); - } - // inline Domain domain(double* params){return _feats[0]->domain(params + 2).pow(3.0, params[0], params[1]);} - /** * @brief The expression of the feature * @@ -210,7 +189,6 @@ public: params[1] ); } - // inline std::string expr(double* params){return "(" + std::to_string(params[0]) + "*" + _feats[0]->expr(params + 2) + " + " + std::to_string(params[1]) + ")^3";} /** * @brief Set the bounds for the nl parameterization diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cb/parameterized_cube.hpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cb/parameterized_cube.hpp index 8b9cec6111c36ce8eb9842407fa5c4f194ac36c7..041d08d87196091cb653e69d2c5fa6c489aae445 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cb/parameterized_cube.hpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cb/parameterized_cube.hpp @@ -38,7 +38,6 @@ protected: using CbNode::set_test_value; using CbNode::value_ptr; using CbNode::test_value_ptr; - using CbNode::domain; using CbNode::expr; std::vector<double> _params; //!< The parameters vector @@ -127,12 +126,6 @@ public: */ inline std::string expr(){return expr(_params.data());} - // DocString: cb_param_node_domain - /** - * @brief The domain for the feature (min/max values) - */ - inline Domain domain(){return domain(_params.data());} - /** * @brief The parameters used for introducing more non linearity in the operators */ diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cbrt/cube_root.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cbrt/cube_root.cpp index cb0d8c0d58820c4590fe7496950ab14978414fe0..a04caad0352a8c3c8dc7ad7645d83f0481d61ea8 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cbrt/cube_root.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cbrt/cube_root.cpp @@ -7,7 +7,12 @@ void generateCbrtNode(std::vector<node_ptr>& feat_list, node_ptr feat, int& feat return; int offset = feat->rung() + 1; - double* val_ptr = node_value_arrs::get_value_ptr(feat_ind, feat_ind, offset); + + double* val_ptr = feat->value_ptr(offset + 2); + if(*std::min_element(val_ptr, val_ptr + _n_samp) < 0.0) + return; + + val_ptr = node_value_arrs::get_value_ptr(feat_ind, feat_ind, offset); allowed_op_funcs::cbrt(feat->n_samp(), feat->value_ptr(offset + 2), 1.0, 0.0, val_ptr); if((util_funcs::stand_dev(val_ptr, feat->n_samp()) < 1.0e-13) || std::any_of(val_ptr, val_ptr + feat->n_samp(), [&u_bound](double d){return !std::isfinite(d) || (std::abs(d) > u_bound);}) || (util_funcs::max_abs_val<double>(val_ptr, feat->n_samp()) < l_bound)) @@ -29,8 +34,9 @@ CbrtNode::CbrtNode(node_ptr feat, int feat_ind, double l_bound, double u_bound): if((feat->type() == NODE_TYPE::CB) || (feat->type() == NODE_TYPE::SQ) || (feat->type() == NODE_TYPE::SIX_POW) || (feat->type() == NODE_TYPE::INV)) throw InvalidFeatureException(); - if((_feats[0]->domain() < 0.0)) - throw InvalidFeatureException(); + double* val_ptr = feat->value_ptr(rung() + 2); + if(*std::min_element(val_ptr, val_ptr + _n_samp) < 0.0) + throw ItureException(); set_value(); if(is_nan() || is_const() || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) > u_bound) || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) < l_bound)) diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cbrt/cube_root.hpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cbrt/cube_root.hpp index 7268d5e35e4de6afb47e2bcd674c60970b244db9..49c6b4294bf23394e80e603c8f8c94bc25933e3b 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cbrt/cube_root.hpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cbrt/cube_root.hpp @@ -114,12 +114,6 @@ public: */ inline NODE_TYPE type(){return NODE_TYPE::CBRT;} - // DocString: cbrt_node_domain - /** - * @brief The domain for the feature (min/max values) - */ - Domain domain(){return _feats[0]->domain().pow(1.0 / 3.0, 1.0, 0.0);} - /** * @brief Get the string character representation of the node for the postfix expression * @return the string representation of the node for the postfix expression @@ -180,21 +174,6 @@ public: */ void set_test_value(const double* params, int offset=-1, int depth=1); - /** - * @brief The domain for the feature (min/max values) - * - * @param params parameter values for non-linear operations - * @return the domain - */ - inline Domain domain(double* params, int depth=1) - { - if(depth < nlopt_wrapper::_max_param_depth) - return _feats[0]->domain(params + 2, depth + 1).pow(1.0 / 3.0, params[0], params[1]); - else - return _feats[0]->domain().pow(1.0 / 3.0, params[0], params[1]); - } - // inline Domain domain(double* params){return _feats[0]->domain(params + 2).pow(1.0 / 3.0, params[0], params[1]);} - /** * @brief The expression of the feature * @@ -210,7 +189,6 @@ public: params[1] ); } - // inline std::string expr(double* params){return "cbrt(" + std::to_string(params[0]) + "*" + _feats[0]->expr(params + 2) + " + " + std::to_string(params[1]) + ")";} /** * @brief Set the bounds for the nl parameterization diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cbrt/parameterized_cube_root.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cbrt/parameterized_cube_root.cpp index c0ca608416483224ee0fd6e3649f376cbde88250..50a7436d991705937bfe5ab669cf0dac2f47702c 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cbrt/parameterized_cube_root.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cbrt/parameterized_cube_root.cpp @@ -7,9 +7,6 @@ void generateCbrtParamNode(std::vector<node_ptr>& feat_list, node_ptr feat, int& ++feat_ind; node_ptr new_feat = std::make_shared<CbrtParamNode>(feat, feat_ind, prop); - // if(new_feat->parameters()[0] * feat->domain() + new_feat->parameters()[1] <= 0.0) - // return; - new_feat->set_value(); if(new_feat->is_nan() || new_feat->is_const() || (util_funcs::max_abs_val<double>(new_feat->value_ptr(), new_feat->n_samp()) > u_bound) || (util_funcs::max_abs_val<double>(new_feat->value_ptr(), new_feat->n_samp()) < l_bound)) return; @@ -26,8 +23,6 @@ CbrtParamNode::CbrtParamNode(node_ptr feat, int feat_ind, double l_bound, double { _params.resize(n_params(), 0.0); get_parameters(prop); - if((_params[0] * _feats[0]->domain() + _params[1]) <= 0.0) - throw InvalidFeatureException(); if(is_nan() || is_const() || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) > u_bound) || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) < l_bound)) throw InvalidFeatureException(); diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cbrt/parameterized_cube_root.hpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cbrt/parameterized_cube_root.hpp index 07a356ce861ea05f45381ac1efe18cd3686f666a..7b07225b2f7ed3f63012f2d4329d533a3acd8387 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cbrt/parameterized_cube_root.hpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cbrt/parameterized_cube_root.hpp @@ -39,7 +39,6 @@ protected: using CbrtNode::set_test_value; using CbrtNode::value_ptr; using CbrtNode::test_value_ptr; - using CbrtNode::domain; using CbrtNode::expr; std::vector<double> _params; //!< The parameters vector @@ -127,12 +126,6 @@ public: */ inline std::string expr(){return expr(_params.data());} - // DocString: cbrt_param_node_domain - /** - * @brief The domain for the feature (min/max values) - */ - inline Domain domain(){return domain(_params.data());} - /** * @brief The parameters used for introducing more non linearity in the operators */ diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cos/cos.hpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cos/cos.hpp index 7c947bb9e830a80c7c050f692e5dc3db3a92957b..d5358c85a25f1632a02679251346de8c6612c01c 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cos/cos.hpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cos/cos.hpp @@ -114,12 +114,6 @@ public: */ inline NODE_TYPE type(){return NODE_TYPE::COS;} - // DocString: cos_node_domain - /** - * @brief The domain for the feature (min/max values) - */ - Domain domain(){return Domain(-1.0, 1.0);} - /** * @brief Get the string character representation of the node for the postfix expression * @return the string representation of the node for the postfix expression @@ -180,14 +174,6 @@ public: */ void set_test_value(const double* params, int offset=-1, int depth=1); - /** - * @brief The domain for the feature (min/max values) - * - * @param params parameter values for non-linear operations - * @return the domain - */ - inline Domain domain(double* params, int depth=1){return Domain(-1.0, 1.0);} - /** * @brief The expression of the feature * @@ -203,7 +189,6 @@ public: params[1] ); } - // inline std::string expr(double* params){return "cos(" + std::to_string(params[0]) + "*" + _feats[0]->expr( params + 2) + " + " + std::to_string(params[1]) + ")";} /** * @brief Set the bounds for the nl parameterization diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cos/parameterized_cos.hpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cos/parameterized_cos.hpp index af7cf0e761410c8217aa6d2b7e8865ebca467303..34bce5f94081eddce2f9d056d7cfb912bd84499c 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cos/parameterized_cos.hpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/cos/parameterized_cos.hpp @@ -38,7 +38,6 @@ protected: using CosNode::set_test_value; using CosNode::value_ptr; using CosNode::test_value_ptr; - using CosNode::domain; using CosNode::expr; std::vector<double> _params; //!< The parameters vector @@ -127,12 +126,6 @@ public: */ inline std::string expr(){return expr(_params.data());} - // DocString: cos_param_node_domain - /** - * @brief The domain for the feature (min/max values) - */ - inline Domain domain(){return domain(_params.data());} - /** * @brief The parameters used for introducing more non linearity in the operators */ diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/div/divide.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/div/divide.cpp index 66aafbe0e4163ce902ed4406fab047eaccf555b5..d7707d587fae61b94e8ae12b8391ed298caa34d6 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/div/divide.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/div/divide.cpp @@ -6,9 +6,6 @@ void generateDivNode(std::vector<node_ptr>& feat_list, node_ptr feat_1, node_ptr if((feat_1->type() == NODE_TYPE::INV) || (feat_2->type() == NODE_TYPE::INV) || (feat_2->type() == NODE_TYPE::DIV)) return; - // if(feat_2->domain().contains(0.0)) - // return; - std::map<std::string, double> div_mult_leaves; double expected_abs_tot = 0.0; feat_1->update_div_mult_leaves(div_mult_leaves, 1.0, expected_abs_tot); @@ -40,9 +37,6 @@ DivNode::DivNode(node_ptr feat_1, node_ptr feat_2, int feat_ind, double l_bound, if((_feats[0]->type() == NODE_TYPE::INV) || (_feats[1]->type() == NODE_TYPE::INV) || (_feats[1]->type() == NODE_TYPE::DIV)) throw InvalidFeatureException(); - if((_feats[1]->domain().contains(0.0))) - throw InvalidFeatureException(); - std::map<std::string, double> div_mult_leaves; double expected_abs_tot = 0.0; update_div_mult_leaves(div_mult_leaves, 1.0, expected_abs_tot); diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/div/divide.hpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/div/divide.hpp index f667f2ffbcc034499350cff2b24744e2d8b27afc..ff8ab9f731a9f3d3ffef504c687cca5ec3d6a102 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/div/divide.hpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/div/divide.hpp @@ -10,6 +10,7 @@ #define DIV_NODE #include <feature_creation/node/operator_nodes/OperatorNode.hpp> +#include <fmt/core.h> // DocString: cls_div_node /** @@ -69,7 +70,14 @@ public: /** * @brief Get the expression for the overall feature (From root node down) */ - inline std::string expr(){return "[(" + _feats[0]->expr() + ") / (" + _feats[1]->expr() + ")]";} + inline std::string expr() + { + return fmt::format( + "[({}) / ({})]", + _feats[0]->expr(), + _feats[1]->expr() + ); + } // DocString: div_node_latex_expr /** @@ -86,7 +94,7 @@ public: * * @param offset(int) Key to determine which part of the temporary storage array to look into */ - void set_value(int offset = -1); + virtual void set_value(int offset = -1); // DocString: div_node_set_test_value /** @@ -94,7 +102,7 @@ public: * * @param offset(int) Key to determine which part of the temporary storage array to look into */ - void set_test_value(int offset = -1); + virtual void set_test_value(int offset = -1); // DocString: div_node_rung /** @@ -115,6 +123,12 @@ public: */ inline std::string get_postfix_term(){return "div";} + /** + * @brief Check if the feature will be valid, if it is then set the value + * @return True if the feature is valid + */ + void check_feats(); + /** * @brief update the dictionary used to check if an Add/Sub node is valid * @@ -133,6 +147,81 @@ public: * @param expected_abs_tot The expected absolute sum of all values in div_mult_leaves */ void update_div_mult_leaves(std::map<std::string, double>& div_mult_leaves, double fact, double& expected_abs_tot); + + #ifdef PARAMETERIZE + /** + * @brief The parameters used for introducing more non linearity in the operators + */ + virtual std::vector<double> parameters(){return {};} + + /** + * @brief Solve the non-linear optimization to set the parameters + * @details Fits the data points from _feats->value_ptr and prop to get the parameters for the feature + * + * @param prop property to fit to get the parameters + */ + virtual void get_parameters(std::vector<double>& prop){return;} + + /** + * @brief Set the non-linear parameters + */ + virtual void set_parameters(std::vector<double>){return;} + + /** + * @brief Set the values of the training data for the feature inside of the value storage arrays + * + * @param offset(int) Key to determine which part of the temporary storage array to look into + * @param params pointer to the parameter values + */ + void set_value(const double* params, int offset=-1, int depth=1); + + /** + * @brief Set the values of the test data for the feature inside of the value storage arrays + * + * @param offset(int) Key to determine which part of the temporary storage array to look into + * @param params pointer to the parameter values + */ + void set_test_value(const double* params, int offset=-1, int depth=1); + + /** + * @brief The expression of the feature + * + * @param params parameter values for non-linear operations + * @return feature expression + */ + inline std::string expr(double* params, int depth=1) + { + return fmt::format( + "[({}) / ({:.10e} * {} {:+15.10e})]", + (depth < nlopt_wrapper::_max_param_depth ? _feats[0]->expr(params + _feats[1]->n_params() + 2, depth + 1) : _feats[0]->expr()), + params[0], + (depth < nlopt_wrapper::_max_param_depth ? _feats[1]->expr(params + 2, depth + 1) : _feats[1]->expr()), + params[1] + ); + } + // inline std::string expr(double* params){return "[(" + _feats[0]->expr(params + _feats[1]->n_params() + 2) + ") / (" + std::to_string(params[0]) + "*" + _feats[1]->expr(params + 2) + " + " + std::to_string(params[1]) + ")]";} + + /** + * @brief Set the bounds for the nl parameterization + * + * @param lb pointer to the lower bounds data + * @param ub pointer to the upper bounds data + */ + void set_bounds(double* lb, double* ub, int from_parent=2, int depth = 1); + + /** + * @brief Calculates the derivative of an operation with respect to the parameters for a given sample + * + * @param params pointer to the parameters + * @param dfdp pointer to where the feature derivative pointers are located + */ + inline void param_derivative(const double* params, double* dfdp) + { + double* val_ptr_1 = _feats[0]->value_ptr(params, 2); + double* val_ptr_2 = _feats[1]->value_ptr(params, 1); + std::transform(val_ptr_1, val_ptr_1 + _n_samp, val_ptr_2, dfdp, [params](double vp_1, double vp_2){return -1.0 * vp_1 / std::pow(params[0] * vp_2 + params[1], 2.0);}); + } + #endif }; void generateDivNode(std::vector<node_ptr>& feat_list, node_ptr feat_1, node_ptr feat_2, int& feat_ind, double l_bound, double u_bound); diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/div/parameterized_divide.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/div/parameterized_divide.cpp index 9d76bf71a7da4d7b58cc035b6f476c10ea6b756c..12b02bb2c73cddeb92315f6716dddb746a2f1f3b 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/div/parameterized_divide.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/div/parameterized_divide.cpp @@ -10,9 +10,6 @@ void generateDivParamNode(std::vector<node_ptr>& feat_list, node_ptr feat_1, nod node_ptr new_feat = std::make_shared<DivParamNode>(feat_1, feat_2, feat_ind, prop); - // if((new_feat->parameters()[0] * feat_2->domain() + new_feat->parameters()[1]).contains(0.0)) - // return; - new_feat->set_value(); if(new_feat->is_nan() || new_feat->is_const() || (util_funcs::max_abs_val<double>(new_feat->value_ptr(), new_feat->n_samp()) > u_bound) || (util_funcs::max_abs_val<double>(new_feat->value_ptr(), new_feat->n_samp()) < l_bound)) return; @@ -32,9 +29,6 @@ DivParamNode::DivParamNode(node_ptr feat_1, node_ptr feat_2, int feat_ind, doubl _params.resize(n_params(), 0.0); get_parameters(prop); - if((_params[0] * _feats[1]->domain() + _params[1]).contains(0.0)) - throw InvalidFeatureException(); - set_value(); if(is_nan() || is_const() || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) > u_bound) || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) < l_bound)) throw InvalidFeatureException(); diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/div/parameterized_divide.hpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/div/parameterized_divide.hpp index 926165e4657eda28b22002bf3d4ce2168fdebc32..2085540b13ea452a89b1dd2bc7c153add8f4663f 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/div/parameterized_divide.hpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/div/parameterized_divide.hpp @@ -38,7 +38,6 @@ protected: using DivNode::set_test_value; using DivNode::value_ptr; using DivNode::test_value_ptr; - using DivNode::domain; using DivNode::expr; std::vector<double> _params; @@ -128,12 +127,6 @@ public: */ inline std::string expr(){return expr(_params.data());} - // DocString: div_param_node_domain - /** - * @brief The domain for the feature (min/max values) - */ - inline Domain domain(){return domain(_params.data());} - /** * @brief The parameters used for introducing more non linearity in the operators */ diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/exp/exponential.hpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/exp/exponential.hpp index 3095c5446ab0c0a66e6a707507ad5e7b0b080f88..623ee5759cf3bc0aa27d9ed0cffd9f25475dac39 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/exp/exponential.hpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/exp/exponential.hpp @@ -114,12 +114,6 @@ public: */ inline NODE_TYPE type(){return NODE_TYPE::EXP;} - // DocString: exp_node_domain - /** - * @brief The domain for the feature (min/max values) - */ - Domain domain(){return _feats[0]->domain().exp(1.0, 0.0);} - /** * @brief Get the string character representation of the node for the postfix expression * @return the string representation of the node for the postfix expression @@ -180,21 +174,6 @@ public: */ void set_test_value(const double* params, int offset=-1, int depth=1); - /** - * @brief The domain for the feature (min/max values) - * - * @param params parameter values for non-linear operations - * @return the domain - */ - inline Domain domain(double* params, int depth=1) - { - if(depth < nlopt_wrapper::_max_param_depth) - return _feats[0]->domain(params + 2, depth + 1).exp(params[0], params[1]); - else - return _feats[0]->domain().exp(params[0], params[1]); - } - // inline Domain domain(double* params){return _feats[0]->domain(params + 2).exp(params[0], params[1]);} - /** * @brief The expression of the feature * @@ -210,7 +189,6 @@ public: params[1] ); } - // inline std::string expr(double* params){return "exp(" + std::to_string(params[0]) + "*" + _feats[0]->expr(params + 2) + " + " + std::to_string(params[1]) + ")";} /** * @brief Set the bounds for the nl parameterization diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/exp/parameterized_exponential.hpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/exp/parameterized_exponential.hpp index 6f3556d5902faa5e4799cc358090c1b5ef9751d0..6535364ecb6a4c0f178f89357bb827c43e59436a 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/exp/parameterized_exponential.hpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/exp/parameterized_exponential.hpp @@ -38,7 +38,6 @@ protected: using ExpNode::set_test_value; using ExpNode::value_ptr; using ExpNode::test_value_ptr; - using ExpNode::domain; using ExpNode::expr; std::vector<double> _params; //!< The parameters vector @@ -126,12 +125,6 @@ public: */ inline std::string expr(){return expr(_params.data());} - // DocString: exp_param_node_domain - /** - * @brief The domain for the feature (min/max values) - */ - inline Domain domain(){return domain(_params.data());} - /** * @brief The parameters used for introducing more non linearity in the operators */ diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/inv/inverse.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/inv/inverse.cpp index 891a1ea5c89acb24cbaf766b70c7171eaf74c3f3..5a54ea39a943c5f3e586d8f73c00cc35fec1f8ff 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/inv/inverse.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/inv/inverse.cpp @@ -7,9 +7,6 @@ void generateInvNode(std::vector<node_ptr>& feat_list, node_ptr feat, int& feat_ if((feat->type() == NODE_TYPE::DIV) || (feat->type() == NODE_TYPE::EXP) || (feat->type() == NODE_TYPE::NEG_EXP) || (feat->type() == NODE_TYPE::INV)) return; - // if(feat->domain().contains(0.0)) - // return; - int offset = feat->rung() + 1; double* val_ptr = node_value_arrs::get_value_ptr(feat_ind, feat_ind, offset); allowed_op_funcs::inv(feat->n_samp(), feat->value_ptr(offset + 2), 1.0, 0.0, val_ptr); @@ -33,9 +30,6 @@ InvNode::InvNode(node_ptr feat, int feat_ind, double l_bound, double u_bound): if((feat->type() == NODE_TYPE::DIV) || (feat->type() == NODE_TYPE::EXP) || (feat->type() == NODE_TYPE::NEG_EXP) || (feat->type() == NODE_TYPE::INV)) throw InvalidFeatureException(); - if((_feats[0]->domain().contains(0.0))) - throw InvalidFeatureException(); - set_value(); if(is_nan() || is_const() || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) > u_bound) || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) < l_bound)) throw InvalidFeatureException(); diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/inv/inverse.hpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/inv/inverse.hpp index fdfde7d3998d01641aba1a590e0e816f4330eb20..8a83ecc944a0ab8169db58a0f3d4dd1972827ccc 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/inv/inverse.hpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/inv/inverse.hpp @@ -1,7 +1,16 @@ +/** @file feature_creation/node/operator_nodes/allowed_operator_nodes/inverse.hpp + * @brief Class describing the logarithm operator + * + * This class represents the unary operator -> 1 / A + * + * @author Thomas A. R. Purcell (tpurcell) + * @bug No known bugs. + */ #ifndef INV_NODE #define INV_NODE #include <feature_creation/node/operator_nodes/OperatorNode.hpp> +#include <fmt/core.h> // DocString: cls_inv_node /** @@ -35,7 +44,7 @@ public: InvNode(node_ptr feat, int feat_ind); /** - * @brief Constructor + * @brief Constructor without checking feature values * @details Constructs the Node from node pointer of the feature to operate on * * @param feat shared_ptr of the feature to operate on (A) @@ -55,7 +64,13 @@ public: /** * @brief Get the expression for the overall feature (From root node down) */ - inline std::string expr(){return "1.0 / (" + _feats[0]->expr() + ")";} + inline std::string expr() + { + return fmt::format( + "1.0 / [{}]", + _feats[0]->expr() + ); + } // DocString: inv_node_latex_expr /** @@ -72,7 +87,7 @@ public: * * @param offset(int) Key to determine which part of the temporary storage array to look into */ - void set_value(int offset = -1); + virtual void set_value(int offset = -1); // DocString: inv_node_set_test_value /** @@ -80,7 +95,7 @@ public: * * @param offset(int) Key to determine which part of the temporary storage array to look into */ - void set_test_value(int offset = -1); + virtual void set_test_value(int offset = -1); // DocString: inv_node_rung /** @@ -119,6 +134,78 @@ public: * @param expected_abs_tot The expected absolute sum of all values in div_mult_leaves */ void update_div_mult_leaves(std::map<std::string, double>& div_mult_leaves, double fact, double& expected_abs_tot); + + #ifdef PARAMETERIZE + /** + * @brief The parameters used for introducing more non linearity in the operators + */ + virtual std::vector<double> parameters(){return {};} + + /** + * @brief Solve the non-linear optimization to set the parameters + * @details Fits the data points from _feats->value_ptr and prop to get the parameters for the feature + * + * @param prop property to fit to get the parameters + */ + virtual void get_parameters(std::vector<double>& prop){return;} + + /** + * @brief Set the non-linear parameters + */ + virtual void set_parameters(std::vector<double>){return;} + + /** + * @brief Set the values of the training data for the feature inside of the value storage arrays + * + * @param offset(int) Key to determine which part of the temporary storage array to look into + * @param params pointer to the parameter values + */ + void set_value(const double* params, int offset=-1, int depth=1); + + /** + * @brief Set the values of the test data for the feature inside of the value storage arrays + * + * @param offset(int) Key to determine which part of the temporary storage array to look into + * @param params pointer to the parameter values + */ + void set_test_value(const double* params, int offset=-1, int depth=1); + + /** + * @brief The expression of the feature + * + * @param params parameter values for non-linear operations + * @return feature expression + */ + inline std::string expr(double* params, int depth=1) + { + return fmt::format( + "1.0 / [{:.10e} * {} {:+15.10e}]", + params[0], + (depth < nlopt_wrapper::_max_param_depth ? _feats[0]->expr(params + 2, depth + 1) : _feats[0]->expr()), + params[1] + ); + } + + /** + * @brief Set the bounds for the nl parameterization + * + * @param lb pointer to the lower bounds data + * @param ub pointer to the upper bounds data + */ + void set_bounds(double* lb, double* ub, int from_parent=2, int depth = 1); + + /** + * @brief Calculates the derivative of an operation with respect to the parameters for a given sample + * + * @param params pointer to the parameters + * @param dfdp pointer to where the feature derivative pointers are located + */ + inline void param_derivative(const double* params, double* dfdp) + { + double* val_ptr = _feats[0]->value_ptr(params); + std::transform(val_ptr, val_ptr + _n_samp, dfdp, [params](double vp){return -1.0 / std::pow(params[0] * vp + params[1], 2.0);}); + } + #endif }; void generateInvNode(std::vector<node_ptr>& feat_list, node_ptr feat, int& feat_ind, double l_bound, double u_bound); diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/inv/parameterized_inverse.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/inv/parameterized_inverse.cpp index ad1b12c814737a651a9e745c7f5a65b501853b1b..a5da2982bc4b2affe70a1104f525173106b86349 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/inv/parameterized_inverse.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/inv/parameterized_inverse.cpp @@ -11,9 +11,6 @@ void generateInvParamNode(std::vector<node_ptr>& feat_list, node_ptr feat, int& node_ptr new_feat = std::make_shared<InvParamNode>(feat, feat_ind, prop); - // if(new_feat->parameters()[0] * feat->domain() + new_feat->parameters()[1] <= 0.0) - // return; - new_feat->set_value(); if(new_feat->is_nan() || new_feat->is_const() || (util_funcs::max_abs_val<double>(new_feat->value_ptr(), new_feat->n_samp()) > u_bound) || (util_funcs::max_abs_val<double>(new_feat->value_ptr(), new_feat->n_samp()) < l_bound)) return; @@ -33,9 +30,6 @@ InvParamNode::InvParamNode(node_ptr feat, int feat_ind, double l_bound, double u _params.resize(n_params(), 0.0); get_parameters(prop); - if((_params[0] * _feats[0]->domain() + _params[1]).contains(0.0)) - throw InvalidFeatureException(); - if(is_nan() || is_const() || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) > u_bound) || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) < l_bound)) throw InvalidFeatureException(); } diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/inv/parameterized_inverse.hpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/inv/parameterized_inverse.hpp index b0cb4375d4281c35a24d854ff7574e007bc8c5b1..eb9aced77bd81e1e70c4d396a99e72f9e13c51fa 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/inv/parameterized_inverse.hpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/inv/parameterized_inverse.hpp @@ -38,7 +38,6 @@ protected: using InvNode::set_test_value; using InvNode::value_ptr; using InvNode::test_value_ptr; - using InvNode::domain; using InvNode::expr; std::vector<double> _params; //!< The parameters vector @@ -126,12 +125,6 @@ public: */ inline std::string expr(){return expr(_params.data());} - // DocString: inv_param_node_domain - /** - * @brief The domain for the feature (min/max values) - */ - inline Domain domain(){return domain(_params.data());} - /** * @brief The parameters used for introducing more non linearity in the operators */ diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/log/log.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/log/log.cpp index 52804a612570de0b568bb067499888b8e1d6ef13..74001c91c97dcbb57b11d6ce2f30f0d2beabf5e8 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/log/log.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/log/log.cpp @@ -6,9 +6,6 @@ void generateLogNode(std::vector<node_ptr>& feat_list, node_ptr feat, int& feat_ if(feat->unit() != Unit() || (feat->type() == NODE_TYPE::NEG_EXP) || (feat->type() == NODE_TYPE::EXP) || (feat->type() == NODE_TYPE::DIV) || (feat->type() == NODE_TYPE::INV) || (feat->type() == NODE_TYPE::MULT) || (feat->type() == NODE_TYPE::LOG) || (feat->type() == NODE_TYPE::SIX_POW) || (feat->type() == NODE_TYPE::CB) || (feat->type() == NODE_TYPE::SQ) || (feat->type() == NODE_TYPE::CBRT) || (feat->type() == NODE_TYPE::SQRT)) return; - // if(feat->domain() <= 0.0) - // return; - int offset = feat->rung() + 1; double* val_ptr = node_value_arrs::get_value_ptr(feat_ind, feat_ind, offset); allowed_op_funcs::log(feat->n_samp(), feat->value_ptr(offset + 2), 1.0, 0.0, val_ptr); @@ -35,9 +32,6 @@ LogNode::LogNode(node_ptr feat, int feat_ind, double l_bound, double u_bound): if((feat->type() == NODE_TYPE::NEG_EXP) || (feat->type() == NODE_TYPE::EXP) || (feat->type() == NODE_TYPE::DIV) || (feat->type() == NODE_TYPE::INV) || (feat->type() == NODE_TYPE::MULT) || (feat->type() == NODE_TYPE::LOG) || (feat->type() == NODE_TYPE::SIX_POW) || (feat->type() == NODE_TYPE::CB) || (feat->type() == NODE_TYPE::SQ) || (feat->type() == NODE_TYPE::CBRT) || (feat->type() == NODE_TYPE::SQRT)) throw InvalidFeatureException(); - if((_feats[0]->domain() <= 0.0)) - throw InvalidFeatureException(); - set_value(); if(is_nan() || is_const() || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) > u_bound) || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) < l_bound)) throw InvalidFeatureException(); diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/log/log.hpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/log/log.hpp index 3440f5e9742209395a2974701f87052cbcb7724f..911331509bb59ce2ee21ac6411e7969dc3447b90 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/log/log.hpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/log/log.hpp @@ -114,12 +114,6 @@ public: */ inline NODE_TYPE type(){return NODE_TYPE::LOG;} - // DocString: log_node_domain - /** - * @brief The domain for the feature (min/max values) - */ - Domain domain(){return _feats[0]->domain().log(1.0, 0.0);} - /** * @brief Get the string character representation of the node for the postfix expression * @return the string representation of the node for the postfix expression @@ -180,21 +174,6 @@ public: */ void set_test_value(const double* params, int offset=-1, int depth=1); - /** - * @brief The domain for the feature (min/max values) - * - * @param params parameter values for non-linear operations - * @return the domain - */ - inline Domain domain(double* params, int depth=1) - { - if(depth < nlopt_wrapper::_max_param_depth) - return _feats[0]->domain(params + 2, depth + 1).log(params[0], params[1]); - else - return _feats[0]->domain().log(params[0], params[1]); - } - // inline Domain domain(double* params){return _feats[0]->domain(params + 2).log(params[0], params[1]);} - /** * @brief The expression of the feature * @@ -210,7 +189,6 @@ public: params[1] ); } - // inline std::string expr(double* params){return "log(" + std::to_string(params[0]) + "*" + _feats[0]->expr(params + 2) + " + " + std::to_string(params[1]) + ")";} /** * @brief Set the bounds for the nl parameterization diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/log/parameterized_log.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/log/parameterized_log.cpp index d9440b18c83d419e0e76194820528dc1fcadb9e9..0b5e462a51da8ae3cef65f027fb23cd04f00aff9 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/log/parameterized_log.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/log/parameterized_log.cpp @@ -10,8 +10,6 @@ void generateLogParamNode(std::vector<node_ptr>& feat_list, node_ptr feat, int& return; node_ptr new_feat = std::make_shared<LogParamNode>(feat, feat_ind, prop); - // if(new_feat->parameters()[0] * feat->domain() + new_feat->parameters()[1] <= 0.0) - // return; new_feat->set_value(); if(new_feat->is_nan() || new_feat->is_const() || (util_funcs::max_abs_val<double>(new_feat->value_ptr(), new_feat->n_samp()) > u_bound) || (util_funcs::max_abs_val<double>(new_feat->value_ptr(), new_feat->n_samp()) < l_bound)) @@ -32,9 +30,6 @@ LogParamNode::LogParamNode(node_ptr feat, int feat_ind, double l_bound, double u _params.resize(n_params(), 0.0); get_parameters(prop); - if((_params[0] * _feats[0]->domain() + _params[1]) <= 0.0) - throw InvalidFeatureException(); - if(is_nan() || is_const() || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) > u_bound) || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) < l_bound)) throw InvalidFeatureException(); } diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/log/parameterized_log.hpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/log/parameterized_log.hpp index 240095606c5f0fa0fe16e7bfbcb3b6080209fd11..6099216745ebfd678f61c8bb3c66703fb9059af7 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/log/parameterized_log.hpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/log/parameterized_log.hpp @@ -36,7 +36,6 @@ class LogParamNode: public LogNode protected: using LogNode::set_value; using LogNode::set_test_value; - using LogNode::domain; using LogNode::expr; using LogNode::value_ptr; using LogNode::test_value_ptr; @@ -126,12 +125,6 @@ public: */ inline std::string expr(){return expr(_params.data());} - // DocString: log_param_node_domain - /** - * @brief The domain for the feature (min/max values) - */ - inline Domain domain(){return domain(_params.data());} - /** * @brief The parameters used for introducing more non linearity in the operators */ diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/mult/multiply.hpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/mult/multiply.hpp index 89a840740f9936438444028c3efb375186dadf03..6bc2b642c177e969f9f9bcefe1797180c6cd12af 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/mult/multiply.hpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/mult/multiply.hpp @@ -10,6 +10,7 @@ #define MULT_NODE #include <feature_creation/node/operator_nodes/OperatorNode.hpp> +#include <fmt/core.h> // DocString: cls_multiplication_node /** @@ -70,7 +71,14 @@ public: /** * @brief Get the expression for the overall feature (From root node down) */ - inline std::string expr(){return "[(" + _feats[0]->expr() + ") * (" + _feats[1]->expr() + ")]";} + inline std::string expr() + { + return fmt::format( + "[({}) * ({})]", + _feats[0]->expr(), + _feats[1]->expr() + ); + } // DocString: mult_node_latex_expr /** @@ -87,7 +95,7 @@ public: * * @param offset(int) Key to determine which part of the temporary storage array to look into */ - void set_value(int offset = -1); + virtual void set_value(int offset = -1); // DocString: mult_node_set_test_value /** @@ -95,7 +103,7 @@ public: * * @param offset(int) Key to determine which part of the temporary storage array to look into */ - void set_test_value(int offset = -1); + virtual void set_test_value(int offset = -1); // DocString: mult_node_rung /** @@ -116,6 +124,12 @@ public: */ inline std::string get_postfix_term(){return "mult";} + /** + * @brief Check if the feature will be valid, if it is then set the value + * @return True if the feature is valid + */ + void check_feats(); + /** * @brief update the dictionary used to check if an Add/Sub node is valid * @@ -134,6 +148,75 @@ public: * @param expected_abs_tot The expected absolute sum of all values in div_mult_leaves */ void update_div_mult_leaves(std::map<std::string, double>& div_mult_leaves, double fact, double& expected_abs_tot); + + #ifdef PARAMETERIZE + /** + * @brief The parameters used for introducing more non linearity in the operators + */ + virtual std::vector<double> parameters(){return {};} + + /** + * @brief Solve the non-linear optimization to set the parameters + * @details Fits the data points from _feats->value_ptr and prop to get the parameters for the feature + * + * @param prop property to fit to get the parameters + */ + virtual void get_parameters(std::vector<double>& prop){return;} + + /** + * @brief Set the non-linear parameters + */ + virtual void set_parameters(std::vector<double>){return;} + + /** + * @brief Set the values of the training data for the feature inside of the value storage arrays + * + * @param offset(int) Key to determine which part of the temporary storage array to look into + * @param params pointer to the parameter values + */ + void set_value(const double* params, int offset=-1, int depth=1); + + /** + * @brief Set the values of the test data for the feature inside of the value storage arrays + * + * @param offset(int) Key to determine which part of the temporary storage array to look into + * @param params pointer to the parameter values + */ + void set_test_value(const double* params, int offset=-1, int depth=1); + + /** + * @brief The expression of the feature + * + * @param params parameter values for non-linear operations + * @return feature expression + */ + inline std::string expr(double* params, int depth=1) + { + return fmt::format( + "[({}) * ({:.10e} * {} {:+15.10e})]", + (depth < nlopt_wrapper::_max_param_depth ? _feats[0]->expr(params + _feats[1]->n_params() + 2, depth + 1) : _feats[0]->expr()), + params[0], + (depth < nlopt_wrapper::_max_param_depth ? _feats[1]->expr(params + 2, depth + 1) : _feats[1]->expr()), + params[1] + ); + } + + /** + * @brief Set the bounds for the nl parameterization + * + * @param lb pointer to the lower bounds data + * @param ub pointer to the upper bounds data + */ + void set_bounds(double* lb, double* ub, int from_parent=2, int depth = 1); + + /** + * @brief Calculates the derivative of an operation with respect to the parameters for a given sample + * + * @param params pointer to the parameters + * @param dfdp pointer to where the feature derivative pointers are located + */ + inline void param_derivative(const double* params, double* dfdp){std::copy_n(_feats[0]->value_ptr(params, 2), _n_samp, dfdp);} + #endif }; void generateMultNode(std::vector<node_ptr>& feat_list, node_ptr feat_1, node_ptr feat_2, int& feat_ind, double l_bound, double u_bound); diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/mult/parameterized_multiply.hpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/mult/parameterized_multiply.hpp index 171bea8e63d07ede1a39b6b4a21d4719d4d458e2..376f7bc6691932d8e6320cfea5517db690ee61a0 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/mult/parameterized_multiply.hpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/mult/parameterized_multiply.hpp @@ -38,7 +38,6 @@ protected: using MultNode::set_test_value; using MultNode::value_ptr; using MultNode::test_value_ptr; - using MultNode::domain; using MultNode::expr; std::vector<double> _params; @@ -126,12 +125,6 @@ public: */ inline std::string expr(){return expr(_params.data());} - // DocString: mult_param_node_domain - /** - * @brief The domain for the feature (min/max values) - */ - inline Domain domain(){return domain(_params.data());} - /** * @brief The parameters used for introducing more non linearity in the operators */ diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/neg_exp/negative_exponential.hpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/neg_exp/negative_exponential.hpp index 0dc53cfb43dbf8bf736d6ad85b1fb97353973453..18fbeba9b61ef472f542ff1e271e106260ec48af 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/neg_exp/negative_exponential.hpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/neg_exp/negative_exponential.hpp @@ -115,12 +115,6 @@ public: */ inline NODE_TYPE type(){return NODE_TYPE::NEG_EXP;} - // DocString: neg_exp_node_domain - /** - * @brief The domain for the feature (min/max values) - */ - Domain domain(){return _feats[0]->domain().exp(-1.0, 0.0);} - /** * @brief Get the string character representation of the node for the postfix expression * @return the string representation of the node for the postfix expression @@ -181,21 +175,6 @@ public: */ void set_test_value(const double* params, int offset=-1, int depth=1); - /** - * @brief The domain for the feature (min/max values) - * - * @param params parameter values for non-linear operations - * @return the domain - */ - inline Domain domain(double* params, int depth=1) - { - if(depth < nlopt_wrapper::_max_param_depth) - return _feats[0]->domain(params + 2, depth + 1).exp(-1.0 * params[0], params[1]); - else - return _feats[0]->domain().exp(-1.0 * params[0], params[1]); - } - // inline Domain domain(double* params){return _feats[0]->domain(params + 2).exp(-1.0 * params[0], params[1]);} - /** * @brief The expression of the feature * @@ -211,7 +190,6 @@ public: params[1] ); } - // inline std::string expr(double* params){return "exp[-1.0*(" + std::to_string(params[0]) + "*" + _feats[0]->expr(params + 2) + " + " + std::to_string(params[1]) + ")]";} /** * @brief Set the bounds for the nl parameterization diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/neg_exp/parameterized_negative_exponential.hpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/neg_exp/parameterized_negative_exponential.hpp index 5f6c8e2a4b6d888b2b9aa21e83b9b5d95ca6b9f9..746d8512f2f9a6117287f912d433720b54597f96 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/neg_exp/parameterized_negative_exponential.hpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/neg_exp/parameterized_negative_exponential.hpp @@ -38,7 +38,6 @@ protected: using NegExpNode::set_test_value; using NegExpNode::value_ptr; using NegExpNode::test_value_ptr; - using NegExpNode::domain; using NegExpNode::expr; std::vector<double> _params; //!< The parameters vector @@ -126,12 +125,6 @@ public: */ inline std::string expr(){return expr(_params.data());} - // DocString: neg_exp_param_node_domain - /** - * @brief The domain for the feature (min/max values) - */ - inline Domain domain(){return domain(_params.data());} - /** * @brief The parameters used for introducing more non linearity in the operators */ diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sin/parameterized_sin.hpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sin/parameterized_sin.hpp index 1824afe51d6c0915afa6561fa21d08d9462a1585..599f5db5b52279a6a9ed983da0f11df81b6d249a 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sin/parameterized_sin.hpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sin/parameterized_sin.hpp @@ -38,7 +38,6 @@ protected: using SinNode::set_test_value; using SinNode::value_ptr; using SinNode::test_value_ptr; - using SinNode::domain; using SinNode::expr; std::vector<double> _params; //!< The parameters vector @@ -126,12 +125,6 @@ public: */ inline std::string expr(){return expr(_params.data());} - // DocString: sin_param_node_domain - /** - * @brief The domain for the feature (min/max values) - */ - inline Domain domain(){return domain(_params.data());} - /** * @brief The parameters used for introducing more non linearity in the operators */ diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sin/sin.hpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sin/sin.hpp index d8960fe3434063e7ac862c6e35e61bd3e68f398c..93c2eecd25da0366ddf81c6f2cf1e3a6a4d1f946 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sin/sin.hpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sin/sin.hpp @@ -115,12 +115,6 @@ public: */ inline NODE_TYPE type(){return NODE_TYPE::SIN;} - // DocString: sin_node_domain - /** - * @brief The domain for the feature (min/max values) - */ - Domain domain(){return Domain(-1.0, 1.0);} - /** * @brief Get the string character representation of the node for the postfix expression * @return the string representation of the node for the postfix expression @@ -181,14 +175,6 @@ public: */ void set_test_value(const double* params, int offset=-1, int depth=1); - /** - * @brief The domain for the feature (min/max values) - * - * @param params parameter values for non-linear operations - * @return the domain - */ - inline Domain domain(double* params, int depth=1){return Domain(-1.0, 1.0);} - /** * @brief The expression of the feature * @@ -204,7 +190,6 @@ public: params[1] ); } - // inline std::string expr(double* params){return "sin(" + std::to_string(params[0]) + "*" + _feats[0]->expr(params + 2) + " + " + std::to_string(params[1]) + ")";} /** * @brief Set the bounds for the nl parameterization diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/six_pow/parameterized_sixth_power.hpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/six_pow/parameterized_sixth_power.hpp index 0ff6829da1242d3c51da22a5560bf43469d6cc66..083485bb8370bc2218ef2ac6b499e7618ae5eb95 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/six_pow/parameterized_sixth_power.hpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/six_pow/parameterized_sixth_power.hpp @@ -38,7 +38,6 @@ protected: using SixPowNode::set_test_value; using SixPowNode::value_ptr; using SixPowNode::test_value_ptr; - using SixPowNode::domain; using SixPowNode::expr; std::vector<double> _params; //!< The parameters vector @@ -127,12 +126,6 @@ public: */ inline std::string expr(){return expr(_params.data());} - // DocString: six_pow_param_node_domain - /** - * @brief The domain for the feature (min/max values) - */ - inline Domain domain(){return domain(_params.data());} - /** * @brief The parameters used for introducing more non linearity in the operators */ diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/six_pow/sixth_power.hpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/six_pow/sixth_power.hpp index 576c0f5b17f590b6bf4191d28ba33afc0a9fc9df..cb46b0e4e14d4ad42d3befe47b12c5fc26de91f4 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/six_pow/sixth_power.hpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/six_pow/sixth_power.hpp @@ -77,6 +77,15 @@ public: ); } + // DocString: six_pow_node_latex_expr + /** + * @brief Get the expression for the overall feature (From root node down) + */ + inline std::string get_latex_expr(std::string cap) + { + return cap + "\\left(" + _feats[0]->get_latex_expr("") + "^6\\right)" + cap; + } + // DocString: six_pow_node_set_value /** * @brief Set the values of the training data for the feature inside of the value storage arrays @@ -106,12 +115,6 @@ public: */ inline NODE_TYPE type(){return NODE_TYPE::SIX_POW;} - // DocString: six_pow_node_domain - /** - * @brief The domain for the feature (min/max values) - */ - Domain domain(){return _feats[0]->domain().pow(6.0, 1.0, 0.0);} - /** * @brief Get the string character representation of the node for the postfix expression * @return the string representation of the node for the postfix expression @@ -172,21 +175,6 @@ public: */ void set_test_value(const double* params, int offset=-1, int depth=1); - /** - * @brief The domain for the feature (min/max values) - * - * @param params parameter values for non-linear operations - * @return the domain - */ - inline Domain domain(double* params, int depth=1) - { - if(depth < nlopt_wrapper::_max_param_depth) - return _feats[0]->domain(params + 2, depth + 1).pow(6.0, params[0], params[1]); - else - return _feats[0]->domain().pow(6.0, params[0], params[1]); - } - // inline Domain domain(double* params){return _feats[0]->domain(params + 2).pow(6.0, params[0], params[1]);} - /** * @brief The expression of the feature * @@ -202,8 +190,6 @@ public: params[1] ); } - // inline std::string expr(double* params){return "(" + std::to_string(params[0]) + "*" + _feats[0]->expr(params + 2) + " + " + std::to_string(params[1]) + ")^6";} - /** * @brief Set the bounds for the nl parameterization * diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sp/sixth_power.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sp/sixth_power.cpp deleted file mode 100644 index ecaacee8678c5995004072a3f48d3184858bf7ad..0000000000000000000000000000000000000000 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sp/sixth_power.cpp +++ /dev/null @@ -1,66 +0,0 @@ -#include <feature_creation/node/operator_nodes/allowed_operator_nodes/sp/sixth_power.hpp> - -void generateSixPowNode(std::vector<node_ptr>& feat_list, node_ptr feat, int& feat_ind, double l_bound, double u_bound) -{ - ++feat_ind; - if((feat->type() == NODE_TYPE::CBRT) || (feat->type() == NODE_TYPE::SQRT) || (feat->type() == NODE_TYPE::SQ) || (feat->type() == NODE_TYPE::CB) || (feat->type() == NODE_TYPE::INV)) - return; - - int offset = feat->rung() + 1; - double* val_ptr = node_value_arrs::get_value_ptr(feat_ind, feat_ind, offset); - allowed_op_funcs::sixth_pow(feat->n_samp(), feat->value_ptr(offset + 2), val_ptr); - - if((util_funcs::stand_dev(val_ptr, feat->n_samp()) < 1.0e-13) || std::any_of(val_ptr, val_ptr + feat->n_samp(), [&u_bound](double d){return (!std::isfinite(d)) || (std::abs(d) > u_bound);}) || (util_funcs::max_abs_val<double>(val_ptr, feat->n_samp()) < l_bound)) - return; - - feat_list.push_back(std::make_shared<SixPowNode>(feat, feat_ind)); -} - -SixPowNode::SixPowNode() -{} - -SixPowNode::SixPowNode(node_ptr feat, int feat_ind): - OperatorNode({feat}, feat_ind) -{} - -SixPowNode::SixPowNode(node_ptr feat, int feat_ind, double l_bound, double u_bound): - OperatorNode({feat}, feat_ind) -{ - if((feat->type() == NODE_TYPE::CBRT) || (feat->type() == NODE_TYPE::SQRT) || (feat->type() == NODE_TYPE::SQ) || (feat->type() == NODE_TYPE::CB) || (feat->type() == NODE_TYPE::INV)) - throw InvalidFeatureException(); - - set_value(); - if(is_nan() || is_const() || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) > u_bound) || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) < l_bound)) - throw InvalidFeatureException(); -} - -void SixPowNode::update_add_sub_leaves(std::map<std::string, int>& add_sub_leaves, int pl_mn, int& expected_abs_tot) -{ - std::string key = expr(); - if(add_sub_leaves.count(key) > 0) - add_sub_leaves[key] += pl_mn; - else - add_sub_leaves[key] = pl_mn; - - ++expected_abs_tot; -} - -void SixPowNode::update_div_mult_leaves(std::map<std::string, double>& div_mult_leaves, double fact, double& expected_abs_tot) -{ - _feats[0]->update_div_mult_leaves(div_mult_leaves, fact * 6.0, expected_abs_tot); -} - -void SixPowNode::set_value(int offset) -{ - if(_selected) - allowed_op_funcs::sixth_pow(_n_samp, _feats[0]->value_ptr(offset + 2), node_value_arrs::get_d_matrix_ptr(_d_mat_ind)); - - offset = (offset == -1) ? rung() : offset; - allowed_op_funcs::sixth_pow(_n_samp, _feats[0]->value_ptr(offset + 2), node_value_arrs::get_value_ptr(_arr_ind, _feat_ind, offset)); -} - -void SixPowNode::set_test_value(int offset) -{ - offset = (offset == -1) ? rung() : offset; - allowed_op_funcs::sixth_pow(_n_test_samp, _feats[0]->test_value_ptr(offset + 2), 1.0, 0.0, node_value_arrs::get_test_value_ptr(_arr_ind, _feat_ind, offset)); -} diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sp/sixth_power.hpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sp/sixth_power.hpp deleted file mode 100644 index 868ce0ed237c2bc526118adce3fd23a6b1c7f619..0000000000000000000000000000000000000000 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sp/sixth_power.hpp +++ /dev/null @@ -1,139 +0,0 @@ -/** @file feature_creation/node/operator_nodes/allowed_operator_nodes/sixth_power.hpp - * @brief Class describing the sixth power operator - * - * This class represents the unary operator -> (A)^6 - * - * @author Thomas A. R. Purcell (tpurcell) - * @bug No known bugs. - */ -#ifndef SIXTH_POW_NODE -#define SIXTH_POW_NODE - -#include <feature_creation/node/operator_nodes/OperatorNode.hpp> - -// DocString: cls_six_pow_node -/** - * @brief Node for the sixth power operator - * - */ -class SixPowNode: public OperatorNode<1> -{ - friend class boost::serialization::access; - - /** - * @brief Serialization function to send over MPI - * - * @param ar Archive representation of node - */ - template <typename Archive> - void serialize(Archive& ar, const unsigned int version) - { - ar & boost::serialization::base_object<OperatorNode>(*this); - } - -public: - /** - * @brief Base Constructor - * @details This is only used for serialization - */ - SixPowNode(); - - /** - * @brief Constructor - * @details Constructs the Node from an array of features to operate on - * - * @param feat shared_ptr of the feature to operate on (A) - * @param feat_ind Index of the new feature - */ - SixPowNode(node_ptr feat, int feat_ind); - - /** - * @brief Constructor - * @details Constructs the Node from node pointer of the feature to operate on - * - * @param feat shared_ptr of the feature to operate on (A) - * @param feat_ind Index of the new feature - * @param l_bound Minimum absolute value allowed for the feature. - * @param u_bound Maximum absolute value allowed for the feature. - */ - SixPowNode(node_ptr feat, int feat_ind, double l_bound, double u_bound); - - // DocString: six_pow_node_unit - /** - * @brief Get the unit of the feature (combine the units of _feats) - */ - inline Unit unit(){return _feats[0]->unit()^(6.0);} - - // DocString: six_pow_node_expr - /** - * @brief Get the expression for the overall feature (From root node down) - */ - inline std::string expr(){return "(" + _feats[0]->expr() + ")^6";} - - // DocString: six_pow_node_latex_expr - /** - * @brief Get the expression for the overall feature (From root node down) - */ - inline std::string get_latex_expr(std::string cap) - { - return cap + "\\left(" + _feats[0]->get_latex_expr("") + "^6\\right)" + cap; - } - - // DocString: six_pow_node_set_value - /** - * @brief Set the values of the training data for the feature inside of the value storage arrays - * - * @param offset(int) Key to determine which part of the temporary storage array to look into - */ - void set_value(int offset = -1); - - // DocString: six_pow_node_set_test_value - /** - * @brief Set the values of the test data for the feature inside of the value storage arrays - * - * @param offset(int) Key to determine which part of the temporary storage array to look into - */ - void set_test_value(int offset = -1); - - // DocString: six_pow_node_rung - /** - * @brief return the rung of the feature - * - * @param cur_rung The rung current rung of the feature tree (used to recursively calculate rung) - */ - inline int rung(int cur_rung=0){return _feats[0]->rung(cur_rung + 1);} - - /** - * @brief Returns the type of node this is - */ - inline NODE_TYPE type(){return NODE_TYPE::SIX_POW;} - - /** - * @brief Get the string character representation of the node for the postfix expression - * @return the string representation of the node for the postfix expression - */ - inline std::string get_postfix_term(){return "sp";} - - /** - * @brief update the dictionary used to check if an Add/Sub node is valid - * - * @param add_sub_leaves the dictionary used to check if an Add/Sub node is valid - * @param pl_mn if for an addition node: 1 if for a subtraction node: -1 - * @param expected_abs_tot The expected absolute sum of all values in add_sub_leaves - */ - void update_add_sub_leaves(std::map<std::string, int>& add_sub_leaves, int pl_mn, int& expected_abs_tot); - - /** - * @brief update the dictionary used to check if - * @details [long description] - * - * @param add_sub_leaves [description] - * @param fact amount to increment the dictionary by - * @param expected_abs_tot The expected absolute sum of all values in div_mult_leaves - */ - void update_div_mult_leaves(std::map<std::string, double>& div_mult_leaves, double fact, double& expected_abs_tot); - -}; -void generateSixPowNode(std::vector<node_ptr>& feat_list, node_ptr feat, int& feat_ind, double l_bound, double u_bound); - -#endif diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sq/parameterized_square.hpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sq/parameterized_square.hpp index d3eeb416645d8ca5d4207eaa73dab5c62aa0afe0..dd4efbcc83d1e5d2f0d6f492aa44dade67a2093a 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sq/parameterized_square.hpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sq/parameterized_square.hpp @@ -38,7 +38,6 @@ protected: using SqNode::set_test_value; using SqNode::value_ptr; using SqNode::test_value_ptr; - using SqNode::domain; using SqNode::expr; std::vector<double> _params; //!< The parameters vector @@ -126,12 +125,6 @@ public: */ inline std::string expr(){return expr(_params.data());} - // DocString: sq_param_node_domain - /** - * @brief The domain for the feature (min/max values) - */ - inline Domain domain(){return domain(_params.data());} - /** * @brief The parameters used for introducing more non linearity in the operators */ diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sq/square.hpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sq/square.hpp index 281afea2047e06cba5616959750e5f7f921beaba..0329bd18333dff57afcf12dafa3050007953edf4 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sq/square.hpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sq/square.hpp @@ -114,12 +114,6 @@ public: */ inline NODE_TYPE type(){return NODE_TYPE::SQ;} - // DocString: sq_node_domain - /** - * @brief The domain for the feature (min/max values) - */ - Domain domain(){return _feats[0]->domain().pow(2.0, 1.0, 0.0);} - /** * @brief Get the string character representation of the node for the postfix expression * @return the string representation of the node for the postfix expression @@ -180,21 +174,6 @@ public: */ void set_test_value(const double* params, int offset=-1, int depth=1); - /** - * @brief The domain for the feature (min/max values) - * - * @param params parameter values for non-linear operations - * @return the domain - */ - inline Domain domain(double* params, int depth=1) - { - if(depth < nlopt_wrapper::_max_param_depth) - return _feats[0]->domain(params + 2, depth + 1).pow(2.0, params[0], params[1]); - else - return _feats[0]->domain().pow(2.0, params[0], params[1]); - } - // inline Domain domain(double* params){return _feats[0]->domain(params + 2).pow(2.0, params[0], params[1]);} - /** * @brief The expression of the feature * @@ -210,7 +189,6 @@ public: params[1] ); } - // inline std::string expr(double* params){return "(" + std::to_string(params[0]) + "*" + _feats[0]->expr(params + 2) + " + " + std::to_string(params[1]) + ")^2";} /** * @brief Set the bounds for the nl parameterization diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sqrt/parameterized_square_root.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sqrt/parameterized_square_root.cpp index bbfc302a3bb833f28cbd127c2a349bd80aaaa331..dada6dd339d4043523e3ecac64e3898c1bb3764a 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sqrt/parameterized_square_root.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sqrt/parameterized_square_root.cpp @@ -7,9 +7,6 @@ void generateSqrtParamNode(std::vector<node_ptr>& feat_list, node_ptr feat, int& ++feat_ind; node_ptr new_feat = std::make_shared<SqrtParamNode>(feat, feat_ind, prop); - // if(new_feat->parameters()[0] * feat->domain() + new_feat->parameters()[1] <= 0.0) - // return; - new_feat->set_value(); if(new_feat->is_nan() || new_feat->is_const() || (util_funcs::max_abs_val<double>(new_feat->value_ptr(), new_feat->n_samp()) > u_bound) || (util_funcs::max_abs_val<double>(new_feat->value_ptr(), new_feat->n_samp()) < l_bound)) return; @@ -27,9 +24,6 @@ SqrtParamNode::SqrtParamNode(node_ptr feat, int feat_ind, double l_bound, double _params.resize(n_params(), 0.0); get_parameters(prop); - if((_params[0] * _feats[0]->domain() + _params[1]) <= 0.0) - throw InvalidFeatureException(); - if(is_nan() || is_const() || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) > u_bound) || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) < l_bound)) throw InvalidFeatureException(); } diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sqrt/parameterized_square_root.hpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sqrt/parameterized_square_root.hpp index c85c5774581c61920b5c359343e1ec4b6073dd13..104ad7c4859971fe581e41764f501f7beb696c60 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sqrt/parameterized_square_root.hpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sqrt/parameterized_square_root.hpp @@ -39,7 +39,6 @@ protected: using SqrtNode::set_test_value; using SqrtNode::value_ptr; using SqrtNode::test_value_ptr; - using SqrtNode::domain; using SqrtNode::expr; std::vector<double> _params; //!< The parameters vector @@ -128,12 +127,6 @@ public: */ inline std::string expr(){return expr(_params.data());} - // DocString: sqrt_param_node_domain - /** - * @brief The domain for the feature (min/max values) - */ - inline Domain domain(){return domain(_params.data());} - /** * @brief The parameters used for introducing more non linearity in the operators */ diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sqrt/square_root.cpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sqrt/square_root.cpp index ece4fcb0f61b43f456ebd41ef13f8f2d53dd5497..0c918271885c2aa4434f258917c0b7c9f433ddfe 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sqrt/square_root.cpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sqrt/square_root.cpp @@ -6,9 +6,6 @@ void generateSqrtNode(std::vector<node_ptr>& feat_list, node_ptr feat, int& feat if((feat->type() == NODE_TYPE::SQ) || (feat->type() == NODE_TYPE::CB) || (feat->type() == NODE_TYPE::SIX_POW) || (feat->type() == NODE_TYPE::CBRT) || (feat->type() == NODE_TYPE::INV)) return; - // if(feat->domain() < 0.0) - // return; - int offset = feat->rung() + 1; double* val_ptr = node_value_arrs::get_value_ptr(feat_ind, feat_ind, offset); allowed_op_funcs::sqrt(feat->n_samp(), feat->value_ptr(offset + 2), 1.0, 0.0, val_ptr); @@ -32,9 +29,6 @@ SqrtNode::SqrtNode(node_ptr feat, int feat_ind, double l_bound, double u_bound): if((feat->type() == NODE_TYPE::SQ) || (feat->type() == NODE_TYPE::CB) || (feat->type() == NODE_TYPE::SIX_POW) || (feat->type() == NODE_TYPE::CBRT) || (feat->type() == NODE_TYPE::INV)) throw InvalidFeatureException(); - if((_feats[0]->domain() < 0.0)) - throw InvalidFeatureException(); - set_value(); if(is_nan() || is_const() || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) > u_bound) || (util_funcs::max_abs_val<double>(value_ptr(), _n_samp) < l_bound)) throw InvalidFeatureException(); diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sqrt/square_root.hpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sqrt/square_root.hpp index f479dc00a20649229626dff8746771095926ab87..9643707a17d35e78e03de5a18b16caef1b3126a5 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sqrt/square_root.hpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sqrt/square_root.hpp @@ -115,12 +115,6 @@ public: */ inline NODE_TYPE type(){return NODE_TYPE::SQRT;} - // DocString: sqrt_node_domain - /** - * @brief The domain for the feature (min/max values) - */ - Domain domain(){return _feats[0]->domain().pow(0.5, 1.0, 0.0);} - /** * @brief Get the string character representation of the node for the postfix expression * @return the string representation of the node for the postfix expression @@ -181,21 +175,6 @@ public: */ void set_test_value(const double* params, int offset=-1, int depth=1); - /** - * @brief The domain for the feature (min/max values) - * - * @param params parameter values for non-linear operations - * @return the domain - */ - inline Domain domain(double* params, int depth=1) - { - if(depth < nlopt_wrapper::_max_param_depth) - return _feats[0]->domain(params + 2, depth + 1).pow(0.5, params[0], params[1]); - else - return _feats[0]->domain().pow(0.5, params[0], params[1]); - } - // inline Domain domain(double* params){return _feats[0]->domain(params + 2).pow(1.0 / 2.0, params[0], params[1]);} - /** * @brief The expression of the feature * @@ -211,7 +190,6 @@ public: params[1] ); } - // inline std::string expr(double* params){return "sqrt(" + std::to_string(params[0]) + "*" + _feats[0]->expr(params + 2) + " + " + std::to_string(params[1]) + ")";} /** * @brief Set the bounds for the nl parameterization diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sub/parameterized_subtract.hpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sub/parameterized_subtract.hpp index 611bdb04ddf8c6e2c616bd3e623c278b3c5ee654..1b092de7e46681c4ec7fd5ddabf575db682802ed 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sub/parameterized_subtract.hpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sub/parameterized_subtract.hpp @@ -38,7 +38,6 @@ protected: using SubNode::set_test_value; using SubNode::value_ptr; using SubNode::test_value_ptr; - using SubNode::domain; using SubNode::expr; std::vector<double> _params; @@ -128,12 +127,6 @@ public: */ inline std::string expr(){return expr(_params.data());} - // DocString: sub_param_node_domain - /** - * @brief The domain for the feature (min/max values) - */ - inline Domain domain(){return domain(_params.data());} - /** * @brief The parameters used for introducing more non linearity in the operators */ diff --git a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sub/subtract.hpp b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sub/subtract.hpp index 525892b213c7ec93d868862b941880fb990107b7..7024247031986f409208535cfdba51a013655c89 100644 --- a/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sub/subtract.hpp +++ b/src/feature_creation/node/operator_nodes/allowed_operator_nodes/sub/subtract.hpp @@ -10,6 +10,7 @@ #define SUB_NODE #include <feature_creation/node/operator_nodes/OperatorNode.hpp> +#include <fmt/core.h> // DocString: cls_sub_node /** @@ -70,7 +71,14 @@ public: /** * @brief Get the expression for the overall feature (From root node down) */ - inline std::string expr(){return "[(" + _feats[0]->expr() + ") - (" + _feats[1]->expr() + ")]";} + inline std::string expr() + { + return fmt::format( + "[({}) - ({})]", + _feats[0]->expr(), + _feats[1]->expr() + ); + } // DocString: sub_node_latex_expr /** @@ -87,7 +95,7 @@ public: * * @param offset(int) Key to determine which part of the temporary storage array to look into */ - void set_value(int offset = -1); + virtual void set_value(int offset = -1); // DocString: sub_node_set_test_value /** @@ -95,7 +103,7 @@ public: * * @param offset(int) Key to determine which part of the temporary storage array to look into */ - void set_test_value(int offset = -1); + virtual void set_test_value(int offset = -1); // DocString: sub_node_rung /** @@ -116,6 +124,12 @@ public: */ inline std::string get_postfix_term(){return "sub";} + /** + * @brief Check if the feature will be valid, if it is then set the value + * @return True if the feature is valid + */ + void check_feats(); + /** * @brief update the dictionary used to check if an Add/Sub node is valid * @@ -134,6 +148,75 @@ public: * @param expected_abs_tot The expected absolute sum of all values in div_mult_leaves */ void update_div_mult_leaves(std::map<std::string, double>& div_mult_leaves, double fact, double& expected_abs_tot); + + #ifdef PARAMETERIZE + /** + * @brief The parameters used for introducing more non linearity in the operators + */ + virtual std::vector<double> parameters(){return {};} + + /** + * @brief Solve the non-linear optimization to set the parameters + * @details Fits the data points from _feats->value_ptr and prop to get the parameters for the feature + * + * @param prop property to fit to get the parameters + */ + virtual void get_parameters(std::vector<double>& prop){return;} + + /** + * @brief Set the non-linear parameters + */ + virtual void set_parameters(std::vector<double>){return;} + + /** + * @brief Set the values of the training data for the feature inside of the value storage arrays + * + * @param offset(int) Key to determine which part of the temporary storage array to look into + * @param params pointer to the parameter values + */ + void set_value(const double* params, int offset=-1, int depth=1); + + /** + * @brief Set the values of the test data for the feature inside of the value storage arrays + * + * @param offset(int) Key to determine which part of the temporary storage array to look into + * @param params pointer to the parameter values + */ + void set_test_value(const double* params, int offset=-1, int depth=1); + + /** + * @brief The expression of the feature + * + * @param params parameter values for non-linear operations + * @return feature expression + */ + inline std::string expr(double* params, int depth=1) + { + return fmt::format( + "[({}) - ({:.10e} * {} {:+15.10e})]", + (depth < nlopt_wrapper::_max_param_depth ? _feats[0]->expr(params + _feats[1]->n_params() + 2, depth + 1) : _feats[0]->expr()), + params[0], + (depth < nlopt_wrapper::_max_param_depth ? _feats[1]->expr(params + 2, depth + 1) : _feats[1]->expr()), + params[1] + ); + } + + /** + * @brief Set the bounds for the nl parameterization + * + * @param lb pointer to the lower bounds data + * @param ub pointer to the upper bounds data + */ + void set_bounds(double* lb, double* ub, int from_parent=2, int depth = 1); + + /** + * @brief Calculates the derivative of an operation with respect to the parameters for a given sample + * + * @param params pointer to the parameters + * @param dfdp pointer to where the feature derivative pointers are located + */ + inline void param_derivative(const double* params, double* dfdp){std::fill_n(dfdp, _n_samp, -1.0);} + #endif }; void generateSubNode(std::vector<node_ptr>& feat_list, node_ptr feat_1, node_ptr feat_2, int& feat_ind, double l_bound, double u_bound); diff --git a/src/inputs/InputParser.cpp b/src/inputs/InputParser.cpp index 276431a96b5eca5e1f1d5e6a863ce57644115e94..fc93472d833496fa64274a6cf2bc0e8dd527ad56 100644 --- a/src/inputs/InputParser.cpp +++ b/src/inputs/InputParser.cpp @@ -36,18 +36,10 @@ InputParser::InputParser(pt::ptree IP, std::string fn, std::shared_ptr<MPI_Inter std::vector<std::string> headers; std::vector<Unit> units; - std::vector<std::shared_ptr<Domain>> domain_list; for(int dd = 1; dd < split_line.size(); ++dd) { - std::vector<std::string> name_domain_split; - boost::algorithm::split(name_domain_split, split_line[dd], boost::algorithm::is_any_of(":")); - if(name_domain_split.size() > 1) - domain_list.push_back(std::make_shared<Domain>(name_domain_split[1])); - else - domain_list.push_back(nullptr); - std::vector<std::string> name_unit_split; - boost::algorithm::split(name_unit_split, name_domain_split[0], boost::algorithm::is_any_of("()")); + boost::algorithm::split(name_unit_split, split_line[dd], boost::algorithm::is_any_of("()")); if(name_unit_split.size() == 1) { boost::algorithm::trim(split_line[dd]); @@ -174,10 +166,10 @@ InputParser::InputParser(pt::ptree IP, std::string fn, std::shared_ptr<MPI_Inter }; } - generate_feature_space(comm, headers, units, domain_list, tasks, taskind); + generate_feature_space(comm, headers, units, tasks, taskind); } -void InputParser::generate_feature_space(std::shared_ptr<MPI_Interface> comm, std::vector<std::string> headers, std::vector<Unit> units, std::vector<std::shared_ptr<Domain>> domain_list, std::map<std::string, std::vector<int>> tasks, int taskind) +void InputParser::generate_feature_space(std::shared_ptr<MPI_Interface> comm, std::vector<std::string> headers, std::vector<Unit> units, std::map<std::string, std::vector<int>> tasks, int taskind) { std::string line; std::ifstream data_stream; @@ -277,7 +269,6 @@ void InputParser::generate_feature_space(std::shared_ptr<MPI_Interface> comm, st test_data.erase(test_data.begin() + propind); headers.erase(headers.begin() + propind); units.erase(units.begin() + propind); - domain_list.erase(domain_list.begin() + propind); } if(taskind > propind) @@ -289,26 +280,12 @@ void InputParser::generate_feature_space(std::shared_ptr<MPI_Interface> comm, st test_data.erase(test_data.begin() + taskind); headers.erase(headers.begin() + taskind); units.erase(units.begin() + taskind); - domain_list.erase(domain_list.begin() + taskind); } node_value_arrs::initialize_values_arr(_prop_train.size(), _prop_test.size(), headers.size()); std::vector<node_ptr> phi_0; for(int ff = 0; ff < headers.size(); ++ff) - { - if(domain_list[ff] == nullptr) - { - std::array<double, 2> end_points; - end_points[0] = *std::min_element(data[ff].begin(), data[ff].end()); - end_points[1] = *std::max_element(data[ff].begin(), data[ff].end()); - - domain_list[ff] = std::make_shared<Domain>(end_points); - } - phi_0.push_back(std::make_shared<FeatureNode>(ff, headers[ff], data[ff], test_data[ff], *domain_list[ff], units[ff])); - } - - if(_calc_type.compare("log_regression_fit_c") == 0) - nlopt_wrapper::setup_data(_task_sizes_train, _n_dim, _max_rung, _max_param_depth); + phi_0.push_back(std::make_shared<FeatureNode>(ff, headers[ff], data[ff], test_data[ff], units[ff])); _feat_space = std::make_shared<FeatureSpace>(comm, phi_0, _opset, _param_opset, _prop_train, _task_sizes_train, _calc_type, _max_rung, _n_sis_select, _max_store_rung, _n_rung_generate, _cross_cor_max, _l_bound, _u_bound); } diff --git a/src/inputs/InputParser.hpp b/src/inputs/InputParser.hpp index 29ef832540bb04ce75446bbb927d7a0131722098..73c64ae3ff5287b50d8b86bb43fb89088c086ce9 100644 --- a/src/inputs/InputParser.hpp +++ b/src/inputs/InputParser.hpp @@ -95,7 +95,7 @@ public: * @param tasks map where keys are the task name and values are the number of samples in each task * @param taskind index in the columns that correspond the the task column */ - void generate_feature_space(std::shared_ptr<MPI_Interface> comm, std::vector<std::string> headers, std::vector<Unit> units, std::vector<std::shared_ptr<Domain>> domain_list, std::map<std::string, std::vector<int>> tasks, int taskind); + void generate_feature_space(std::shared_ptr<MPI_Interface> comm, std::vector<std::string> headers, std::vector<Unit> units, std::map<std::string, std::vector<int>> tasks, int taskind); }; /** * @brief strips comments from the input file diff --git a/src/main.cpp b/src/main.cpp index 2d24abab6177fa56e80e2d587fbf301f57a7ce42..fd0b68c463c906f2e53dc2e6f7ef8f44768e95f2 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1,7 +1,6 @@ #include <inputs/InputParser.hpp> #include <descriptor_identifier/SISSO_DI/SISSORegressor.hpp> #include <descriptor_identifier/SISSO_DI/SISSOLogRegressor.hpp> -#include <descriptor_identifier/SISSO_DI/SISSOLogRegressorFitC.hpp> #include <descriptor_identifier/SISSO_DI/SISSOClassifier.hpp> #include <iostream> // #include <gflags/gflags.h> @@ -82,24 +81,6 @@ int main(int argc, char const *argv[]) } } } - if(IP._calc_type.compare("log_regression_fit_c") == 0) - { - SISSOLogRegressorFitC sisso(IP._feat_space, IP._prop_unit, IP._prop_train, IP._prop_test, IP._task_sizes_train, IP._task_sizes_test, IP._leave_out_inds, IP._n_dim, IP._n_residuals, IP._n_models_store, IP._fix_intercept); - sisso.fit(); - - if(mpi_setup::comm->rank() == 0) - { - for(int ii = 0; ii < sisso.models().size(); ++ii) - { - std::cout << "Train RMSE: " << sisso.models()[ii][0].rmse(); - if(IP._prop_test.size() > 0) - std::cout << "; Test RMSE: " << sisso.models()[ii][0].test_rmse() << std::endl; - else - std::cout << std::endl; - std::cout << sisso.models()[ii][0] << "\n" << std::endl; - } - } - } else if(IP._calc_type.compare("classification") == 0) { SISSOClassifier sisso(IP._feat_space, IP._prop_label, IP._prop_unit, IP._prop_train, IP._prop_test, IP._task_sizes_train, IP._task_sizes_test, IP._leave_out_inds, IP._n_dim, IP._n_residuals, IP._n_models_store); diff --git a/src/nl_opt/NLOptWrapper.cpp b/src/nl_opt/NLOptWrapper.cpp index c2489c1031132569a78f357b81029c1961e723b3..aca63c39b7fcb89a1dcaf6046c898dd8a08fc5bc 100644 --- a/src/nl_opt/NLOptWrapper.cpp +++ b/src/nl_opt/NLOptWrapper.cpp @@ -58,11 +58,6 @@ void nlopt_wrapper::set_objective(std::string calc_type, double* prop, std::vect _objective = objective_log_reg; setup_data(sizes, 1, n_rung, max_param_depth); } - else if(calc_type.compare("log_regression_fit_c") == 0) - { - _objective = objective_log_reg; - _local_opt_alg = nlopt::LN_SBPLX; - } else { throw std::logic_error("projection type can not determined"); diff --git a/src/nl_opt/NLOptWrapper.hpp b/src/nl_opt/NLOptWrapper.hpp index dbe049b1573e8437a3751ac974a665571f023a3b..ecde4a88af28a4818caba1074086bf1a029372f6 100644 --- a/src/nl_opt/NLOptWrapper.hpp +++ b/src/nl_opt/NLOptWrapper.hpp @@ -44,38 +44,6 @@ namespace nlopt_wrapper Node* _feat; //!< Node pointer of the feature to parameterize } feat_data; - static double objective_log_least_sq(unsigned int n, const double* p, double* grad, void* data) - { - l0_data* d = (l0_data*) data; - - std::copy_n(d->_a, d->_n_feat * _n_samp, _a_copy.data()); - - int start = 0; - - for(int tt = 0; tt < _task_sizes.size(); ++tt) - { - std::transform(d->_prop + start, d->_prop + start + _task_sizes[tt], _log_trans.data() + start, [&](double prop){return std::log((prop - p[2*tt]) / p[2*tt+1]);}); - start += _task_sizes[tt]; - } - int info = 0; - if(std::any_of(_log_trans.begin(), _log_trans.end(), [](double lt){return !std::isfinite(lt);})) - return 1e100; - dgels_('N', _n_samp, d->_n_feat, 1, _a_copy.data(), _n_samp, _log_trans.data(), _n_samp, _work.data(), _n_samp, &info); - - std::fill_n(_work.data(), _n_samp, 0.0); - for(int ff = 0; ff < d->_n_feat; ++ff) - daxpy_(_n_samp, 1.0 * _log_trans[ff], d->_a + _n_samp * ff, 1, _work.data(), 1); - - start = 0; - for(int tt = 0; tt < _task_sizes.size(); ++tt) - { - - std::transform(_work.data() + start, _work.data() + start + _task_sizes[tt], d->_prop + start, _log_trans.data() + start, [&](double logD, double pa){return std::exp(logD) * p[2*tt+1] + p[2*tt] - pa;}); - start += _task_sizes[tt]; - } - double score = std::inner_product(_log_trans.begin(), _log_trans.end(), _log_trans.begin(), 0.0); - return (!std::isfinite(score) * 1e100) + std::sqrt(score / _n_samp); - } static double objective_class(unsigned int n, const double* p, double* grad, void* data) { diff --git a/src/python/__init__.py b/src/python/__init__.py index 5b4341e16f4df4d7cf8deafc3ce7b3605afb8ddd..eaf27cb70e7114eaead169cab617942f45627527 100644 --- a/src/python/__init__.py +++ b/src/python/__init__.py @@ -58,22 +58,6 @@ def get_unit(header): return Unit() -def get_domain(header, data): - """Get the unit from a header - - Args: - header (str): Column header to get the unit of - - Returns: - str: The string representation of the unit of the features - """ - try: - domain_str = header.split(":")[1].replace(" ", "") - return Domain(domain_str) - except IndexError: - return Domain(np.min(data), np.max(data)) - - def generate_phi_0_from_csv( df, prop_key, cols="all", task_key=None, leave_out_frac=0.0, leave_out_inds=None ): @@ -187,25 +171,15 @@ def generate_phi_0_from_csv( columns = df.columns.tolist() exprs = list([col.split(":")[0].split("(")[0] for col in columns]) units = list([get_unit(col) for col in columns]) - domain = list( - [ - get_domain(col, df.reindex(index=df.index[train_inds], columns=[col]).values) - for col in columns - ] - ) initialize_values_arr(len(train_inds), len(leave_out_inds), len(columns)) test_values = df.to_numpy().T[:, leave_out_inds] values = df.to_numpy().T[:, train_inds] feat_ind = 0 - for expr, domain, unit, value, test_value in zip( - exprs, domain, units, values, test_values - ): + for expr, unit, value, test_value in zip(exprs, units, values, test_values): phi_0.append( - FeatureNode( - feat_ind, expr.replace(" ", ""), value, test_value, domain, unit - ) + FeatureNode(feat_ind, expr.replace(" ", ""), value, test_value, unit) ) feat_ind += 1 diff --git a/src/python/bindings_docstring_keyed.cpp b/src/python/bindings_docstring_keyed.cpp index f94b75857ed1ea76f6727c6d54c231352da990f3..6e7d1d83a9ca908524f672c2b065cb0023dc08aa 100644 --- a/src/python/bindings_docstring_keyed.cpp +++ b/src/python/bindings_docstring_keyed.cpp @@ -7,15 +7,12 @@ void sisso::register_all() sisso::descriptor_identifier::registerModel(); sisso::descriptor_identifier::registerModelRegressor(); sisso::descriptor_identifier::registerModelLogRegressor(); - sisso::descriptor_identifier::registerModelLogRegressorFitC(); sisso::descriptor_identifier::registerModelClassifier(); sisso::descriptor_identifier::registerSISSO_DI(); sisso::descriptor_identifier::registerSISSORegressor(); sisso::descriptor_identifier::registerSISSOLogRegressor(); - sisso::descriptor_identifier::registerSISSOLogRegressorFitC(); sisso::descriptor_identifier::registerSISSOClassifier(); sisso::feature_creation::registerFeatureSpace(); - sisso::feature_creation::registerDomain(); sisso::feature_creation::registerUnit(); sisso::feature_creation::node::registerNode(); sisso::feature_creation::node::registerFeatureNode(); @@ -192,11 +189,10 @@ void sisso::feature_creation::node::registerFeatureNode() std::string (FeatureNode::*expr_const)() const = &FeatureNode::expr; void (FeatureNode::*set_value_no_param)(int) = &FeatureNode::set_value; void (FeatureNode::*set_test_value_no_param)(int) = &FeatureNode::set_test_value; - Domain (FeatureNode::*domain_no_param)() = &FeatureNode::domain; using namespace boost::python; - class_<FeatureNode, bases<Node>>("FeatureNode", init<int, std::string, np::ndarray, np::ndarray, Domain, Unit>()) - .def(init<int, std::string, py::list, py::list, Domain, Unit>()) + class_<FeatureNode, bases<Node>>("FeatureNode", init<int, std::string, np::ndarray, np::ndarray, Unit>()) + .def(init<int, std::string, py::list, py::list, Unit>()) .def("is_nan", &FeatureNode::is_nan, "@DocString_feat_node_is_nan@") .def("is_const", &FeatureNode::is_const, "@DocString_feat_node_is_const@") .def("set_value", set_value_no_param, "@DocString_feat_node_set_value@") @@ -228,7 +224,6 @@ void sisso::feature_creation::node::registerAddNode() void (AddNode::*set_value_no_param)(int) = &AddNode::set_value; void (AddNode::*set_test_value_no_param)(int) = &AddNode::set_test_value; std::string (AddNode::*expr_no_param)() = &AddNode::expr; - Domain (AddNode::*domain_no_param)() = &AddNode::domain; class_<AddNode, bases<OperatorNode<2>>>("AddNode", init<node_ptr, node_ptr, int, double, double>()) .def("set_value", set_value_no_param, "@DocString_add_node_set_value@") @@ -244,7 +239,6 @@ void sisso::feature_creation::node::registerSubNode() void (SubNode::*set_value_no_param)(int) = &SubNode::set_value; void (SubNode::*set_test_value_no_param)(int) = &SubNode::set_test_value; std::string (SubNode::*expr_no_param)() = &SubNode::expr; - Domain (SubNode::*domain_no_param)() = &SubNode::domain; class_<SubNode, bases<OperatorNode<2>>>("SubNode", init<node_ptr, node_ptr, int, double, double>()) .def("set_value", set_value_no_param, "@DocString_sub_node_set_value@") @@ -260,7 +254,6 @@ void sisso::feature_creation::node::registerDivNode() void (DivNode::*set_value_no_param)(int) = &DivNode::set_value; void (DivNode::*set_test_value_no_param)(int) = &DivNode::set_test_value; std::string (DivNode::*expr_no_param)() = &DivNode::expr; - Domain (DivNode::*domain_no_param)() = &DivNode::domain; class_<DivNode, bases<OperatorNode<2>>>("DivNode", init<node_ptr, node_ptr, int, double, double>()) .def("set_value", set_value_no_param, "@DocString_div_node_set_value@") @@ -276,7 +269,6 @@ void sisso::feature_creation::node::registerMultNode() void (MultNode::*set_value_no_param)(int) = &MultNode::set_value; void (MultNode::*set_test_value_no_param)(int) = &MultNode::set_test_value; std::string (MultNode::*expr_no_param)() = &MultNode::expr; - Domain (MultNode::*domain_no_param)() = &MultNode::domain; class_<MultNode, bases<OperatorNode<2>>>("MultNode", init<node_ptr, node_ptr, int, double, double>()) .def("set_value", set_value_no_param, "@DocString_mult_node_set_value@") @@ -292,7 +284,6 @@ void sisso::feature_creation::node::registerAbsDiffNode() void (AbsDiffNode::*set_value_no_param)(int) = &AbsDiffNode::set_value; void (AbsDiffNode::*set_test_value_no_param)(int) = &AbsDiffNode::set_test_value; std::string (AbsDiffNode::*expr_no_param)() = &AbsDiffNode::expr; - Domain (AbsDiffNode::*domain_no_param)() = &AbsDiffNode::domain; class_<AbsDiffNode, bases<OperatorNode<2>>>("AbsDiffNode", init<node_ptr, node_ptr, int, double, double>()) .def("set_value", set_value_no_param, "@DocString_abs_diff_node_set_value@") @@ -308,7 +299,6 @@ void sisso::feature_creation::node::registerAbsNode() void (AbsNode::*set_value_no_param)(int) = &AbsNode::set_value; void (AbsNode::*set_test_value_no_param)(int) = &AbsNode::set_test_value; std::string (AbsNode::*expr_no_param)() = &AbsNode::expr; - Domain (AbsNode::*domain_no_param)() = &AbsNode::domain; class_<AbsNode, bases<OperatorNode<1>>>("AbsNode", init<node_ptr, int, double, double>()) .def("set_value", set_value_no_param, "@DocString_abs_node_set_value@") @@ -324,7 +314,6 @@ void sisso::feature_creation::node::registerInvNode() void (InvNode::*set_value_no_param)(int) = &InvNode::set_value; void (InvNode::*set_test_value_no_param)(int) = &InvNode::set_test_value; std::string (InvNode::*expr_no_param)() = &InvNode::expr; - Domain (InvNode::*domain_no_param)() = &InvNode::domain; class_<InvNode, bases<OperatorNode<1>>>("InvNode", init<node_ptr, int, double, double>()) .def("set_value", set_value_no_param, "@DocString_inv_node_set_value@") @@ -340,7 +329,6 @@ void sisso::feature_creation::node::registerLogNode() void (LogNode::*set_value_no_param)(int) = &LogNode::set_value; void (LogNode::*set_test_value_no_param)(int) = &LogNode::set_test_value; std::string (LogNode::*expr_no_param)() = &LogNode::expr; - Domain (LogNode::*domain_no_param)() = &LogNode::domain; class_<LogNode, bases<OperatorNode<1>>>("LogNode", init<node_ptr, int, double, double>()) .def("set_value", set_value_no_param, "@DocString_log_node_set_value@") @@ -356,7 +344,6 @@ void sisso::feature_creation::node::registerExpNode() void (ExpNode::*set_value_no_param)(int) = &ExpNode::set_value; void (ExpNode::*set_test_value_no_param)(int) = &ExpNode::set_test_value; std::string (ExpNode::*expr_no_param)() = &ExpNode::expr; - Domain (ExpNode::*domain_no_param)() = &ExpNode::domain; class_<ExpNode, bases<OperatorNode<1>>>("ExpNode", init<node_ptr, int, double, double>()) .def("set_value", set_value_no_param, "@DocString_exp_node_set_value@") @@ -372,7 +359,6 @@ void sisso::feature_creation::node::registerNegExpNode() void (NegExpNode::*set_value_no_param)(int) = &NegExpNode::set_value; void (NegExpNode::*set_test_value_no_param)(int) = &NegExpNode::set_test_value; std::string (NegExpNode::*expr_no_param)() = &NegExpNode::expr; - Domain (NegExpNode::*domain_no_param)() = &NegExpNode::domain; class_<NegExpNode, bases<OperatorNode<1>>>("NegExpNode", init<node_ptr, int, double, double>()) .def("set_value", set_value_no_param, "@DocString_neg_exp_node_set_value@") @@ -388,7 +374,6 @@ void sisso::feature_creation::node::registerSinNode() void (SinNode::*set_value_no_param)(int) = &SinNode::set_value; void (SinNode::*set_test_value_no_param)(int) = &SinNode::set_test_value; std::string (SinNode::*expr_no_param)() = &SinNode::expr; - Domain (SinNode::*domain_no_param)() = &SinNode::domain; class_<SinNode, bases<OperatorNode<1>>>("SinNode", init<node_ptr, int, double, double>()) .def("set_value", set_value_no_param, "@DocString_sin_node_set_value@") @@ -404,7 +389,6 @@ void sisso::feature_creation::node::registerCosNode() void (CosNode::*set_value_no_param)(int) = &CosNode::set_value; void (CosNode::*set_test_value_no_param)(int) = &CosNode::set_test_value; std::string (CosNode::*expr_no_param)() = &CosNode::expr; - Domain (CosNode::*domain_no_param)() = &CosNode::domain; class_<CosNode, bases<OperatorNode<1>>>("CosNode", init<node_ptr, int, double, double>()) .def("set_value", set_value_no_param, "@DocString_cos_node_set_value@") @@ -420,7 +404,6 @@ void sisso::feature_creation::node::registerCbNode() void (CbNode::*set_value_no_param)(int) = &CbNode::set_value; void (CbNode::*set_test_value_no_param)(int) = &CbNode::set_test_value; std::string (CbNode::*expr_no_param)() = &CbNode::expr; - Domain (CbNode::*domain_no_param)() = &CbNode::domain; class_<CbNode, bases<OperatorNode<1>>>("CbNode", init<node_ptr, int, double, double>()) .def("set_value", set_value_no_param, "@DocString_cb_node_set_value@") @@ -436,7 +419,6 @@ void sisso::feature_creation::node::registerCbrtNode() void (CbrtNode::*set_value_no_param)(int) = &CbrtNode::set_value; void (CbrtNode::*set_test_value_no_param)(int) = &CbrtNode::set_test_value; std::string (CbrtNode::*expr_no_param)() = &CbrtNode::expr; - Domain (CbrtNode::*domain_no_param)() = &CbrtNode::domain; class_<CbrtNode, bases<OperatorNode<1>>>("CbrtNode", init<node_ptr, int, double, double>()) .def("set_value", set_value_no_param, "@DocString_cbrt_node_set_value@") @@ -452,7 +434,6 @@ void sisso::feature_creation::node::registerSqNode() void (SqNode::*set_value_no_param)(int) = &SqNode::set_value; void (SqNode::*set_test_value_no_param)(int) = &SqNode::set_test_value; std::string (SqNode::*expr_no_param)() = &SqNode::expr; - Domain (SqNode::*domain_no_param)() = &SqNode::domain; class_<SqNode, bases<OperatorNode<1>>>("SqNode", init<node_ptr, int, double, double>()) .def("set_value", set_value_no_param, "@DocString_sq_node_set_value@") @@ -468,7 +449,6 @@ void sisso::feature_creation::node::registerSqrtNode() void (SqrtNode::*set_value_no_param)(int) = &SqrtNode::set_value; void (SqrtNode::*set_test_value_no_param)(int) = &SqrtNode::set_test_value; std::string (SqrtNode::*expr_no_param)() = &SqrtNode::expr; - Domain (SqrtNode::*domain_no_param)() = &SqrtNode::domain; class_<SqrtNode, bases<OperatorNode<1>>>("SqrtNode", init<node_ptr, int, double, double>()) .def("set_value", set_value_no_param, "@DocString_sqrt_node_set_value@") @@ -484,7 +464,6 @@ void sisso::feature_creation::node::registerSixPowNode() void (SixPowNode::*set_value_no_param)(int) = &SixPowNode::set_value; void (SixPowNode::*set_test_value_no_param)(int) = &SixPowNode::set_test_value; std::string (SixPowNode::*expr_no_param)() = &SixPowNode::expr; - Domain (SixPowNode::*domain_no_param)() = &SixPowNode::domain; class_<SixPowNode, bases<OperatorNode<1>>>("SixPowNode", init<node_ptr, int, double, double>()) .def("set_value", set_value_no_param, "@DocString_six_pow_node_set_value@") @@ -500,7 +479,6 @@ void sisso::feature_creation::node::registerAddParamNode() void (AddParamNode::*set_value_no_param)(int) = &AddParamNode::set_value; void (AddParamNode::*set_test_value_no_param)(int) = &AddParamNode::set_test_value; std::string (AddParamNode::*expr_no_param)() = &AddParamNode::expr; - Domain (AddParamNode::*domain_no_param)() = &AddParamNode::domain; class_<AddParamNode, bases<AddNode>>("AddParamNode", init<node_ptr, node_ptr, int, double, double>()) .def("set_value", set_value_no_param, "@DocString_add_param_node_set_value@") @@ -516,7 +494,6 @@ void sisso::feature_creation::node::registerSubParamNode() void (SubParamNode::*set_value_no_param)(int) = &SubParamNode::set_value; void (SubParamNode::*set_test_value_no_param)(int) = &SubParamNode::set_test_value; std::string (SubParamNode::*expr_no_param)() = &SubParamNode::expr; - Domain (SubParamNode::*domain_no_param)() = &SubParamNode::domain; class_<SubParamNode, bases<SubNode>>("SubParamNode", init<node_ptr, node_ptr, int, double, double>()) .def("set_value", set_value_no_param, "@DocString_sub_param_node_set_value@") @@ -532,7 +509,6 @@ void sisso::feature_creation::node::registerDivParamNode() void (DivParamNode::*set_value_no_param)(int) = &DivParamNode::set_value; void (DivParamNode::*set_test_value_no_param)(int) = &DivParamNode::set_test_value; std::string (DivParamNode::*expr_no_param)() = &DivParamNode::expr; - Domain (DivParamNode::*domain_no_param)() = &DivParamNode::domain; class_<DivParamNode, bases<DivNode>>("DivParamNode", init<node_ptr, node_ptr, int, double, double>()) .def("set_value", set_value_no_param, "@DocString_div_param_node_set_value@") @@ -548,7 +524,6 @@ void sisso::feature_creation::node::registerMultParamNode() void (MultParamNode::*set_value_no_param)(int) = &MultParamNode::set_value; void (MultParamNode::*set_test_value_no_param)(int) = &MultParamNode::set_test_value; std::string (MultParamNode::*expr_no_param)() = &MultParamNode::expr; - Domain (MultParamNode::*domain_no_param)() = &MultParamNode::domain; class_<MultParamNode, bases<MultNode>>("MultParamNode", init<node_ptr, node_ptr, int, double, double>()) .def("set_value", set_value_no_param, "@DocString_mult_param_node_set_value@") @@ -564,7 +539,6 @@ void sisso::feature_creation::node::registerAbsDiffParamNode() void (AbsDiffParamNode::*set_value_no_param)(int) = &AbsDiffParamNode::set_value; void (AbsDiffParamNode::*set_test_value_no_param)(int) = &AbsDiffParamNode::set_test_value; std::string (AbsDiffParamNode::*expr_no_param)() = &AbsDiffParamNode::expr; - Domain (AbsDiffParamNode::*domain_no_param)() = &AbsDiffParamNode::domain; class_<AbsDiffParamNode, bases<AbsDiffNode>>("AbsDiffParamNode", init<node_ptr, node_ptr, int, double, double>()) .def("set_value", set_value_no_param, "@DocString_abs_diff_param_node_set_value@") @@ -580,7 +554,6 @@ void sisso::feature_creation::node::registerAbsParamNode() void (AbsParamNode::*set_value_no_param)(int) = &AbsParamNode::set_value; void (AbsParamNode::*set_test_value_no_param)(int) = &AbsParamNode::set_test_value; std::string (AbsParamNode::*expr_no_param)() = &AbsParamNode::expr; - Domain (AbsParamNode::*domain_no_param)() = &AbsParamNode::domain; class_<AbsParamNode, bases<AbsNode>>("AbsParamNode", init<node_ptr, int, double, double>()) .def("set_value", set_value_no_param, "@DocString_abs_param_node_set_value@") @@ -596,7 +569,6 @@ void sisso::feature_creation::node::registerInvParamNode() void (InvParamNode::*set_value_no_param)(int) = &InvParamNode::set_value; void (InvParamNode::*set_test_value_no_param)(int) = &InvParamNode::set_test_value; std::string (InvParamNode::*expr_no_param)() = &InvParamNode::expr; - Domain (InvParamNode::*domain_no_param)() = &InvParamNode::domain; class_<InvParamNode, bases<InvNode>>("InvParamNode", init<node_ptr, int, double, double>()) .def("set_value", set_value_no_param, "@DocString_inv_param_node_set_value@") @@ -612,7 +584,6 @@ void sisso::feature_creation::node::registerLogParamNode() void (LogParamNode::*set_value_no_param)(int) = &LogParamNode::set_value; void (LogParamNode::*set_test_value_no_param)(int) = &LogParamNode::set_test_value; std::string (LogParamNode::*expr_no_param)() = &LogParamNode::expr; - Domain (LogParamNode::*domain_no_param)() = &LogParamNode::domain; class_<LogParamNode, bases<LogNode>>("LogParamNode", init<node_ptr, int, double, double>()) .def("set_value", set_value_no_param, "@DocString_log_param_node_set_value@") @@ -628,7 +599,6 @@ void sisso::feature_creation::node::registerExpParamNode() void (ExpParamNode::*set_value_no_param)(int) = &ExpParamNode::set_value; void (ExpParamNode::*set_test_value_no_param)(int) = &ExpParamNode::set_test_value; std::string (ExpParamNode::*expr_no_param)() = &ExpParamNode::expr; - Domain (ExpParamNode::*domain_no_param)() = &ExpParamNode::domain; class_<ExpParamNode, bases<ExpNode>>("ExpParamNode", init<node_ptr, int, double, double>()) .def("set_value", set_value_no_param, "@DocString_exp_param_node_set_value@") @@ -644,7 +614,6 @@ void sisso::feature_creation::node::registerNegExpParamNode() void (NegExpParamNode::*set_value_no_param)(int) = &NegExpParamNode::set_value; void (NegExpParamNode::*set_test_value_no_param)(int) = &NegExpParamNode::set_test_value; std::string (NegExpParamNode::*expr_no_param)() = &NegExpParamNode::expr; - Domain (NegExpParamNode::*domain_no_param)() = &NegExpParamNode::domain; class_<NegExpParamNode, bases<NegExpNode>>("NegExpParamNode", init<node_ptr, int, double, double>()) .def("set_value", set_value_no_param, "@DocString_neg_exp_param_node_set_value@") @@ -660,7 +629,6 @@ void sisso::feature_creation::node::registerSinParamNode() void (SinParamNode::*set_value_no_param)(int) = &SinParamNode::set_value; void (SinParamNode::*set_test_value_no_param)(int) = &SinParamNode::set_test_value; std::string (SinParamNode::*expr_no_param)() = &SinParamNode::expr; - Domain (SinParamNode::*domain_no_param)() = &SinParamNode::domain; class_<SinParamNode, bases<SinNode>>("SinParamNode", init<node_ptr, int, double, double>()) .def("set_value", set_value_no_param, "@DocString_sin_param_node_set_value@") @@ -676,7 +644,6 @@ void sisso::feature_creation::node::registerCosParamNode() void (CosParamNode::*set_value_no_param)(int) = &CosParamNode::set_value; void (CosParamNode::*set_test_value_no_param)(int) = &CosParamNode::set_test_value; std::string (CosParamNode::*expr_no_param)() = &CosParamNode::expr; - Domain (CosParamNode::*domain_no_param)() = &CosParamNode::domain; class_<CosParamNode, bases<CosNode>>("CosParamNode", init<node_ptr, int, double, double>()) .def("set_value", set_value_no_param, "@DocString_cos_param_node_set_value@") @@ -692,7 +659,6 @@ void sisso::feature_creation::node::registerCbParamNode() void (CbParamNode::*set_value_no_param)(int) = &CbParamNode::set_value; void (CbParamNode::*set_test_value_no_param)(int) = &CbParamNode::set_test_value; std::string (CbParamNode::*expr_no_param)() = &CbParamNode::expr; - Domain (CbParamNode::*domain_no_param)() = &CbParamNode::domain; class_<CbParamNode, bases<CbNode>>("CbParamNode", init<node_ptr, int, double, double>()) .def("set_value", set_value_no_param, "@DocString_cb_param_node_set_value@") @@ -708,7 +674,6 @@ void sisso::feature_creation::node::registerCbrtParamNode() void (CbrtParamNode::*set_value_no_param)(int) = &CbrtParamNode::set_value; void (CbrtParamNode::*set_test_value_no_param)(int) = &CbrtParamNode::set_test_value; std::string (CbrtParamNode::*expr_no_param)() = &CbrtParamNode::expr; - Domain (CbrtParamNode::*domain_no_param)() = &CbrtParamNode::domain; class_<CbrtParamNode, bases<CbrtNode>>("CbrtParamNode", init<node_ptr, int, double, double>()) .def("set_value", set_value_no_param, "@DocString_cbrt_param_node_set_value@") @@ -724,7 +689,6 @@ void sisso::feature_creation::node::registerSqParamNode() void (SqParamNode::*set_value_no_param)(int) = &SqParamNode::set_value; void (SqParamNode::*set_test_value_no_param)(int) = &SqParamNode::set_test_value; std::string (SqParamNode::*expr_no_param)() = &SqParamNode::expr; - Domain (SqParamNode::*domain_no_param)() = &SqParamNode::domain; class_<SqParamNode, bases<SqNode>>("SqParamNode", init<node_ptr, int, double, double>()) .def("set_value", set_value_no_param, "@DocString_sq_param_node_set_value@") @@ -740,7 +704,6 @@ void sisso::feature_creation::node::registerSqrtParamNode() void (SqrtParamNode::*set_value_no_param)(int) = &SqrtParamNode::set_value; void (SqrtParamNode::*set_test_value_no_param)(int) = &SqrtParamNode::set_test_value; std::string (SqrtParamNode::*expr_no_param)() = &SqrtParamNode::expr; - Domain (SqrtParamNode::*domain_no_param)() = &SqrtParamNode::domain; class_<SqrtParamNode, bases<SqrtNode>>("SqrtParamNode", init<node_ptr, int, double, double>()) .def("set_value", set_value_no_param, "@DocString_sqrt_param_node_set_value@") @@ -756,7 +719,6 @@ void sisso::feature_creation::node::registerSixPowParamNode() void (SixPowParamNode::*set_value_no_param)(int) = &SixPowParamNode::set_value; void (SixPowParamNode::*set_test_value_no_param)(int) = &SixPowParamNode::set_test_value; std::string (SixPowParamNode::*expr_no_param)() = &SixPowParamNode::expr; - Domain (SixPowParamNode::*domain_no_param)() = &SixPowParamNode::domain; class_<SixPowParamNode, bases<SixPowNode>>("SixPowParamNode", init<node_ptr, int, double, double>()) .def("set_value", set_value_no_param, "@DocString_six_pow_param_node_set_value@") @@ -829,16 +791,6 @@ void sisso::descriptor_identifier::registerModelLogRegressor() ; } -void sisso::descriptor_identifier::registerModelLogRegressorFitC() -{ - class_<ModelLogRegressorFitC, bases<ModelLogRegressor>>("ModelLogRegressorFitC", init<Unit, std::vector<double>, std::vector<double>, std::vector<model_node_ptr>, std::vector<int>, std::vector<int>, bool>()) - .def(init<std::string>()) - .def(init<std::string, std::string>()) - .def("__str__", &ModelLogRegressorFitC::toString, "@DocString_model_reg_str@") - .def("__repr__", &ModelLogRegressorFitC::toString, "@DocString_model_reg_str@") - ; -} - void sisso::descriptor_identifier::registerModelClassifier() { class_<ModelClassifier, bases<Model>>("ModelClassifier", init<std::string, Unit, std::vector<double>, std::vector<double>, std::vector<model_node_ptr>, std::vector<int>, std::vector<int>, bool>()) @@ -896,15 +848,6 @@ void sisso::descriptor_identifier::registerSISSOLogRegressor() ; } -void sisso::descriptor_identifier::registerSISSOLogRegressorFitC() -{ - class_<SISSOLogRegressorFitC, bases<SISSOLogRegressor>>("SISSOLogRegressorFitC", init<std::shared_ptr<FeatureSpace>, Unit, np::ndarray, np::ndarray, py::list, py::list, py::list, int, int, int, optional<bool>>()) - .def(init<std::shared_ptr<FeatureSpace>, Unit, py::list, py::list, py::list, py::list, py::list, int, int, int, optional<bool>>()) - .def("fit", &SISSOLogRegressorFitC::fit, "@DocString_sisso_reg_fit@") - .add_property("models", &SISSOLogRegressorFitC::models_py, "@DocString_sisso_reg_models_py@") - ; -} - void sisso::descriptor_identifier::registerSISSOClassifier() { class_<SISSOClassifier, bases<SISSO_DI>>("SISSOClassifier", init<std::shared_ptr<FeatureSpace>, std::string, Unit, np::ndarray, np::ndarray, py::list, py::list, py::list, int, int, int>()) diff --git a/src/python/bindings_docstring_keyed.hpp b/src/python/bindings_docstring_keyed.hpp index 29f8be511712168358b21746bb5c3d11958d48f1..0d4fa6d02eb90d12ee8cdb3ce174d709e557a584 100644 --- a/src/python/bindings_docstring_keyed.hpp +++ b/src/python/bindings_docstring_keyed.hpp @@ -10,7 +10,6 @@ #include <descriptor_identifier/SISSO_DI/SISSO_DI.hpp> #include <descriptor_identifier/SISSO_DI/SISSORegressor.hpp> #include <descriptor_identifier/SISSO_DI/SISSOLogRegressor.hpp> -#include <descriptor_identifier/SISSO_DI/SISSOLogRegressorFitC.hpp> #include <descriptor_identifier/SISSO_DI/SISSOClassifier.hpp> #include <feature_creation/feature_space/FeatureSpace.hpp> #include <nl_opt/NLOptWrapper.hpp> @@ -67,8 +66,6 @@ namespace sisso inline std::vector<double> parameters(){return this->get_override("parameters")();} inline void set_parameters(std::vector<double>){this->get_override("set_parameters")();} inline void set_bounds(double* lb, double* ub, int from_parent=2, int depth=1){this->get_override("set_bounds")();} - inline Domain domain(){return this->get_override("domain")();} - inline Domain domain(double* params, int depth=1){return this->get_override("domain")();} inline int n_feats(){this->get_override("n_feats");} inline std::shared_ptr<Node> feat(int ind){this->get_override("feat");} inline void param_derivative(const double* params, double* dfdp){this->get_override("param_derivative");} @@ -92,8 +89,6 @@ namespace sisso inline std::string get_latex_expr(std::string cap){return this->get_override("latex_expr")();} inline Unit unit(){return this->get_override("unit")();} inline std::string get_postfix_term(){return this->get_override("get_postfix_term")();} - inline Domain domain(double* params, int depth=1){return this->get_override("domain")();} - inline Domain domain(){return this->get_override("domain")();} inline std::string expr(double* params, int depth=1){return this->get_override("expr")();} inline std::string expr(){return this->get_override("expr")();} inline void update_add_sub_leaves(std::map<std::string, int>& add_sub_leaves, int pl_mn, int& expected_abs_tot){this->get_override("update_add_sub_leaves")();} @@ -135,7 +130,6 @@ namespace sisso .def("rung", py::pure_virtual(&OperatorNode<N>::rung), "@DocString_op_node_rung@") .def("expr", py::pure_virtual(&OperatorNode<N>::expr), "@DocString_op_node_expr@") .def("unit", py::pure_virtual(&OperatorNode<N>::unit), "@DocString_op_node_unit@") - .def("domain", pure_virtual(&OperatorNode<N>::domain), "@DocString_op_node_domain@") .add_property("n_feats", &OperatorNode<N>::n_feats, "@DocString_op_node_n_feats@") .add_property("feat", &OperatorNode<N>::feat, "@DocString_op_node_feat@") ; @@ -370,11 +364,6 @@ namespace sisso */ static void registerModelLogRegressor(); - /** - * @brief Register the Model object for conversion to a python object - */ - static void registerModelLogRegressorFitC(); - /** * @brief Register the Model object for conversion to a python object */ @@ -395,11 +384,6 @@ namespace sisso */ static void registerSISSOLogRegressor(); - /** - * @brief Register the SISSORegressor object for conversion to a python object - */ - static void registerSISSOLogRegressorFitC(); - /** * @brief Register the SISSORegressor object for conversion to a python object */ diff --git a/src/python/descriptor_identifier/SISSOLogRegressorFitC.cpp b/src/python/descriptor_identifier/SISSOLogRegressorFitC.cpp deleted file mode 100644 index ad5ad1acbd6dfe014b0656a0c3df066c8ba46e03..0000000000000000000000000000000000000000 --- a/src/python/descriptor_identifier/SISSOLogRegressorFitC.cpp +++ /dev/null @@ -1,74 +0,0 @@ -#include <descriptor_identifier/SISSO_DI/SISSOLogRegressorFitC.hpp> - -SISSOLogRegressorFitC::SISSOLogRegressorFitC( - std::shared_ptr<FeatureSpace> feat_space, - Unit prop_unit, - np::ndarray prop, - np::ndarray prop_test, - py::list task_sizes_train, - py::list task_sizes_test, - py::list leave_out_inds, - int n_dim, - int n_residual, - int n_models_store, - bool fix_intercept -) : - SISSOLogRegressor(feat_space, prop_unit, prop, prop_test, task_sizes_train, task_sizes_test, leave_out_inds, n_dim, n_residual, n_models_store, fix_intercept) -{ - // fix intercept has to be true since its done via nlopt - _fix_intercept = true; - - std::vector<double> prop_vec = python_conv_utils::from_ndarray<double>(prop); - - std::copy_n(prop_vec.data(), prop_vec.size(), _prop.data()); - _ub_nl_opt.resize(_task_sizes_train.size() * 2, HUGE_VAL); - int start = 0; - for(int tt = 0; tt< _task_sizes_train.size(); ++tt) - { - _ub_nl_opt[2*tt] = *std::min_element(_prop.begin() + start, _prop.begin() + start + _task_sizes_train[tt]); - start += _task_sizes_train[tt]; - } -} - -SISSOLogRegressorFitC::SISSOLogRegressorFitC( - std::shared_ptr<FeatureSpace> feat_space, - Unit prop_unit, - py::list prop, - py::list prop_test, - py::list task_sizes_train, - py::list task_sizes_test, - py::list leave_out_inds, - int n_dim, - int n_residual, - int n_models_store, - bool fix_intercept -) : - SISSOLogRegressor(feat_space, prop_unit, prop, prop_test, task_sizes_train, task_sizes_test, leave_out_inds, n_dim, n_residual, n_models_store, fix_intercept) -{ - // fix intercept has to be true since its done via nlopt - _fix_intercept = true; - - std::vector<double> prop_vec = python_conv_utils::from_list<double>(prop); - - std::copy_n(prop_vec.data(), prop_vec.size(), _prop.data()); - _ub_nl_opt.resize(_task_sizes_train.size() * 2, HUGE_VAL); - int start = 0; - for(int tt = 0; tt< _task_sizes_train.size(); ++tt) - { - _ub_nl_opt[2*tt] = *std::min_element(_prop.begin() + start, _prop.begin() + start + _task_sizes_train[tt]); - start += _task_sizes_train[tt]; - } -} - -py::list SISSOLogRegressorFitC::models_py() -{ - py::list model_list; - for(auto& mod_list : _models) - { - py::list lst; - for(auto& mod : mod_list) - lst.append<ModelLogRegressorFitC>(mod); - model_list.append<py::list>(lst); - } - return model_list; -} diff --git a/src/python/feature_creation/FeatureNode.cpp b/src/python/feature_creation/FeatureNode.cpp index 82348e150fcb2074a55160d4932a6e55d57117cd..cbda9f08863265529a122b8d378a75a44839d74b 100644 --- a/src/python/feature_creation/FeatureNode.cpp +++ b/src/python/feature_creation/FeatureNode.cpp @@ -1,16 +1,12 @@ #include <feature_creation/node/FeatureNode.hpp> -FeatureNode::FeatureNode(int feat_ind, std::string expr, np::ndarray value, np::ndarray test_value, Domain domain, Unit unit) : +FeatureNode::FeatureNode(int feat_ind, std::string expr, np::ndarray value, np::ndarray test_value, Unit unit) : Node(feat_ind, value.shape(0), test_value.shape(0)), _value(python_conv_utils::from_ndarray<double>(value)), _test_value(python_conv_utils::from_ndarray<double>(test_value)), _unit(unit), - _domain(domain), _expr(expr) { - if((*std::max_element(_value.begin(), _value.end()) > _domain) || (*std::min_element(_value.begin(), _value.end()) < _domain)) - throw std::logic_error("The feature " + expr + " has values outside of the domain."); - // Automatically resize the storage arrays if(node_value_arrs::N_STORE_FEATURES == 0) node_value_arrs::initialize_values_arr(_n_samp, _n_test_samp, 1); @@ -23,17 +19,13 @@ FeatureNode::FeatureNode(int feat_ind, std::string expr, np::ndarray value, np:: set_test_value(); } -FeatureNode::FeatureNode(int feat_ind, std::string expr, py::list value, py::list test_value, Domain domain, Unit unit) : +FeatureNode::FeatureNode(int feat_ind, std::string expr, py::list value, py::list test_value, Unit unit) : Node(feat_ind, py::len(value), py::len(test_value)), _value(python_conv_utils::from_list<double>(value)), _test_value(python_conv_utils::from_list<double>(test_value)), _unit(unit), - _domain(domain), _expr(expr) { - if((*std::max_element(_value.begin(), _value.end()) > _domain) || (*std::min_element(_value.begin(), _value.end()) < _domain)) - throw std::logic_error("The feature " + expr + " has values outside of the domain."); - // Automatically resize the storage arrays if(node_value_arrs::N_STORE_FEATURES == 0) node_value_arrs::initialize_values_arr(_n_samp, _n_test_samp, 1); diff --git a/src/python/feature_creation/FeatureSpace.cpp b/src/python/feature_creation/FeatureSpace.cpp index b04e5961caae224ab55304fb41ab2784b774abf4..59546e435f969b4501601517796bc7b9dcf8e9ff 100644 --- a/src/python/feature_creation/FeatureSpace.cpp +++ b/src/python/feature_creation/FeatureSpace.cpp @@ -199,7 +199,7 @@ py::list FeatureSpace::phi0_py() { py::list feat_lst; for(auto& feat : _phi_0) - feat_lst.append<FeatureNode>(FeatureNode(feat->feat_ind(), feat->expr(), feat->value(), feat->test_value(), feat->domain(), feat->unit())); + feat_lst.append<FeatureNode>(FeatureNode(feat->feat_ind(), feat->expr(), feat->value(), feat->test_value(), feat->unit())); return feat_lst; } diff --git a/src/utils/project.cpp b/src/utils/project.cpp index d7fdf57ae1c139f42fa76aef3e35846347487255..6ad6946b33dc32772984d85a617df180eba7f3ac 100644 --- a/src/utils/project.cpp +++ b/src/utils/project.cpp @@ -66,60 +66,6 @@ void project_funcs::project_log_r2(double* prop, double* scores, std::vector<nod } } -void project_funcs::project_log_r2_fit_c(double* prop, double* scores, std::vector<node_ptr>& phi, const std::vector<int>& sizes, int n_prop) -{ - int n_samp = std::accumulate(sizes.begin(), sizes.end(), 0); - std::fill_n(scores, phi.size(), HUGE_VAL); - #pragma omp parallel firstprivate(prop) - { - std::vector<double> c_coefs(2 * nlopt_wrapper::_task_sizes.size(), 1.0); - std::vector<double> c_coefs_orig(2 * nlopt_wrapper::_task_sizes.size(), 1.0); - - std::vector<double> ub(c_coefs.size(), HUGE_VAL); - std::vector<double> log_feat(n_samp, 0.0); - - nlopt_wrapper::l0_data d; - d._n_feat = 1; - d._a = log_feat.data(); - - nlopt::opt opt(nlopt::LN_SBPLX, c_coefs.size()); - opt.set_maxeval(1000); - opt.set_xtol_rel(1e-6); - opt.set_min_objective(nlopt_wrapper::objective_log_least_sq, &d); - double minf = HUGE_VAL; - for(int pp = 0; pp < n_prop; ++pp) - { - std::fill_n(c_coefs_orig.data(), c_coefs_orig.size(), 1.0); - int start = 0; - for(int tt = 0; tt < nlopt_wrapper::_task_sizes.size(); ++tt) - { - ub[tt*2] = *std::min_element(prop + start, prop + start + nlopt_wrapper::_task_sizes[tt]); - c_coefs_orig[tt*2] = std::min(0.0, ub[tt*2] - 0.01); - start += nlopt_wrapper::_task_sizes[tt]; - } - opt.set_upper_bounds(ub); - d._prop = prop; - - #pragma omp for schedule(guided) - for(int ff = 0; ff < phi.size(); ++ff) - { - if(*std::min_element(phi[ff]->value_ptr(), phi[ff]->value_ptr() + phi[ff]->n_samp()) < 0.0) - continue; - std::transform(phi[ff]->value_ptr(), phi[ff]->value_ptr() + n_samp, d._a, [](double fv){return std::log(fv);}); - std::copy_n(c_coefs_orig.data(), c_coefs_orig.size(), c_coefs.data()); - try - { - nlopt::result result = opt.optimize(c_coefs, minf); - scores[ff] = std::min(scores[ff], minf); - } - catch(std::exception &e) - {} - } - #pragma omp barrier - } - } -} - void project_funcs::project_classify(double* prop, double* scores, std::vector<node_ptr>& phi, const std::vector<int>& sizes, int n_prop) { int n_samp = std::accumulate(sizes.begin(), sizes.end(), 0); diff --git a/src/utils/project.hpp b/src/utils/project.hpp index 68d0487b4fc5c469907ec3c38b4c1a0ca496b1fb..36f723c6f35bdf496b1e73a0e97ecef6bf9dd8cb 100644 --- a/src/utils/project.hpp +++ b/src/utils/project.hpp @@ -50,17 +50,6 @@ namespace project_funcs */ void project_log_r2(double* prop, double* scores, std::vector<node_ptr>& phi, const std::vector<int>& size, int n_prop); - /** - * @brief Calculate the projection scores of a set of features to a vector via Coefficient of Determination (on the log transformed problem) while allowing the intercepts to change - * - * @param prop The pointer to the property vector to calculate the Coefficient of Determination against - * @param scores The pointer of the score vector - * @param phi The set of features to calculate the Coefficient of Determination of - * @param size Vector of the size of all of the tasks - * @param n_prop The number of properties to calculate the Coefficient of Determination of and return the maximum of - */ - void project_log_r2_fit_c(double* prop, double* scores, std::vector<node_ptr>& phi, const std::vector<int>& size, int n_prop); - /** * @brief Calculate projection scores for classification * @details Create a 1D-convex hull for each class and calculate the projection score @@ -115,18 +104,6 @@ namespace project_funcs * @param n_prop number of properties */ void project_classify_no_omp(double* prop, double* scores, std::vector<node_ptr>& phi, const std::vector<int>& size, int n_prop); - - /** - * @brief Calculate the projection scores of a set of features to a vector via Coefficient of Determination (on the log transformed problem) while allowing the intercepts to change - * - * @param prop The pointer to the property vector to calculate the Coefficient of Determination against - * @param scores The pointer of the score vector - * @param phi The set of features to calculate the Coefficient of Determination of - * @param size Vector of the size of all of the tasks - * @param n_prop The number of properties to calculate the Coefficient of Determination of and return the maximum of - */ - void project_log_r2_fit_c_no_omp(double* prop, double* scores, std::vector<node_ptr>& phi, const std::vector<int>& size, int n_prop); - } #endif diff --git a/tests/pytest/test_feature_creation/test_domain.py b/tests/.removed_features/test_domain.py similarity index 100% rename from tests/pytest/test_feature_creation/test_domain.py rename to tests/.removed_features/test_domain.py diff --git a/tests/pytest/test_descriptor_identifier/test_log_regressor_fit_c.py b/tests/.removed_features/test_log_regressor_fit_c.py similarity index 100% rename from tests/pytest/test_descriptor_identifier/test_log_regressor_fit_c.py rename to tests/.removed_features/test_log_regressor_fit_c.py diff --git a/tests/googletest/descriptor_identification/model/test_model_log_regressor_fit_c.cc b/tests/.removed_features/test_model_log_regressor_fit_c.cc similarity index 100% rename from tests/googletest/descriptor_identification/model/test_model_log_regressor_fit_c.cc rename to tests/.removed_features/test_model_log_regressor_fit_c.cc diff --git a/tests/googletest/descriptor_identification/sisso_regressor/test_sisso_log_regressor_fit_c.cc b/tests/.removed_features/test_sisso_log_regressor_fit_c.cc similarity index 100% rename from tests/googletest/descriptor_identification/sisso_regressor/test_sisso_log_regressor_fit_c.cc rename to tests/.removed_features/test_sisso_log_regressor_fit_c.cc