diff --git a/src/descriptor_identifier/Model/Model.cpp b/src/descriptor_identifier/Model/Model.cpp index c90bdd3dc415fc75be1f1cc7aeeca639765d82bd..4b43809bb0cab5df6e4be138c2d6c164575208a3 100644 --- a/src/descriptor_identifier/Model/Model.cpp +++ b/src/descriptor_identifier/Model/Model.cpp @@ -1,14 +1,14 @@ #include <descriptor_identifier/Model/Model.hpp> Model::Model( - std::string prop_label, - Unit prop_unit, - std::vector<double> prop_train, - std::vector<double> prop_test, - std::vector<model_node_ptr> feats, - std::vector<int> task_sizes_train, - std::vector<int> task_sizes_test, - bool fix_intercept + const std::string prop_label, + const Unit prop_unit, + const std::vector<double> prop_train, + const std::vector<double> prop_test, + const std::vector<model_node_ptr> feats, + const std::vector<int> task_sizes_train, + const std::vector<int> task_sizes_test, + const bool fix_intercept ) : _n_samp_train(feats[0]->n_samp()), _n_samp_test(feats[0]->n_test_samp()), @@ -30,7 +30,7 @@ Model::Model() : _task_eval(0) {} -double Model::eval(std::vector<double> x_in) +double Model::eval(std::vector<double> x_in) const { int n_leaves_tot = std::accumulate(_feats.begin(), _feats.end(), 0, [](int tot, model_node_ptr feat){return tot + feat->n_leaves();}); if(x_in.size() != n_leaves_tot) @@ -45,7 +45,7 @@ double Model::eval(std::vector<double> x_in) return eval(x_in.data()); } -double Model::eval(std::map<std::string, double> x_in_dct) +double Model::eval(std::map<std::string, double> x_in_dct) const { std::vector<double> x_in; for(auto& feat : _feats) @@ -68,7 +68,7 @@ double Model::eval(std::map<std::string, double> x_in_dct) } -std::vector<double> Model::eval(std::vector<std::vector<double>> x_in) +std::vector<double> Model::eval(std::vector<std::vector<double>> x_in) const { int n_leaves_tot = std::accumulate(_feats.begin(), _feats.end(), 0, [](int tot, model_node_ptr feat){return tot + feat->n_leaves();}); if(x_in.size() != n_leaves_tot) @@ -91,7 +91,7 @@ 
std::vector<double> Model::eval(std::vector<std::vector<double>> x_in) return eval(x_in.data()); } -std::vector<double> Model::eval(std::map<std::string, std::vector<double>> x_in_dct) +std::vector<double> Model::eval(std::map<std::string, std::vector<double>> x_in_dct) const { std::vector<std::vector<double>> x_in; for(auto& feat :_feats) diff --git a/src/descriptor_identifier/Model/Model.hpp b/src/descriptor_identifier/Model/Model.hpp index 7a409abd6a5aa26694d3859fc5118f1b10c2c735..0022ef8e673db2f9f28fbde5749a64db6b5f2f3b 100644 --- a/src/descriptor_identifier/Model/Model.hpp +++ b/src/descriptor_identifier/Model/Model.hpp @@ -77,14 +77,14 @@ public: * @param fix_intercept If true the intercept of the model is 0 */ Model( - std::string prop_label, - Unit prop_unit, - std::vector<double> prop_train, - std::vector<double> prop_test, - std::vector<model_node_ptr> feats, - std::vector<int> task_sizes_train, - std::vector<int> task_sizes_test, - bool fix_intercept + const std::string prop_label, + const Unit prop_unit, + const std::vector<double> prop_train, + const std::vector<double> prop_test, + const std::vector<model_node_ptr> feats, + const std::vector<int> task_sizes_train, + const std::vector<int> task_sizes_test, + const bool fix_intercept ); /** @@ -121,7 +121,7 @@ public: * @param x_in pointer to the new data point (order the same as appending the results of _feats[nn]->get_x_in_expr_list() for all feature) * @return The prediction of the model for a given data point */ - virtual double eval(double* x_in) = 0; + virtual double eval(double* x_in) const = 0; /** * @brief Evaluate the model for a new point @@ -129,7 +129,7 @@ public: * @param x_in The data point to evaluate the model (order the same as appending the results of _feats[nn]->get_x_in_expr_list() for all feature) * @return The prediction of the model for a given data point */ - double eval(std::vector<double> x_in); + double eval(std::vector<double> x_in) const; /** * @brief Evaluate the model for 
a new point @@ -137,7 +137,7 @@ public: * @param x_in_dct Dictionary describing the new point ("feature expr": value) * @return The prediction of the model for a given data point */ - double eval(std::map<std::string, double> x_in_dct); + double eval(std::map<std::string, double> x_in_dct) const; /** * @brief Evaluate the model for a new set of new points @@ -145,7 +145,7 @@ public: * @param x_in a vector of pointers to the set of values for new data points (one pointer per each feature and order the same as appending the results of _feats[nn]->get_x_in_expr_list() for all feature) * @return The prediction of the model for a given set of data points */ - virtual std::vector<double> eval(std::vector<double>* x_in) = 0; + virtual std::vector<double> eval(std::vector<double>* x_in) const = 0; /** * @brief Evaluate the model for a set of new points @@ -153,7 +153,7 @@ public: * @param x_in The set data for a set of new data points (size of n_feature x n_points, and order the same as appending the results of _feats[nn]->get_x_in_expr_list() for all feature) * @return The prediction of the model for a given data point */ - std::vector<double> eval(std::vector<std::vector<double>> x_in); + std::vector<double> eval(std::vector<std::vector<double>> x_in) const; /** * @brief Evaluate the model for a set of new points @@ -161,7 +161,7 @@ public: * @param x_in_dct The set of data points to evaluate the model. 
Keys must be strings representing feature expressions and vectors must be the same length * @return The prediction of the model for a given data point */ - std::vector<double> eval(std::map<std::string, std::vector<double>> x_in_dct); + std::vector<double> eval(std::map<std::string, std::vector<double>> x_in_dct) const; // DocString: model_set_task_eval /** @@ -177,25 +177,25 @@ public: * * @returns _task_eval Task to evaluate */ - inline int get_task_eval(){return _task_eval;} + inline int get_task_eval() const {return _task_eval;} // DocString: model_n_samp_train /** * @brief Total number of samples being trained on */ - inline int n_samp_train(){return _n_samp_train;} + inline int n_samp_train() const {return _n_samp_train;} // DocString: model_n_samp_test /** * @brief Total number of samples being tested */ - inline int n_samp_test(){return _n_samp_test;} + inline int n_samp_test() const {return _n_samp_test;} // DocString: model_n_dim /** * @brief The dimensionality of the data */ - inline int n_dim(){return _n_dim;} + inline int n_dim() const {return _n_dim;} // DocString: model_prop_unit /** @@ -209,7 +209,7 @@ public: * @brief The label for the property * @return The label for the property */ - inline std::string prop_label(){return _prop_label;} + inline std::string prop_label() const {return _prop_label;} /** * @brief Convert the Model into an output file @@ -218,19 +218,19 @@ public: * @param train If true output the training data * @param test_inds The indexes of the test set */ - virtual void to_file(std::string filename, bool train=true, std::vector<int> test_inds={}) = 0; + virtual void to_file(const std::string filename, const bool train=true, const std::vector<int> test_inds={}) const = 0; // DocString: model_fix_intercept /** * @brief Access whether the intercept is fixed at 0 or not */ - inline bool fix_intercept(){return _fix_intercept;} + inline bool fix_intercept() const {return _fix_intercept;} /** * @brief Accessor to the coefficients for 
the model * @return The coefficients for the model for each task */ - inline std::vector<std::vector<double>> coefs(){return _coefs;} + inline std::vector<std::vector<double>> coefs() const {return _coefs;} #ifdef PY_BINDINGS // DocString: model_coefs @@ -291,7 +291,7 @@ public: * @param x_in The data point to evaluate the model (order the same as appending the results of _feats[nn]->get_x_in_expr_list() for all feature) * @return The prediction of the model for a given data point */ - inline double eval_py(np::ndarray x_in){return eval(python_conv_utils::from_ndarray<double>(x_in));} + inline double eval_py(np::ndarray x_in) const {return eval(python_conv_utils::from_ndarray<double>(x_in));} /** * @brief Evaluate the model for a new point @@ -299,7 +299,7 @@ public: * @param x_in The data point to evaluate the model (order the same as appending the results of _feats[nn]->get_x_in_expr_list() for all feature) * @return The prediction of the model for a given data point */ - inline double eval_py(py::list x_in){return eval(python_conv_utils::from_list<double>(x_in));} + inline double eval_py(py::list x_in) const {return eval(python_conv_utils::from_list<double>(x_in));} /** * @brief Evaluate the model for a new point @@ -307,7 +307,7 @@ public: * @param x_in_dct Dictionary describing the new point ("feature expr": value) * @return The prediction of the model for a given data point */ - inline double eval_py(py::dict x_in){return eval(python_conv_utils::from_dict<std::string, double>(x_in));} + inline double eval_py(py::dict x_in) const {return eval(python_conv_utils::from_dict<std::string, double>(x_in));} /** * @brief Evaluate the model for a set of new points @@ -315,7 +315,7 @@ public: * @param x_in The set data for a set of new data points (size of n_feature x n_points, and order the same as appending the results of _feats[nn]->get_x_in_expr_list() for all feature) * @return The prediction of the model for a given data point */ - np::ndarray 
eval_many_py(np::ndarray x_in); + np::ndarray eval_many_py(np::ndarray x_in) const; /** * @brief Evaluate the model for a set of new points @@ -323,7 +323,7 @@ public: * @param x_in_dct The set of data points to evaluate the model. Keys must be strings representing feature expressions and vectors must be the same length * @return The prediction of the model for a given data point */ - np::ndarray eval_many_py(py::dict x_in); + np::ndarray eval_many_py(py::dict x_in) const; #endif }; diff --git a/src/descriptor_identifier/Model/ModelClassifier.cpp b/src/descriptor_identifier/Model/ModelClassifier.cpp index 35a8fd4e2ab7e04ac7d21243c684d4d592a7387f..f1f33766e886380f7df00e2b96051f2b3c240e95 100644 --- a/src/descriptor_identifier/Model/ModelClassifier.cpp +++ b/src/descriptor_identifier/Model/ModelClassifier.cpp @@ -1,14 +1,14 @@ #include <descriptor_identifier/Model/ModelClassifier.hpp> ModelClassifier::ModelClassifier( - std::string prop_label, - Unit prop_unit, - std::vector<double> prop_train, - std::vector<double> prop_test, - std::vector<model_node_ptr> feats, - std::vector<int> task_sizes_train, - std::vector<int> task_sizes_test, - bool fix_intercept + const std::string prop_label, + const Unit prop_unit, + const std::vector<double> prop_train, + const std::vector<double> prop_test, + const std::vector<model_node_ptr> feats, + const std::vector<int> task_sizes_train, + const std::vector<int> task_sizes_test, + const bool fix_intercept ) : Model(prop_label, prop_unit, prop_train, prop_test, feats, task_sizes_train, task_sizes_test, fix_intercept), _prop_train_est(_prop_train), @@ -72,7 +72,7 @@ ModelClassifier::ModelClassifier( _test_n_svm_misclassified = std::accumulate(test_misclassified.begin(), test_misclassified.end(), 0); } -ModelClassifier::ModelClassifier(std::string train_file) +ModelClassifier::ModelClassifier(const std::string train_file) { _n_samp_test = 0; @@ -128,7 +128,7 @@ ModelClassifier::ModelClassifier(std::string train_file) throw 
std::logic_error("The file does not have the same convex overlap (" + std::to_string(file_train_n_convex_overlap) + ") as calculated here (" + std::to_string(_train_n_convex_overlap) + ")."); } -ModelClassifier::ModelClassifier(std::string train_file, std::string test_file) +ModelClassifier::ModelClassifier(const std::string train_file, const std::string test_file) { std::vector<std::string> split_str; std::vector<std::string> feature_expr_train = populate_model(train_file, true); @@ -203,21 +203,21 @@ ModelClassifier::ModelClassifier(std::string train_file, std::string test_file) } } -double ModelClassifier::eval(double* x_in) +double ModelClassifier::eval(double* x_in) const { double result = 0.0; throw std::logic_error("Eval not defined for classifier."); return result; } -std::vector<double> ModelClassifier::eval(std::vector<double>* x_in) +std::vector<double> ModelClassifier::eval(std::vector<double>* x_in) const { std::vector<double> result(0, 0.0); throw std::logic_error("Eval not defined for classifier."); return result; } -std::vector<std::string> ModelClassifier::populate_model(std::string filename, bool train) +std::vector<std::string> ModelClassifier::populate_model(const std::string filename, const bool train) { std::ifstream file_stream; file_stream.open(filename, std::ios::in); @@ -578,7 +578,7 @@ std::ostream& operator<< (std::ostream& outStream, const ModelClassifier& model) return outStream; } -void ModelClassifier::to_file(std::string filename, bool train, std::vector<int> test_inds) +void ModelClassifier::to_file(const std::string filename, const bool train, const std::vector<int> test_inds) const { boost::filesystem::path p(filename.c_str()); boost::filesystem::path parent = p.remove_filename(); diff --git a/src/descriptor_identifier/Model/ModelClassifier.hpp b/src/descriptor_identifier/Model/ModelClassifier.hpp index 9f3eb80f552a32558fba5fa6cb6d0e25e80b0229..04e4ffd65652bd58ab3c360f26d6675168fc6215 100644 --- 
a/src/descriptor_identifier/Model/ModelClassifier.hpp +++ b/src/descriptor_identifier/Model/ModelClassifier.hpp @@ -66,14 +66,14 @@ public: * @param fix_intercept if True then the intercept is 0.0 */ ModelClassifier( - std::string prop_label, - Unit prop_unit, - std::vector<double> prop_train, - std::vector<double> prop_test, - std::vector<model_node_ptr> feats, - std::vector<int> task_sizes_train, - std::vector<int> task_sizes_test, - bool fix_intercept + const std::string prop_label, + const Unit prop_unit, + const std::vector<double> prop_train, + const std::vector<double> prop_test, + const std::vector<model_node_ptr> feats, + const std::vector<int> task_sizes_train, + const std::vector<int> task_sizes_test, + const bool fix_intercept ); // DocString: model_class_init_str @@ -83,7 +83,7 @@ public: * * @param train_file Previously generated model file */ - ModelClassifier(std::string train_file); + ModelClassifier(const std::string train_file); // DocString: model_class_init_str_str /** @@ -93,7 +93,7 @@ public: * @param train_file Previously generated training model output file * @param train_file Previously generated testing model output file */ - ModelClassifier(std::string train_file, std::string test_file); + ModelClassifier(const std::string train_file, const std::string test_file); /** * @brief Copy Constructor @@ -129,7 +129,7 @@ public: * @param x_in pointer to the new data point (order the same as appending the results of _feats[nn]->get_x_in_expr_list() for all feature) * @return The prediction of the model for a given data point */ - double eval(double* x_in); + double eval(double* x_in) const; /** * @brief Evaluate the model for a new set of new points @@ -137,7 +137,7 @@ public: * @param x_in a vector of pointers to the set of values for new data points (one pointer per each feature and order the same as appending the results of _feats[nn]->get_x_in_expr_list() for all feature) * @return The prediction of the model for a given set of data points */ 
- std::vector<double> eval(std::vector<double>* x_in); + std::vector<double> eval(std::vector<double>* x_in) const; /** * @brief Read an output file and extract all relevant information @@ -171,7 +171,7 @@ public: * * @param res pointer to the beginning of the vector to store the residual */ - inline void copy_error(double* res){std::copy_n(_train_error.data(), _n_samp_train, res);} + inline void copy_error(double* res) const {std::copy_n(_train_error.data(), _n_samp_train, res);} /** * @brief Convert the ModelClassifier into an output file @@ -180,7 +180,7 @@ public: * @param train If true output the training data * @param test_inds The indexes of the test set */ - void to_file(std::string filename, bool train=true, std::vector<int> test_inds={}); + void to_file(const std::string filename, const bool train=true, const std::vector<int> test_inds={}) const; /** * @brief Set the train/test error of the model using linear programming @@ -191,31 +191,31 @@ public: /** * @brief The number of samples in overlapping convex hull regions (training data) */ - inline int n_convex_overlap_train(){return _train_n_convex_overlap;} + inline int n_convex_overlap_train() const {return _train_n_convex_overlap;} // DocString: model_classn_convex_overlap_test /** * @brief The number of samples in overlapping convex hull regions (test data) */ - inline int n_convex_overlap_test(){return _test_n_convex_overlap;} + inline int n_convex_overlap_test() const {return _test_n_convex_overlap;} // DocString: model_classn_svm_misclassified_train /** * @brief The number of samples misclassified by SVM (training data) */ - inline int n_svm_misclassified_train(){return _train_n_svm_misclassified;} + inline int n_svm_misclassified_train() const {return _train_n_svm_misclassified;} // DocString: model_classn_svm_misclassified_test /** * @brief The number of samples misclassified by SVM (training data) */ - inline int n_svm_misclassified_test(){return _test_n_svm_misclassified;} + inline int 
n_svm_misclassified_test() const {return _test_n_svm_misclassified;} // DocString: model_class_precent_train_error /** * @brief Percent of all training samples misclassified */ - inline double percent_train_error() + inline double percent_train_error() const { return 100.0 * static_cast<double>(_train_n_convex_overlap) / static_cast<double>(_n_samp_train); } @@ -224,7 +224,7 @@ public: /** * @brief Percent of all tset samples misclassified */ - inline double percent_test_error() + inline double percent_test_error() const { return 100.0 * static_cast<double>(_test_n_convex_overlap) / static_cast<double>(_n_samp_test); } diff --git a/src/descriptor_identifier/Model/ModelLogRegressor.cpp b/src/descriptor_identifier/Model/ModelLogRegressor.cpp index ddb1550e461a1bd7f78823c61575e4186b05efee..652d2975dd427b5a72f8e53eebbe45b1f942a428 100644 --- a/src/descriptor_identifier/Model/ModelLogRegressor.cpp +++ b/src/descriptor_identifier/Model/ModelLogRegressor.cpp @@ -4,14 +4,14 @@ ModelLogRegressor::ModelLogRegressor() {} ModelLogRegressor::ModelLogRegressor( - std::string prop_label, - Unit prop_unit, - std::vector<double> prop_train, - std::vector<double> prop_test, - std::vector<model_node_ptr> feats, - std::vector<int> task_sizes_train, - std::vector<int> task_sizes_test, - bool fix_intercept + const std::string prop_label, + const Unit prop_unit, + const std::vector<double> prop_train, + const std::vector<double> prop_test, + const std::vector<model_node_ptr> feats, + const std::vector<int> task_sizes_train, + const std::vector<int> task_sizes_test, + const bool fix_intercept ) : ModelRegressor(prop_label, prop_unit, prop_train, prop_test, feats, task_sizes_train, task_sizes_test, fix_intercept, false), _log_prop_train(_n_samp_train, 0.0), @@ -119,7 +119,7 @@ ModelLogRegressor::ModelLogRegressor( std::transform(_prop_test.begin(), _prop_test.end(), _prop_test_est.begin(), _test_error.begin(), [](double pt, double pt_est){return pt - pt_est;}); } 
-ModelLogRegressor::ModelLogRegressor(std::string train_file) : +ModelLogRegressor::ModelLogRegressor(const std::string train_file) : ModelRegressor(train_file), _log_prop_train(_n_samp_train, 0.0), _log_prop_test(_n_samp_test, 0.0), @@ -141,7 +141,7 @@ ModelLogRegressor::ModelLogRegressor(std::string train_file) : ); } -ModelLogRegressor::ModelLogRegressor(std::string train_file, std::string test_file) : +ModelLogRegressor::ModelLogRegressor(const std::string train_file, const std::string test_file) : ModelRegressor(train_file, test_file), _log_prop_train(_n_samp_train, 0.0), _log_prop_test(_n_samp_test, 0.0), @@ -178,7 +178,7 @@ ModelLogRegressor::ModelLogRegressor(std::string train_file, std::string test_fi ); } -double ModelLogRegressor::eval(double* x_in) +double ModelLogRegressor::eval(double* x_in) const { double result = _coefs[_task_eval].back(); int ff = 0; @@ -191,7 +191,7 @@ double ModelLogRegressor::eval(double* x_in) return result; } -std::vector<double> ModelLogRegressor::eval(std::vector<double>* x_in) +std::vector<double> ModelLogRegressor::eval(std::vector<double>* x_in) const { std::vector<double> result(x_in->size(), _coefs[_task_eval].back()); int ff = 0; diff --git a/src/descriptor_identifier/Model/ModelLogRegressor.hpp b/src/descriptor_identifier/Model/ModelLogRegressor.hpp index 14410372cf76dccc8e36ca309c797e55f883cfcc..ef9b5109d6c06fbe4333f90a0a71932cf5bdcb02 100644 --- a/src/descriptor_identifier/Model/ModelLogRegressor.hpp +++ b/src/descriptor_identifier/Model/ModelLogRegressor.hpp @@ -56,14 +56,14 @@ public: * @param fix_intercept if True then the intercept is 0.0 */ ModelLogRegressor( - std::string prop_label, - Unit prop_unit, - std::vector<double> prop_train, - std::vector<double> prop_test, - std::vector<model_node_ptr> feats, - std::vector<int> task_sizes_train, - std::vector<int> task_sizes_test, - bool fix_intercept + const std::string prop_label, + const Unit prop_unit, + const std::vector<double> prop_train, + const 
std::vector<double> prop_test, + const std::vector<model_node_ptr> feats, + const std::vector<int> task_sizes_train, + const std::vector<int> task_sizes_test, + const bool fix_intercept ); // DocString: model_log_reg_init_str @@ -119,7 +119,7 @@ public: * @param x_in pointer to the new data point (order the same as appending the results of _feats[nn]->get_x_in_expr_list() for all feature) * @return The prediction of the model for a given data point */ - double eval(double* x_in); + double eval(double* x_in) const; /** * @brief Evaluate the model for a new set of new points @@ -127,7 +127,7 @@ public: * @param x_in a vector of pointers to the set of values for new data points (one pointer per each feature and order the same as appending the results of _feats[nn]->get_x_in_expr_list() for all feature) * @return The prediction of the model for a given set of data points */ - std::vector<double> eval(std::vector<double>* x_in); + std::vector<double> eval(std::vector<double>* x_in) const; // DocString: model_log_reg_str /** @@ -150,7 +150,7 @@ public: * * @param res pointer to the beginning of the vector to store the residual */ - virtual inline void copy_error(double* res){std::copy_n(_log_train_error.data(), _n_samp_train, res);} + virtual inline void copy_error(double* res) const {std::copy_n(_log_train_error.data(), _n_samp_train, res);} }; /** diff --git a/src/descriptor_identifier/Model/ModelRegressor.cpp b/src/descriptor_identifier/Model/ModelRegressor.cpp index 4d68e0f1a59beb7583a00d70a32c6e36baaa99d6..dd38fc2579b1177edbfdc421b14d03ea668d5832 100644 --- a/src/descriptor_identifier/Model/ModelRegressor.cpp +++ b/src/descriptor_identifier/Model/ModelRegressor.cpp @@ -4,15 +4,15 @@ ModelRegressor::ModelRegressor() {} ModelRegressor::ModelRegressor( - std::string prop_label, - Unit prop_unit, - std::vector<double> prop_train, - std::vector<double> prop_test, - std::vector<model_node_ptr> feats, - std::vector<int> task_sizes_train, - std::vector<int> task_sizes_test, 
- bool fix_intercept, - bool fill_vecs + const std::string prop_label, + const Unit prop_unit, + const std::vector<double> prop_train, + const std::vector<double> prop_test, + const std::vector<model_node_ptr> feats, + const std::vector<int> task_sizes_train, + const std::vector<int> task_sizes_test, + const bool fix_intercept, + const bool fill_vecs ) : Model(prop_label, prop_unit, prop_train, prop_test, feats, task_sizes_train, task_sizes_test, fix_intercept), _prop_train_est(_n_samp_train, 0.0), @@ -85,7 +85,7 @@ ModelRegressor::ModelRegressor( } } -ModelRegressor::ModelRegressor(std::string train_file) +ModelRegressor::ModelRegressor(const std::string train_file) { _n_samp_test = 0; @@ -132,7 +132,7 @@ ModelRegressor::ModelRegressor(std::string train_file) } } -ModelRegressor::ModelRegressor(std::string train_file, std::string test_file) +ModelRegressor::ModelRegressor(const std::string train_file, const std::string test_file) { std::vector<std::string> split_str; std::vector<std::string> feature_expr_train = populate_model(train_file, true); @@ -185,7 +185,7 @@ ModelRegressor::ModelRegressor(std::string train_file, std::string test_file) } } -double ModelRegressor::eval(double* x_in) +double ModelRegressor::eval(double* x_in) const { double result = _coefs[_task_eval].back(); int ff = 0; @@ -198,7 +198,7 @@ double ModelRegressor::eval(double* x_in) return result; } -std::vector<double> ModelRegressor::eval(std::vector<double>* x_in) +std::vector<double> ModelRegressor::eval(std::vector<double>* x_in) const { std::vector<double> result(x_in->size(), _coefs[_task_eval].back()); int ff = 0; @@ -213,7 +213,7 @@ std::vector<double> ModelRegressor::eval(std::vector<double>* x_in) return result; } -std::vector<std::string> ModelRegressor::populate_model(std::string filename, bool train) +std::vector<std::string> ModelRegressor::populate_model(const std::string filename, const bool train) { std::ifstream file_stream; file_stream.open(filename, std::ios::in); @@ -420,7 
+420,7 @@ std::ostream& operator<< (std::ostream& outStream, const ModelRegressor& model) return outStream; } -void ModelRegressor::to_file(std::string filename, bool train, std::vector<int> test_inds) +void ModelRegressor::to_file(const std::string filename, const bool train, const std::vector<int> test_inds) const { boost::filesystem::path p(filename.c_str()); boost::filesystem::path parent = p.remove_filename(); @@ -540,7 +540,7 @@ void ModelRegressor::to_file(std::string filename, bool train, std::vector<int> out_file_stream.close(); } -std::vector<double> ModelRegressor::sorted_error() +std::vector<double> ModelRegressor::sorted_error() const { std::vector<double> sorted_error(_train_error.size(), 0.0); std::copy_n(_train_error.data(), _train_error.size(), sorted_error.data()); @@ -549,7 +549,7 @@ std::vector<double> ModelRegressor::sorted_error() return sorted_error; } -std::vector<double> ModelRegressor::sorted_test_error() +std::vector<double> ModelRegressor::sorted_test_error() const { std::vector<double> sorted_error(_test_error.size(), 0.0); std::copy_n(_test_error.data(), _test_error.size(), sorted_error.data()); @@ -558,14 +558,14 @@ std::vector<double> ModelRegressor::sorted_test_error() return sorted_error; } -double ModelRegressor::mape() +double ModelRegressor::mape() const { std::vector<double> percent_error(_train_error.size(), 0.0); std::transform(_train_error.begin(), _train_error.end(), _prop_train.begin(), percent_error.begin(), [](double e, double p){return std::abs(e / p);}); return util_funcs::mean<double>(percent_error); } -double ModelRegressor::test_mape() +double ModelRegressor::test_mape() const { std::vector<double> percent_error(_test_error.size(), 0.0); std::transform(_test_error.begin(), _test_error.end(), _prop_test.begin(), percent_error.begin(), [](double e, double p){return std::abs(e / p);}); diff --git a/src/descriptor_identifier/Model/ModelRegressor.hpp b/src/descriptor_identifier/Model/ModelRegressor.hpp index 
7bdfb8e47ed441f9653e89eb8f8e6857edc0e3c4..a70182eae97e4700c6ba7d26256f9037940f7f8d 100644 --- a/src/descriptor_identifier/Model/ModelRegressor.hpp +++ b/src/descriptor_identifier/Model/ModelRegressor.hpp @@ -62,15 +62,15 @@ public: * @param fill_vecs if true perform least squares regression (used because of log regressors need to use this) */ ModelRegressor( - std::string prop_label, - Unit prop_unit, - std::vector<double> prop_train, - std::vector<double> prop_test, - std::vector<model_node_ptr> feats, - std::vector<int> task_sizes_train, - std::vector<int> task_sizes_test, - bool fix_intercept, - bool fill_vecs=true + const std::string prop_label, + const Unit prop_unit, + const std::vector<double> prop_train, + const std::vector<double> prop_test, + const std::vector<model_node_ptr> feats, + const std::vector<int> task_sizes_train, + const std::vector<int> task_sizes_test, + const bool fix_intercept, + const bool fill_vecs=true ); // DocString: model_reg_init_str @@ -80,7 +80,7 @@ public: * * @param train_file Previously generated model file */ - ModelRegressor(std::string train_file); + ModelRegressor(const std::string train_file); // DocString: model_reg_init_str_str /** @@ -90,7 +90,7 @@ public: * @param train_file Previously generated training model output file * @param train_file Previously generated testing model output file */ - ModelRegressor(std::string train_file, std::string test_file); + ModelRegressor(const std::string train_file, const std::string test_file); /** * @brief Copy Constructor @@ -112,7 +112,7 @@ public: * @param x_in pointer to the new data point (order the same as appending the results of _feats[nn]->get_x_in_expr_list() for all feature) * @return The prediction of the model for a given data point */ - virtual double eval(double* x_in); + virtual double eval(double* x_in) const; /** * @brief Evaluate the model for a new set of new points @@ -120,7 +120,7 @@ public: * @param x_in a vector of pointers to the set of values for new data 
points (one pointer per each feature and order the same as appending the results of _feats[nn]->get_x_in_expr_list() for all feature) * @return The prediction of the model for a given set of data points */ - virtual std::vector<double> eval(std::vector<double>* x_in); + virtual std::vector<double> eval(std::vector<double>* x_in) const; /** * @brief Copy Assignment operator @@ -145,7 +145,7 @@ public: * * @return Vector of strings describing feature meta data */ - virtual std::vector<std::string> populate_model(std::string filename, bool train); + virtual std::vector<std::string> populate_model(const std::string filename, const bool train); // DocString: model_reg_str /** @@ -168,7 +168,7 @@ public: * * @param res pointer to the beginning of the vector to store the residual */ - virtual inline void copy_error(double* res) + virtual inline void copy_error(double* res) const { std::copy_n(_train_error.data(), _n_samp_train, res); } @@ -177,7 +177,7 @@ public: /** * @brief The training rmse of the model */ - inline double rmse() + inline double rmse() const { return util_funcs::norm(_train_error.data(), _n_samp_train) / std::sqrt(static_cast<double>(_n_samp_train)); } @@ -186,7 +186,7 @@ public: /** * @brief The test rmse of the model */ - inline double test_rmse() + inline double test_rmse() const { return util_funcs::norm(_test_error.data(), _n_samp_test) / std::sqrt(static_cast<double>(_n_samp_test)); } @@ -195,7 +195,7 @@ public: /** * @brief The training R^2 of the model */ - inline double r2() + inline double r2() const { return util_funcs::r2(_prop_train.data(), _prop_train_est.data(), _n_samp_train); } @@ -204,7 +204,7 @@ public: /** * @brief The test R^2 of the model */ - inline double test_r2() + inline double test_r2() const { return util_funcs::r2(_prop_test.data(), _prop_test_est.data(), _n_samp_test); } @@ -213,7 +213,7 @@ public: /** * @brief The max Absolute error of the training data */ - inline double max_ae() + inline double max_ae() const { return 
std::abs( *std::max_element( @@ -228,7 +228,7 @@ public: /** * @brief The max Absolute error of the testing data */ - inline double test_max_ae() + inline double test_max_ae() const { return std::abs( *std::max_element( @@ -244,7 +244,7 @@ public: * @brief The mean absolute error of the model * @return The mean absolute error of the training data */ - inline double mae() + inline double mae() const { return 1.0 / static_cast<double>(_n_samp_train) * std::accumulate( _train_error.begin(), @@ -259,7 +259,7 @@ public: * @brief The mean absolute test error of the model * @return The mean absolute error of the test data */ - inline double test_mae() + inline double test_mae() const { return 1.0 / static_cast<double>(_n_samp_test) * std::accumulate( _test_error.begin(), @@ -274,33 +274,33 @@ public: * @brief The mean absolute error of the model * @return The mean absolute error of the training data */ - double mape(); + double mape() const; // DocString: model_reg_test_mape /** * @brief The mean absolute test error of the model * @return The mean absolute error of the test data */ - double test_mape(); + double test_mape() const; /** * @brief Sort the training error based on magnitude * @return The error vector sorted */ - std::vector<double> sorted_error(); + std::vector<double> sorted_error() const; /** * @brief Sort the training test_error based on magnitude * @return The test_error vector sorted */ - std::vector<double> sorted_test_error(); + std::vector<double> sorted_test_error() const; // DocString: model_reg_percentile_25_ae /** * @brief The the 25 percentile error of the model * @return The the 25 percentile error of the training data */ - inline double percentile_25_ae() + inline double percentile_25_ae() const { return sorted_error()[static_cast<int>(floor(_n_samp_train * 0.25))]; } @@ -310,7 +310,7 @@ public: * @brief The 25 percentile test error of the model * @return The 25 percentile error of the test data */ - inline double percentile_25_test_ae() + 
inline double percentile_25_test_ae() const { return sorted_test_error()[static_cast<int>(floor(_n_samp_test * 0.25))]; } @@ -320,7 +320,7 @@ public: * @brief The 50 percentile error of the model * @return The 50 percentile error of the training data */ - inline double percentile_50_ae() + inline double percentile_50_ae() const { return sorted_error()[static_cast<int>(floor(_n_samp_train * 0.50))]; } @@ -330,7 +330,7 @@ public: * @brief The 50 percentile test error of the model * @return The 50 percentile error of the test data */ - inline double percentile_50_test_ae() + inline double percentile_50_test_ae() const { return sorted_test_error()[static_cast<int>(floor(_n_samp_test * 0.50))]; } @@ -340,7 +340,7 @@ public: * @brief The 75 percentile error of the model * @return The 75 percentile error of the training data */ - inline double percentile_75_ae() + inline double percentile_75_ae() const { return sorted_error()[static_cast<int>(floor(_n_samp_train * 0.75))]; } @@ -350,7 +350,7 @@ public: * @brief The 75 percentile test error of the model * @return The 75 percentile error of the test data */ - inline double percentile_75_test_ae() + inline double percentile_75_test_ae() const { return sorted_test_error()[static_cast<int>(floor(_n_samp_test * 0.75))]; } @@ -360,7 +360,7 @@ public: * @brief The 95 percentile error of the model * @return The 95 percentile error of the training data */ - inline double percentile_95_ae() + inline double percentile_95_ae() const { return sorted_error()[static_cast<int>(floor(_n_samp_train * 0.95))]; } @@ -370,7 +370,7 @@ public: * @brief The 95 percentile test error of the model * @return The 95 percentile error of the test data */ - inline double percentile_95_test_ae() + inline double percentile_95_test_ae() const { return sorted_test_error()[static_cast<int>(floor(_n_samp_test * 0.95))]; } @@ -382,7 +382,7 @@ public: * @param train If true output the training data * @param test_inds The indexes of the test set */ - virtual void 
to_file(std::string filename, bool train=true, std::vector<int> test_inds={}); + virtual void to_file(const std::string filename, const bool train=true, const std::vector<int> test_inds={}) const; #ifdef PY_BINDINGS // DocString: model_reg_prop_train_est diff --git a/src/descriptor_identifier/SISSO_DI/SISSOClassifier.cpp b/src/descriptor_identifier/SISSO_DI/SISSOClassifier.cpp index 75678fef4988e6c383ddd790a5b3fd9f2a7addff..7ac3b5d2b6009b9fb2fa601b35db177dc4f1cd47 100644 --- a/src/descriptor_identifier/SISSO_DI/SISSOClassifier.cpp +++ b/src/descriptor_identifier/SISSO_DI/SISSOClassifier.cpp @@ -1,17 +1,17 @@ #include <descriptor_identifier/SISSO_DI/SISSOClassifier.hpp> SISSOClassifier::SISSOClassifier( - std::shared_ptr<FeatureSpace> feat_space, - std::string prop_label, - Unit prop_unit, - std::vector<double> prop, - std::vector<double> prop_test, - std::vector<int> task_sizes_train, - std::vector<int> task_sizes_test, - std::vector<int> leave_out_inds, - int n_dim, - int n_residual, - int n_models_store + const std::shared_ptr<FeatureSpace> feat_space, + const std::string prop_label, + const Unit prop_unit, + const std::vector<double> prop, + const std::vector<double> prop_test, + const std::vector<int> task_sizes_train, + const std::vector<int> task_sizes_test, + const std::vector<int> leave_out_inds, + const int n_dim, + const int n_residual, + const int n_models_store ): SISSO_DI(feat_space, prop_label, prop_unit, prop, prop_test, task_sizes_train, task_sizes_test, leave_out_inds, n_dim, n_residual, n_models_store, false), _c(1000.0), @@ -22,7 +22,7 @@ SISSOClassifier::SISSOClassifier( setup_prop(prop, prop_test); } -void SISSOClassifier::check_prop_test(std::vector<double> prop, std::vector<double> prop_test) +void SISSOClassifier::check_prop_test(const std::vector<double> prop, const std::vector<double> prop_test) const { for(auto& pt : prop_test) { @@ -33,7 +33,6 @@ void SISSOClassifier::check_prop_test(std::vector<double> prop, std::vector<doub } } - void 
SISSOClassifier::setup_prop(std::vector<double> prop, std::vector<double> prop_test) { std::vector<int> inds = util_funcs::argsort<double>(prop); @@ -72,7 +71,7 @@ void SISSOClassifier::setup_prop(std::vector<double> prop, std::vector<double> p std::copy_n(prop_test.begin(), prop_test.size(), _prop_test.begin()); } -std::vector<LPWrapper> SISSOClassifier::setup_lp(int n_dim) +std::vector<LPWrapper> SISSOClassifier::setup_lp(const int n_dim) { std::vector<int> n_samp_per_class; int task_start = 0; @@ -112,7 +111,7 @@ std::vector<LPWrapper> SISSOClassifier::setup_lp(int n_dim) return lp; } -std::array<double, 2> SISSOClassifier::svm_error(std::vector<SVMWrapper>& svm, std::vector<int>& feat_inds) +std::array<double, 2> SISSOClassifier::svm_error(std::vector<SVMWrapper>& svm, const std::vector<int>& feat_inds) const { double error = 0.0; double dist_error = 0.0; @@ -129,12 +128,15 @@ std::array<double, 2> SISSOClassifier::svm_error(std::vector<SVMWrapper>& svm, s return {error, dist_error}; } -int SISSOClassifier::get_max_error_ind(int n_models, int* n_convex_overlap, double* svm_score, double* svm_margin, double* scores) +int SISSOClassifier::get_max_error_ind( + const int n_models, const int* n_convex_overlap, const double* svm_score, const double* svm_margin, double* scores +) const { std::transform( n_convex_overlap, n_convex_overlap + n_models, - svm_score, scores, + svm_score, + scores, [this](int n_overlap, double score){return static_cast<double>(n_overlap * _n_samp * _n_class) + score;} ); @@ -150,7 +152,7 @@ int SISSOClassifier::get_max_error_ind(int n_models, int* n_convex_overlap, doub return std::max_element(scores, scores + n_models) - scores; } -void SISSOClassifier::transfer_d_mat_to_sorted() +void SISSOClassifier::transfer_d_mat_to_sorted() const { prop_sorted_d_mat::resize_sroted_d_matrix_arr(node_value_arrs::N_SELECTED); @@ -165,7 +167,7 @@ void SISSOClassifier::transfer_d_mat_to_sorted() } } -void SISSOClassifier::l0_norm(std::vector<double>& prop, 
int n_dim) +void SISSOClassifier::l0_norm(std::vector<double>& prop, const int n_dim) { int n_get_models = std::max(_n_residual, _n_models_store); diff --git a/src/descriptor_identifier/SISSO_DI/SISSOClassifier.hpp b/src/descriptor_identifier/SISSO_DI/SISSOClassifier.hpp index 39cbf97211ffdc445fa5b1ef1b1fe732a1acce22..1a63af0b306d6b812767cad54ed6e9d0cc9be9c2 100644 --- a/src/descriptor_identifier/SISSO_DI/SISSOClassifier.hpp +++ b/src/descriptor_identifier/SISSO_DI/SISSOClassifier.hpp @@ -37,8 +37,8 @@ protected: std::map<int, int> _sample_inds_to_sorted_dmat_inds; //!< map from input sample inds to the SORTED_D_MATRIX_INDS - double _c; //!< the C value for the SVM results - double _width; //!< The tolerance used for calculating overlap (data point to check, 1.0 +/- width for the row constraints) + const double _c; //!< the C value for the SVM results + const double _width; //!< The tolerance used for calculating overlap (data point to check, 1.0 +/- width for the row constraints) int _n_class; //!< The number of classes in the calculation @@ -58,16 +58,16 @@ public: * @param n_models_store Number of features to store in files */ SISSOClassifier( - std::shared_ptr<FeatureSpace> feat_space, - std::string prop_label, Unit prop_unit, - std::vector<double> prop, - std::vector<double> prop_test, - std::vector<int> task_sizes_train, - std::vector<int> task_sizes_test, - std::vector<int> leave_out_inds, - int n_dim, - int n_residual, - int n_models_store + const std::shared_ptr<FeatureSpace> feat_space, + const std::string prop_label, Unit prop_unit, + const std::vector<double> prop, + const std::vector<double> prop_test, + const std::vector<int> task_sizes_train, + const std::vector<int> task_sizes_test, + const std::vector<int> leave_out_inds, + const int n_dim, + const int n_residual, + const int n_models_store ); /** @@ -76,8 +76,7 @@ public: * @param prop The property vector for the training data * @param prop_test The property vector for the test data */ - void 
check_prop_test(std::vector<double> prop, std::vector<double> prop_test); - + void check_prop_test(const std::vector<double> prop, const std::vector<double> prop_test) const; /** * @brief Convert the property vectors into something that SharkML's SVM expects @@ -90,12 +89,12 @@ public: /** * @brief set up the data structures for finding the number of overlap points for a linear programming problem */ - std::vector<LPWrapper> setup_lp(int n_dim = 1); + std::vector<LPWrapper> setup_lp(const int n_dim = 1); /** * @brief Transfer data from the node value array D-matrix into a property sorted array */ - void transfer_d_mat_to_sorted(); + void transfer_d_mat_to_sorted() const; /** * @brief Find the SVM error and margin for a given set of indexes @@ -104,7 +103,7 @@ public: * @param feat_inds The indexes to develop the SVM model for * @return {The number of misclassified points, the SVM margin} */ - std::array<double, 2> svm_error(std::vector<SVMWrapper>& svm, std::vector<int>& feat_inds); + std::array<double, 2> svm_error(std::vector<SVMWrapper>& svm, const std::vector<int>& feat_inds) const; /** * @brief Gets the max error index for the classification problem @@ -117,7 +116,7 @@ public: * @param scores the pointer to the scores array * @return Get the index with the maximum error */ - int get_max_error_ind(int n_models, int* n_convex_overlap, double* svm_score, double* svm_margin, double* scores); + int get_max_error_ind(const int n_models, const int* n_convex_overlap, const double* svm_score, const double* svm_margin, double* scores) const; /** * @brief Preform the l0 normalization for a property or the residual @@ -125,7 +124,7 @@ public: * @param prop The property to fit * @param n_dim the dimensionality of the model */ - void l0_norm(std::vector<double>& prop, int n_dim); + void l0_norm(std::vector<double>& prop, const int n_dim); // DocString: sisso_class_fit /** @@ -137,7 +136,7 @@ public: /** * @brief Acessor function for models */ - inline 
std::vector<std::vector<ModelClassifier>> models(){return _models;} + inline std::vector<std::vector<ModelClassifier>> models() const {return _models;} // Python interface functions #ifdef PY_BINDINGS diff --git a/src/descriptor_identifier/SISSO_DI/SISSOLogRegressor.cpp b/src/descriptor_identifier/SISSO_DI/SISSOLogRegressor.cpp index 855ab6c71311c805d6e9d5354e2ea7654c4cca46..bb5d72e5a2c5936aab87882f280e56fe407f9d86 100644 --- a/src/descriptor_identifier/SISSO_DI/SISSOLogRegressor.cpp +++ b/src/descriptor_identifier/SISSO_DI/SISSOLogRegressor.cpp @@ -1,18 +1,18 @@ #include <descriptor_identifier/SISSO_DI/SISSOLogRegressor.hpp> SISSOLogRegressor::SISSOLogRegressor( - std::shared_ptr<FeatureSpace> feat_space, - std::string prop_label, - Unit prop_unit, - std::vector<double> prop, - std::vector<double> prop_test, - std::vector<int> task_sizes_train, - std::vector<int> task_sizes_test, - std::vector<int> leave_out_inds, - int n_dim, - int n_residual, - int n_models_store, - bool fix_intercept + const std::shared_ptr<FeatureSpace> feat_space, + const std::string prop_label, + const Unit prop_unit, + const std::vector<double> prop, + const std::vector<double> prop_test, + const std::vector<int> task_sizes_train, + const std::vector<int> task_sizes_test, + const std::vector<int> leave_out_inds, + const int n_dim, + const int n_residual, + const int n_models_store, + const bool fix_intercept ): SISSORegressor(feat_space, prop_label, prop_unit, prop, prop_test, task_sizes_train, task_sizes_test, leave_out_inds, n_dim, n_residual, n_models_store, fix_intercept), _prop_no_log(prop) @@ -20,7 +20,7 @@ SISSOLogRegressor::SISSOLogRegressor( std::transform(_prop.begin(), _prop.end(), _prop.begin(), [](double p){return std::log(p);}); } -void SISSOLogRegressor::set_a(std::vector<int>& inds, int start, int n_samp, double* a) +void SISSOLogRegressor::set_a(const std::vector<int>& inds, const int start, const int n_samp, double* a) { for(int ii = 0; ii < inds.size(); ++ii) { @@ -38,7 
+38,7 @@ void SISSOLogRegressor::set_a(std::vector<int>& inds, int start, int n_samp, do } } -void SISSOLogRegressor::set_error(std::vector<int>& inds, double* coefs, int start, int n_samp, double* error) +void SISSOLogRegressor::set_error(const std::vector<int>& inds, const double* coefs, const int start, const int n_samp, double* error) const { std::fill_n(error + start, n_samp, -1 * (!_fix_intercept) * coefs[inds.size()]); for(int ii = 0; ii < inds.size(); ++ii) @@ -76,7 +76,7 @@ void SISSOLogRegressor::output_models(std::vector<double>& residual) } } -void SISSOLogRegressor::add_model(std::vector<int> indexes) +void SISSOLogRegressor::add_model(const std::vector<int> indexes) { if(_models.size() < indexes.size()) { diff --git a/src/descriptor_identifier/SISSO_DI/SISSOLogRegressor.hpp b/src/descriptor_identifier/SISSO_DI/SISSOLogRegressor.hpp index 6a06c9b0533250d0dd3928d785b5547e452094df..bd54aaddee7560ad6ce46d052765eac3f65e30d3 100644 --- a/src/descriptor_identifier/SISSO_DI/SISSOLogRegressor.hpp +++ b/src/descriptor_identifier/SISSO_DI/SISSOLogRegressor.hpp @@ -45,18 +45,18 @@ public: * @param fix_intrecept If true fix intercept to 0 */ SISSOLogRegressor( - std::shared_ptr<FeatureSpace> feat_space, - std::string prop_label, - Unit prop_unit, - std::vector<double> prop, - std::vector<double> prop_test, - std::vector<int> task_sizes_train, - std::vector<int> task_sizes_test, - std::vector<int> leave_out_inds, - int n_dim, - int n_residual, - int n_models_store, - bool fix_intercept=false + const std::shared_ptr<FeatureSpace> feat_space, + const std::string prop_label, + const Unit prop_unit, + const std::vector<double> prop, + const std::vector<double> prop_test, + const std::vector<int> task_sizes_train, + const std::vector<int> task_sizes_test, + const std::vector<int> leave_out_inds, + const int n_dim, + const int n_residual, + const int n_models_store, + const bool fix_intercept=false ); /** @@ -66,7 +66,7 @@ public: * @param start The index in the property 
and feature vectors start copying into _b and _a * @param n_samp number of samples to perform least squares optimization on */ - void set_a(std::vector<int>& inds, int start, int n_samp, double* a); + void set_a(const std::vector<int>& inds, const int start, const int n_samp, double* a); /** * @brief Set the residual for the next step @@ -77,14 +77,14 @@ public: * @param n_samp number of samples to perform least squares optimization on * @param error pointer to the error vector */ - virtual void set_error(std::vector<int>& inds, double* coefs, int start, int n_samp, double* error); + virtual void set_error(const std::vector<int>& inds, const double* coefs, const int start, const int n_samp, double* error) const; /** * @brief Adds a model to the model list * * @param indexes Vector storing all of the indexes of features in _phi_selected to use for the model */ - void add_model(std::vector<int> indexes); + void add_model(const std::vector<int> indexes); /** * @brief Output the models to files and copy the residuals @@ -104,7 +104,7 @@ public: /** * @brief Acessor function for models */ - inline std::vector<std::vector<ModelLogRegressor>> models(){return _models;} + inline std::vector<std::vector<ModelLogRegressor>> models() const {return _models;} // Python interface functions #ifdef PY_BINDINGS diff --git a/src/descriptor_identifier/SISSO_DI/SISSORegressor.cpp b/src/descriptor_identifier/SISSO_DI/SISSORegressor.cpp index 35ed8000699c4f2789a1e7d91f492b43c82e6645..56484b0864389f0c926769df7eb791bd64101f61 100644 --- a/src/descriptor_identifier/SISSO_DI/SISSORegressor.cpp +++ b/src/descriptor_identifier/SISSO_DI/SISSORegressor.cpp @@ -1,18 +1,18 @@ #include <descriptor_identifier/SISSO_DI/SISSORegressor.hpp> SISSORegressor::SISSORegressor( - std::shared_ptr<FeatureSpace> feat_space, - std::string prop_label, - Unit prop_unit, - std::vector<double> prop, - std::vector<double> prop_test, - std::vector<int> task_sizes_train, - std::vector<int> task_sizes_test, - 
std::vector<int> leave_out_inds, - int n_dim, - int n_residual, - int n_models_store, - bool fix_intercept + const std::shared_ptr<FeatureSpace> feat_space, + const std::string prop_label, + const Unit prop_unit, + const std::vector<double> prop, + const std::vector<double> prop_test, + const std::vector<int> task_sizes_train, + const std::vector<int> task_sizes_test, + const std::vector<int> leave_out_inds, + const int n_dim, + const int n_residual, + const int n_models_store, + const bool fix_intercept ): SISSO_DI( feat_space, @@ -30,7 +30,7 @@ SISSORegressor::SISSORegressor( ) {} -void SISSORegressor::set_a(std::vector<int>& inds, int start, int n_samp, double* a) +void SISSORegressor::set_a(const std::vector<int>& inds, int start, int n_samp, double* a) { for(int ii = 0; ii < inds.size(); ++ii) { @@ -43,7 +43,7 @@ void SISSORegressor::set_a(std::vector<int>& inds, int start, int n_samp, doubl } } -void SISSORegressor::least_squares(std::vector<int>& inds, double* coeffs, int start, int n_samp, double* a, double*b, double* work) +void SISSORegressor::least_squares(const std::vector<int>& inds, double* coeffs, const int start, const int n_samp, double* a, double*b, double* work) { int info; int n_dim = inds.size() + (1 - _fix_intercept); @@ -79,7 +79,7 @@ void SISSORegressor::least_squares(std::vector<int>& inds, double* coeffs, int s } } -int SISSORegressor::get_opt_lwork(int n_dim, double* a, double* b) +int SISSORegressor::get_opt_lwork(const int n_dim, double* a, double* b) { std::vector<double> work(1, 0.0); int info = 0, rank = 0; @@ -96,7 +96,7 @@ int SISSORegressor::get_opt_lwork(int n_dim, double* a, double* b) } } -void SISSORegressor::set_error(std::vector<int>& inds, double* coefs, int start, int n_samp, double* error) +void SISSORegressor::set_error(const std::vector<int>& inds, const double* coefs, const int start, const int n_samp, double* error) const { std::fill_n(error + start, n_samp, -1 * (!_fix_intercept) * coefs[inds.size()]); for(int ii = 
0; ii < inds.size(); ++ii) @@ -107,7 +107,7 @@ void SISSORegressor::set_error(std::vector<int>& inds, double* coefs, int start, } -void SISSORegressor::add_model(std::vector<int> indexes) +void SISSORegressor::add_model(const std::vector<int> indexes) { if(_models.size() < indexes.size()) _models.push_back({}); @@ -142,7 +142,7 @@ void SISSORegressor::output_models(std::vector<double>& residual) } } -void SISSORegressor::l0_norm(std::vector<double>& prop, int n_dim) +void SISSORegressor::l0_norm(std::vector<double>& prop, const int n_dim) { int n_get_models = std::max(_n_residual, _n_models_store); std::vector<double> coefs(n_dim + 1, 0.0); diff --git a/src/descriptor_identifier/SISSO_DI/SISSORegressor.hpp b/src/descriptor_identifier/SISSO_DI/SISSORegressor.hpp index 32ff50e31a7848d92e4ef260f2c5470e28675ae0..105e72de25c3ca348ed7dc672d0c2aa7a6e5eb68 100644 --- a/src/descriptor_identifier/SISSO_DI/SISSORegressor.hpp +++ b/src/descriptor_identifier/SISSO_DI/SISSORegressor.hpp @@ -49,18 +49,18 @@ public: * @param fix_intrecept If true fix intercept to 0 */ SISSORegressor( - std::shared_ptr<FeatureSpace> feat_space, - std::string prop_label, - Unit prop_unit, - std::vector<double> prop, - std::vector<double> prop_test, - std::vector<int> task_sizes_train, - std::vector<int> task_sizes_test, - std::vector<int> leave_out_inds, - int n_dim, - int n_residual, - int n_models_store, - bool fix_intercept=false + const std::shared_ptr<FeatureSpace> feat_space, + const std::string prop_label, + const Unit prop_unit, + const std::vector<double> prop, + const std::vector<double> prop_test, + const std::vector<int> task_sizes_train, + const std::vector<int> task_sizes_test, + const std::vector<int> leave_out_inds, + const int n_dim, + const int n_residual, + const int n_models_store, + const bool fix_intercept=false ); /** @@ -72,7 +72,7 @@ public: * @param n_samp number of samples to perform least squares optimization on * @param error pointer to the error vector */ - virtual void 
set_error(std::vector<int>& inds, double* coefs, int start, int n_samp, double* error); + virtual void set_error(const std::vector<int>& inds, const double* coefs, const int start, const int n_samp, double* error) const; /** * @brief Get the optimal size of the working array for dgels (solve Ax = b) @@ -82,7 +82,7 @@ public: * @param b the pointer to the b vector * @return Optimal size of the working array */ - int get_opt_lwork(int n_dim, double* a, double* b); + int get_opt_lwork(const int n_dim, double* a, double* b); /** * @brief Preform Least squares optimization using dgels (solve Ax = b) @@ -95,7 +95,7 @@ public: * @param b the pointer to the b vector * @param work pointer to the working space for dgels */ - void least_squares(std::vector<int>& inds, double* coefs, int start, int n_samp, double* a, double*b, double* work); + void least_squares(const std::vector<int>& inds, double* coefs, const int start, const int n_samp, double* a, double*b, double* work); /** * @brief Set the A matrix for the least squares problem @@ -105,14 +105,14 @@ public: * @param n_samp number of samples to perform least squares optimization on * @param a the pointer to the A matrix */ - virtual void set_a(std::vector<int>& inds, int start, int n_samp, double* a); + virtual void set_a(const std::vector<int>& inds, int start, int n_samp, double* a); /** * @brief Adds a model to the model list * * @param indexes Vector storing all of the indexes of features in _phi_selected to use for the model */ - virtual void add_model(std::vector<int> indexes); + virtual void add_model(const std::vector<int> indexes); /** * @brief Output the models to files and copy the residuals @@ -127,7 +127,7 @@ public: * @param prop The property to fit * @param n_dim the dimensionality of the model */ - virtual void l0_norm(std::vector<double>& prop, int n_dim); + virtual void l0_norm(std::vector<double>& prop, const int n_dim); // DocString: sisso_reg_fit /** @@ -139,7 +139,7 @@ public: /** * @brief Acessor 
function for models */ - inline std::vector<std::vector<ModelRegressor>> models(){return _models;} + inline std::vector<std::vector<ModelRegressor>> models() const {return _models;} // Python interface functions #ifdef PY_BINDINGS diff --git a/src/descriptor_identifier/SISSO_DI/SISSO_DI.cpp b/src/descriptor_identifier/SISSO_DI/SISSO_DI.cpp index d8e2cf59bd381d3b32ed9bdaffbac40423b206f3..bbfff6f844527776e8e05406711dc8ec36493a84 100644 --- a/src/descriptor_identifier/SISSO_DI/SISSO_DI.cpp +++ b/src/descriptor_identifier/SISSO_DI/SISSO_DI.cpp @@ -1,18 +1,18 @@ #include <descriptor_identifier/SISSO_DI/SISSO_DI.hpp> SISSO_DI::SISSO_DI( - std::shared_ptr<FeatureSpace> feat_space, - std::string prop_label, - Unit prop_unit, - std::vector<double> prop, - std::vector<double> prop_test, - std::vector<int> task_sizes_train, - std::vector<int> task_sizes_test, - std::vector<int> leave_out_inds, - int n_dim, - int n_residual, - int n_models_store, - bool fix_intercept + const std::shared_ptr<FeatureSpace> feat_space, + const std::string prop_label, + const Unit prop_unit, + const std::vector<double> prop, + const std::vector<double> prop_test, + const std::vector<int> task_sizes_train, + const std::vector<int> task_sizes_test, + const std::vector<int> leave_out_inds, + const int n_dim, + const int n_residual, + const int n_models_store, + const bool fix_intercept ): _prop(prop), _prop_test(prop_test), diff --git a/src/descriptor_identifier/SISSO_DI/SISSO_DI.hpp b/src/descriptor_identifier/SISSO_DI/SISSO_DI.hpp index 349d1a50e65181a0ebc25d7e46bdebeb14bc2ba2..7b123c5142b8903cf1087c1fa32a7c6318b4302a 100644 --- a/src/descriptor_identifier/SISSO_DI/SISSO_DI.hpp +++ b/src/descriptor_identifier/SISSO_DI/SISSO_DI.hpp @@ -31,23 +31,23 @@ protected: std::vector<double> _prop_test; //!< Property array (testing data) std::vector<double> _error; //!< Array to calculate the residuals for the models (training data) - std::vector<int> _task_sizes_train; //!< Number of training samples per 
task - std::vector<int> _task_sizes_test; //!< Number of testing samples per task - std::vector<int> _leave_out_inds; //!< List of indexes from the initial data file in the test set + const std::vector<int> _task_sizes_train; //!< Number of training samples per task + const std::vector<int> _task_sizes_test; //!< Number of testing samples per task + const std::vector<int> _leave_out_inds; //!< List of indexes from the initial data file in the test set - Unit _prop_unit; //!< The Unit for the property - std::string _prop_label; //!< The label for the property + const Unit _prop_unit; //!< The Unit for the property + const std::string _prop_label; //!< The label for the property - std::shared_ptr<FeatureSpace> _feat_space; //!< Feature Space for the problem + const std::shared_ptr<FeatureSpace> _feat_space; //!< Feature Space for the problem std::shared_ptr<MPI_Interface> _mpi_comm; //!< MPI Communicator - int _n_task; //!< The number of tasks - int _n_samp; //!< the number of samples per feature - int _n_dim; //!< Number of dimensions to calculate - int _n_residual; //!< Number of residuals to pass to the next sis model - int _n_models_store; //!< The number of models to store in files + const int _n_task; //!< The number of tasks + const int _n_samp; //!< the number of samples per feature + const int _n_dim; //!< Number of dimensions to calculate + const int _n_residual; //!< Number of residuals to pass to the next sis model + const int _n_models_store; //!< The number of models to store in files - bool _fix_intercept; //!< If true fix intercept to 0 + const bool _fix_intercept; //!< If true fix intercept to 0 public: /** * @brief Constructor for the Regressor @@ -66,18 +66,18 @@ public: * @param fix_intrecept If true fix intercept to 0 */ SISSO_DI( - std::shared_ptr<FeatureSpace> feat_space, - std::string prop_label, - Unit prop_unit, - std::vector<double> prop, - std::vector<double> prop_test, - std::vector<int> task_sizes_train, - std::vector<int> 
task_sizes_test, - std::vector<int> leave_out_inds, - int n_dim, - int n_residual, - int n_models_store, - bool fix_intercept=false + const std::shared_ptr<FeatureSpace> feat_space, + const std::string prop_label, + const Unit prop_unit, + const std::vector<double> prop, + const std::vector<double> prop_test, + const std::vector<int> task_sizes_train, + const std::vector<int> task_sizes_test, + const std::vector<int> leave_out_inds, + const int n_dim, + const int n_residual, + const int n_models_store, + const bool fix_intercept=false ); /** @@ -92,51 +92,51 @@ public: * @param prop The property to fit * @param n_dim the dimensionality of the model */ - virtual void l0_norm(std::vector<double>& prop, int n_dim) = 0; + virtual void l0_norm(std::vector<double>& prop, const int n_dim) = 0; /** * @brief Acessor function for feat_space */ - inline std::shared_ptr<FeatureSpace> feat_space(){return _feat_space;} + inline std::shared_ptr<FeatureSpace> feat_space() const {return _feat_space;} /** * @brief Acessor function for prop */ - inline std::vector<double> prop(){return _prop;} + inline std::vector<double> prop() const {return _prop;} /** * @brief Acessor function for prop */ - inline std::vector<double> prop_test(){return _prop_test;} + inline std::vector<double> prop_test() const {return _prop_test;} /** * @brief Acessor function for the error vector */ - inline std::vector<double> error(){return _error;} + inline std::vector<double> error() const {return _error;} // DocString: sisso_di_n_samp /** * @brief The number of samples per feature */ - inline int n_samp(){return _n_samp;} + inline int n_samp() const {return _n_samp;} // DocString: sisso_di_n_dim /** * @brief The maximum model dimensionality */ - inline int n_dim(){return _n_dim;} + inline int n_dim() const {return _n_dim;} // DocString: sisso_di_n_residual /** * @brief The number of residuals each iteration of SIS acts on */ - inline int n_residual(){return _n_residual;} + inline int n_residual() const 
{return _n_residual;} // DocString: sisso_di_n_modles_store /** * @brief The number of models to store in output files */ - inline int n_models_store(){return _n_models_store;} + inline int n_models_store() const {return _n_models_store;} // Python interface functions #ifdef PY_BINDINGS diff --git a/src/feature_creation/feature_space/FeatureSpace.cpp b/src/feature_creation/feature_space/FeatureSpace.cpp index 7cac0863a55b362c1b39d724d8120d1ab3a84e7a..938abef653bd56874b85b21f7326206d439cdd6a 100644 --- a/src/feature_creation/feature_space/FeatureSpace.cpp +++ b/src/feature_creation/feature_space/FeatureSpace.cpp @@ -38,7 +38,6 @@ BOOST_CLASS_EXPORT_GUID(AbsParamNode, "AbsParamNode") BOOST_CLASS_EXPORT_GUID(InvParamNode, "InvParamNode") BOOST_CLASS_EXPORT_GUID(SinParamNode, "SinParamNode") BOOST_CLASS_EXPORT_GUID(CosParamNode, "CosParamNode") -#endif FeatureSpace::FeatureSpace( std::shared_ptr<MPI_Interface> mpi_comm, @@ -82,6 +81,46 @@ FeatureSpace::FeatureSpace( { initialize_fs(); } +#else +FeatureSpace::FeatureSpace( + std::shared_ptr<MPI_Interface> mpi_comm, + std::vector<node_ptr> phi_0, + std::vector<std::string> allowed_ops, + std::vector<double> prop, + std::vector<int> task_sizes, + std::string project_type, + int max_phi, + int n_sis_select, + int max_store_rung, + int n_rung_generate, + double cross_corr_max, + double min_abs_feat_val, + double max_abs_feat_val +): + _phi(phi_0), + _phi_0(phi_0), + _allowed_ops(allowed_ops), + _prop(prop), + _scores(phi_0.size(), 0.0), + _task_sizes(task_sizes), + _start_gen(1, 0), + _feature_space_file("feature_space/selected_features.txt"), + _feature_space_summary_file("feature_space/SIS_summary.txt"), + _project_type(project_type), + _mpi_comm(mpi_comm), + _cross_cor_max(cross_corr_max), + _l_bound(min_abs_feat_val), + _u_bound(max_abs_feat_val), + _max_phi(max_phi), + _n_sis_select(n_sis_select), + _n_samp(phi_0[0]->n_samp()), + _n_feat(phi_0.size()), + _n_rung_store(max_store_rung), + 
_n_rung_generate(n_rung_generate) +{ + initialize_fs(); +} +#endif void FeatureSpace::initialize_fs() { @@ -179,7 +218,7 @@ void FeatureSpace::set_op_lists() #endif } -void FeatureSpace::initialize_fs_output_files() +void FeatureSpace::initialize_fs_output_files() const { if(_mpi_comm->rank() == 0) { @@ -200,8 +239,8 @@ void FeatureSpace::generate_new_feats( std::vector<node_ptr>& feat_set, unsigned long int& feat_ind, std::shared_ptr<NLOptimizer> optimizer, - double l_bound, - double u_bound + const double l_bound, + const double u_bound ) { @@ -255,8 +294,8 @@ void FeatureSpace::generate_new_feats( std::vector<node_ptr>::iterator& feat, std::vector<node_ptr>& feat_set, unsigned long int& feat_ind, - double l_bound, - double u_bound + const double l_bound, + const double u_bound ) { unsigned long int phi_ind = feat - _phi.begin(); @@ -640,7 +679,7 @@ void FeatureSpace::generate_feature_space() _n_feat = _phi.size(); } -void FeatureSpace::project_generated(double* prop, int size, std::vector<node_ptr>& phi_sel, std::vector<double>& scores_sel) +void FeatureSpace::project_generated(const double* prop, const int size, std::vector<node_ptr>& phi_sel, std::vector<double>& scores_sel) { std::vector<double> scores_sel_all(node_value_arrs::N_SELECTED); if(node_value_arrs::N_SELECTED > _n_sis_select) @@ -776,7 +815,7 @@ void FeatureSpace::project_generated(double* prop, int size, std::vector<node_pt } } -void FeatureSpace::sis(std::vector<double>& prop) +void FeatureSpace::sis(const std::vector<double>& prop) { // Create output directories if needed boost::filesystem::path p(_feature_space_file.c_str()); @@ -993,7 +1032,7 @@ void FeatureSpace::sis(std::vector<double>& prop) } } -void FeatureSpace::remove_feature(int ind) +void FeatureSpace::remove_feature(const int ind) { if(_phi[ind]->selected()) { diff --git a/src/feature_creation/feature_space/FeatureSpace.hpp b/src/feature_creation/feature_space/FeatureSpace.hpp index 
557fb2cfbc8dd7d544a55475fbd10062a6ac12eb..e773d451ce47c4e469495d1d598f93ec77445ca5 100644 --- a/src/feature_creation/feature_space/FeatureSpace.hpp +++ b/src/feature_creation/feature_space/FeatureSpace.hpp @@ -48,9 +48,9 @@ class FeatureSpace std::vector<un_param_op_node_gen> _un_param_operators; //!< list of all parameterized unary operators with free parameters std::vector<bin_param_op_node_gen> _com_bin_param_operators; //!< list of all parameterized commutable binary operators with free parameters std::vector<bin_param_op_node_gen> _bin_param_operators; //!< list of all parameterized binary operators with free parameters + std::vector<std::string> _allowed_param_ops; //!< Map of parameterization operator set (set of operators and non-linear parameters used for a non-linear least squares fit to property) #endif - std::vector<std::string> _allowed_param_ops; //!< Map of parameterization operator set (set of operators and non-linear parameters used for a non-linear least squares fit to property) std::vector<std::string> _allowed_ops; //!< list of all allowed operators strings std::vector<un_op_node_gen> _un_operators; //!< list of all unary operators std::vector<bin_op_node_gen> _com_bin_operators; //!< list of all commutable binary operators @@ -88,6 +88,7 @@ class FeatureSpace public: + #ifdef PARAMETERIZE /** * @brief Constructor for the feature space * @details constructs the feature space from an initial set of features and a list of allowed operators @@ -125,7 +126,43 @@ public: double max_abs_feat_val=1e50, int max_param_depth = -1 ); - + #else + /** + * @brief Constructor for the feature space + * @details constructs the feature space from an initial set of features and a list of allowed operators + * + * @param mpi_comm MPI communicator for the calculations + * @param phi_0 The initial set of features to combine + * @param allowed_ops list of allowed operators + * @param allowed_param_ops dictionary of the parameterizable operators and their associated 
free parameters + * @param prop The property to be learned (training data) + * @param task_sizes The number of samples per task + * @param project_type The projection operator to use + * @param max_phi highest rung value for the calculation + * @param n_sis_select number of features to select during each SIS step + * @param max_store_rung number of rungs to calculate and store the value of the features for all samples + * @param n_rung_generate number of rungs to generate on the fly during SIS (this must be 1 or 0 right now, possible to be higher with recursive algorithm) + * @param cross_corr_max Maximum cross-correlation used for selecting features + * @param min_abs_feat_val minimum absolute feature value + * @param max_abs_feat_val maximum absolute feature value + * @param max_param_depth the maximum paremterization depths for features + */ + FeatureSpace( + std::shared_ptr<MPI_Interface> mpi_comm, + std::vector<node_ptr> phi_0, + std::vector<std::string> allowed_ops, + std::vector<double> prop, + std::vector<int> task_sizes, + std::string project_type="regression", + int max_phi=1, + int n_sis_select=1, + int max_store_rung=-1, + int n_rung_generate=0, + double cross_corr_max=1.0, + double min_abs_feat_val=1e-50, + double max_abs_feat_val=1e50 + ); + #endif /** * @brief Initialize the feature set given a property vector */ @@ -139,7 +176,7 @@ public: /** * @brief Initializes the output files for SIS */ - void initialize_fs_output_files(); + void initialize_fs_output_files() const; /** * @brief Generate the full feature set from the allowed operators and initial feature set * @details populates phi with all features from an initial set and the allowed operators @@ -149,86 +186,86 @@ public: /** * @brief The selected feature space */ - inline std::vector<node_ptr> phi_selected(){return _phi_selected;}; + inline std::vector<node_ptr> phi_selected() const {return _phi_selected;}; /** * @brief The full feature space */ - inline std::vector<node_ptr> phi(){return 
_phi;}; + inline std::vector<node_ptr> phi() const {return _phi;}; /** * @brief The initial feature space */ - inline std::vector<node_ptr> phi0(){return _phi_0;}; + inline std::vector<node_ptr> phi0() const {return _phi_0;}; /** * @brief The vector of projection scores for SIS */ - inline std::vector<double> scores(){return _scores;} + inline std::vector<double> scores() const {return _scores;} /** * @brief The MPI Communicator */ - inline std::shared_ptr<MPI_Interface> mpi_comm(){return _mpi_comm;} + inline std::shared_ptr<MPI_Interface> mpi_comm() const {return _mpi_comm;} /** * @brief The vector storing the number of samples in each task */ - inline std::vector<int> task_sizes(){return _task_sizes;} + inline std::vector<int> task_sizes() const {return _task_sizes;} // DocString: feat_space_feature_space_file /** * @brief The feature space filename */ - inline std::string feature_space_file(){return _feature_space_file;} + inline std::string feature_space_file() const {return _feature_space_file;} // DocString: feat_space_l_bound /** * @brief The minimum absolute value of the feature */ - inline double l_bound(){return _l_bound;} + inline double l_bound() const {return _l_bound;} // DocString: feat_space_u_bound /** * @brief The maximum absolute value of the feature */ - inline double u_bound(){return _u_bound;} + inline double u_bound() const {return _u_bound;} // DocString: feat_space_max_phi /** * @brief The maximum rung of the feature space */ - inline int max_phi(){return _max_phi;} + inline int max_phi() const {return _max_phi;} // DocString: feat_space_n_sis_select /** * @brief The number of features selected in each SIS step */ - inline int n_sis_select(){return _n_sis_select;} + inline int n_sis_select() const {return _n_sis_select;} // DocString: feat_space_n_samp /** * @brief The number of samples per feature */ - inline int n_samp(){return _n_samp;} + inline int n_samp() const {return _n_samp;} // DocString: feat_space_n_feat /** * @brief The number 
of features in the feature space */ - inline int n_feat(){return _n_feat;} + inline int n_feat() const {return _n_feat;} // DocString: feat_space_n_rung_store /** * @brief The number of rungs whose feature training data is stored in memory */ - inline int n_rung_store(){return _n_rung_store;} + inline int n_rung_store() const {return _n_rung_store;} // DocString: feat_space_n_rung_generate /** * @brief The number of rungs to be generated on the fly during SIS */ - inline int n_rung_generate(){return _n_rung_generate;} + inline int n_rung_generate() const {return _n_rung_generate;} #ifdef PARAMETERIZE /** @@ -247,8 +284,8 @@ public: std::vector<node_ptr>& feat_set, unsigned long int& feat_ind, std::shared_ptr<NLOptimizer> optimizer, - double l_bound=1e-50, - double u_bound=1e50 + const double l_bound=1e-50, + const double u_bound=1e50 ); #else /** @@ -265,8 +302,8 @@ public: std::vector<node_ptr>::iterator& feat, std::vector<node_ptr>& feat_set, unsigned long int& feat_ind, - double l_bound=1e-50, - double u_bound=1e50 + const double l_bound=1e-50, + const double u_bound=1e50 ); #endif @@ -279,7 +316,7 @@ public: * @param phi_selected The features that would be selected from the previous rungs * @param scores_selected The projection scores of the features that would be selected from the previous rungs */ - void project_generated(double* prop, int size, std::vector<node_ptr>& phi_selected, std::vector<double>& scores_selected); + void project_generated(const double* prop, const int size, std::vector<node_ptr>& phi_selected, std::vector<double>& scores_selected); /** * @brief Perform SIS on a feature set with a specified property @@ -287,7 +324,7 @@ public: * * @param prop The property to perform SIS over */ - void sis(std::vector<double>& prop); + void sis(const std::vector<double>& prop); // DocString: feat_space_feat_in_phi /** @@ -296,7 +333,7 @@ public: * @param ind The index of the feature * @return True if feature is in this rank's _phi */ - inline bool 
feat_in_phi(int ind){return (ind >= _phi[0]->feat_ind()) && (ind <= _phi.back()->feat_ind());} + inline bool feat_in_phi(int ind) const {return (ind >= _phi[0]->feat_ind()) && (ind <= _phi.back()->feat_ind());} // DocString: feat_space_remove_feature /** @@ -304,10 +341,11 @@ public: * * @param ind index of feature to remove */ - void remove_feature(int ind); + void remove_feature(const int ind); // Python Interface Functions #ifdef PY_BINDINGS + #ifdef PARAMETERIZE /** * @brief Constructor for the feature space that takes in python objects * @details constructs the feature space from an initial set of features and a list of allowed operators (cpp definition in <python/feature_creation/FeatureSpace.cpp>) @@ -379,7 +417,71 @@ public: double max_abs_feat_val=1e50, int max_param_depth = -1 ); + #else + /** + * @brief Constructor for the feature space that takes in python objects + * @details constructs the feature space from an initial set of features and a list of allowed operators (cpp definition in <python/feature_creation/FeatureSpace.cpp>) + * + * @param phi_0 The initial set of features to combine + * @param allowed_ops list of allowed operators + * @param prop The property to be learned (training data) + * @param task_sizes The number of samples per task + * @param project_type The projection operator to use + * @param max_phi highest rung value for the calculation + * @param n_sis_select number of features to select during each SIS step + * @param max_store_rung number of rungs to calculate and store the value of the features for all samples + * @param n_rung_generate number of rungs to generate on the fly during SIS (this must be 1 or 0 right now, possible to be higher with recursive algorithm) + * @param cross_corr_max Maximum cross-correlation used for selecting features + * @param min_abs_feat_val minimum absolute feature value + * @param max_abs_feat_val maximum absolute feature value + */ + FeatureSpace( + py::list phi_0, + py::list allowed_ops, + 
py::list prop, + py::list task_sizes, + std::string project_type="regression", + int max_phi=1, + int n_sis_select=1, + int max_store_rung=-1, + int n_rung_generate=0, + double cross_corr_max=1.0, + double min_abs_feat_val=1e-50, + double max_abs_feat_val=1e50 + ); + /** + * @brief Constructor for the feature space that takes in python and numpy objects + * @details constructs the feature space from an initial set of features and a list of allowed operators (cpp definition in <python/feature_creation/FeatureSpace.cpp>) + * + * @param phi_0 The initial set of features to combine + * @param allowed_ops list of allowed operators + * @param prop The property to be learned (training data) + * @param task_sizes The number of samples per task + * @param project_type The projection operator to use + * @param max_phi highest rung value for the calculation + * @param n_sis_select number of features to select during each SIS step + * @param max_store_rung number of rungs to calculate and store the value of the features for all samples + * @param n_rung_generate number of rungs to generate on the fly during SIS (this must be 1 or 0 right now, possible to be higher with recursive algorithm) + * @param cross_corr_max Maximum cross-correlation used for selecting features + * @param min_abs_feat_val minimum absolute feature value + * @param max_abs_feat_val maximum absolute feature value + */ + FeatureSpace( + py::list phi_0, + py::list allowed_ops, + np::ndarray prop, + py::list task_sizes, + std::string project_type="regression", + int max_phi=1, + int n_sis_select=1, + int max_store_rung=-1, + int n_rung_generate=0, + double cross_corr_max=1.0, + double min_abs_feat_val=1e-50, + double max_abs_feat_val=1e50 + ); + #endif /** * @brief Constructor for the feature space that takes in python and numpy objects * @details constructs the feature space from an initial set of features and a file containing postfix expressions for the features (cpp definition in 
<python/feature_creation/FeatureSpace.cpp>) @@ -504,7 +606,7 @@ public: * @param ind index of the feature to get * @return A ModelNode of the feature at index ind */ - inline ModelNode get_feature(int ind){return ModelNode(_phi[ind]);} + inline ModelNode get_feature(const int ind) const {return ModelNode(_phi[ind]);} #endif }; diff --git a/src/python/bindings_docstring_keyed.cpp b/src/python/bindings_docstring_keyed.cpp index e6b7c010bd70c324cc6eeb0eae82b3dfb00ecd76..91ddcc9c0b1a9d7651244e8812904200a622ff90 100644 --- a/src/python/bindings_docstring_keyed.cpp +++ b/src/python/bindings_docstring_keyed.cpp @@ -105,6 +105,7 @@ void sisso::feature_creation::registerFeatureSpace() void (FeatureSpace::*sis_list)(list) = &FeatureSpace::sis; void (FeatureSpace::*sis_ndarray)(np::ndarray) = &FeatureSpace::sis; + #ifdef PARAMETERIZE class_<FeatureSpace>("FeatureSpace", init<list, list, list, np::ndarray, list, optional<std::string, int, int, int, int, double, double, double, int>>()) .def(init<list, list, list, list, list, optional<std::string, int, int, int, int, double, double, double, int>>()) .def(init<std::string, list, list, list, optional<std::string, int, double>>()) @@ -131,6 +132,34 @@ void sisso::feature_creation::registerFeatureSpace() .add_property("n_rung_store", &FeatureSpace::n_rung_store, "@DocString_feat_space_n_rung_store@") .add_property("n_rung_generate", &FeatureSpace::n_rung_generate, "@DocString_feat_space_n_rung_generate@") ; + #else + class_<FeatureSpace>("FeatureSpace", init<list, list, np::ndarray, list, optional<std::string, int, int, int, int, double, double, double>>()) + .def(init<list, list, list, list, optional<std::string, int, int, int, int, double, double, double>>()) + .def(init<std::string, list, list, list, optional<std::string, int, double>>()) + .def(init<std::string, list, np::ndarray, list, optional<std::string, int, double>>()) + .def("sis", sis_list, "@DocString_feat_space_sis_list@") + .def("sis", sis_ndarray, 
"@DocString_feat_space_sis_arr@") + .def("feat_in_phi", &FeatureSpace::feat_in_phi, "@DocString_feat_space_feat_in_phi@") + .def("remove_feature", &FeatureSpace::remove_feature, "@DocString_feat_space_remove_feature@") + .def("get_feature", &FeatureSpace::get_feature, "@DocString_feat_space_get_feature@") + .add_property("phi_selected", &FeatureSpace::phi_selected_py, "@DocString_feat_space_phi_selected_py@") + .add_property("phi0", &FeatureSpace::phi0_py, "@DocString_feat_space_phi0_py@") + .add_property("phi", &FeatureSpace::phi_py, "@DocString_feat_space_phi_py@") + .add_property("scores", &FeatureSpace::scores_py, "@DocString_feat_space_scores_py@") + .add_property("task_sizes", &FeatureSpace::task_sizes_py, "@DocString_feat_space_task_sizes_py@") + .add_property("allowed_ops", &FeatureSpace::allowed_ops_py, "@DocString_feat_space_allowed_ops_py@") + .add_property("start_gen", &FeatureSpace::start_gen_py, "@DocString_feat_space_start_gen_py@") + .add_property("feature_space_file", &FeatureSpace::feature_space_file, "@DocString_feat_space_feature_space_file@") + .add_property("l_bound", &FeatureSpace::l_bound, "@DocString_feat_space_l_bound@") + .add_property("u_bound", &FeatureSpace::u_bound, "@DocString_feat_space_u_bound@") + .add_property("max_phi", &FeatureSpace::max_phi, "@DocString_feat_space_max_phi@") + .add_property("n_sis_select", &FeatureSpace::n_sis_select, "@DocString_feat_space_n_sis_select@") + .add_property("n_samp", &FeatureSpace::n_samp, "@DocString_feat_space_n_samp@") + .add_property("n_feat", &FeatureSpace::n_feat, "@DocString_feat_space_n_feat@") + .add_property("n_rung_store", &FeatureSpace::n_rung_store, "@DocString_feat_space_n_rung_store@") + .add_property("n_rung_generate", &FeatureSpace::n_rung_generate, "@DocString_feat_space_n_rung_generate@") + ; + #endif } void sisso::feature_creation::registerUnit() @@ -801,12 +830,12 @@ void sisso::feature_creation::node::registerSixPowNode() void sisso::descriptor_identifier::registerModel() { 
- np::ndarray (Model::*eval_many_dict)(py::dict) = &Model::eval_many_py; - np::ndarray (Model::*eval_many_ndarr)(np::ndarray) = &Model::eval_many_py; + np::ndarray (Model::*eval_many_dict)(py::dict) const = &Model::eval_many_py; + np::ndarray (Model::*eval_many_ndarr)(np::ndarray) const = &Model::eval_many_py; - double (Model::*eval_ndarr)(np::ndarray) = &Model::eval_py; - double (Model::*eval_list)(py::list) = &Model::eval_py; - double (Model::*eval_dict)(py::dict) = &Model::eval_py; + double (Model::*eval_ndarr)(np::ndarray) const = &Model::eval_py; + double (Model::*eval_list)(py::list) const = &Model::eval_py; + double (Model::*eval_dict)(py::dict) const = &Model::eval_py; class_<sisso::descriptor_identifier::Model_Wrap, boost::noncopyable>("Model", no_init) .def("eval_many", eval_many_dict) .def("eval_many", eval_many_ndarr) diff --git a/src/python/descriptor_identifier/Model.cpp b/src/python/descriptor_identifier/Model.cpp index e0a4231dde7cb7a2577ac6d56fa29021e42a1018..2af0586f21823c2a5f8c95fdc7418a3cc2658427 100644 --- a/src/python/descriptor_identifier/Model.cpp +++ b/src/python/descriptor_identifier/Model.cpp @@ -1,6 +1,6 @@ #include <descriptor_identifier/Model/Model.hpp> -np::ndarray Model::eval_many_py(np::ndarray x_in) +np::ndarray Model::eval_many_py(np::ndarray x_in) const { if(x_in.get_nd() != 2) { @@ -36,7 +36,7 @@ np::ndarray Model::eval_many_py(np::ndarray x_in) return python_conv_utils::to_ndarray<double>(eval(x)); } -np::ndarray Model::eval_many_py(py::dict x_in) +np::ndarray Model::eval_many_py(py::dict x_in) const { std::map<std::string, std::vector<double>> x; py::list keys = x_in.keys(); diff --git a/src/python/feature_creation/FeatureSpace.cpp b/src/python/feature_creation/FeatureSpace.cpp index 4fa6f9147351c23019e4623f989c26092c0cea97..1bf6218883278acfb7006e3d0c5e72e788a3cb2c 100644 --- a/src/python/feature_creation/FeatureSpace.cpp +++ b/src/python/feature_creation/FeatureSpace.cpp @@ -1,5 +1,6 @@ #include 
<feature_creation/feature_space/FeatureSpace.hpp> +#ifdef PARAMETERIZE FeatureSpace::FeatureSpace( py::list phi_0, py::list allowed_ops, @@ -83,6 +84,83 @@ FeatureSpace::FeatureSpace( { initialize_fs(); } +#else +FeatureSpace::FeatureSpace( + py::list phi_0, + py::list allowed_ops, + py::list prop, + py::list task_sizes, + std::string project_type, + int max_phi, + int n_sis_select, + int max_store_rung, + int n_rung_generate, + double cross_corr_max, + double min_abs_feat_val, + double max_abs_feat_val +): + _phi(python_conv_utils::shared_ptr_vec_from_list<Node, FeatureNode>(phi_0)), + _phi_0(_phi), + _allowed_ops(python_conv_utils::from_list<std::string>(allowed_ops)), + _prop(python_conv_utils::from_list<double>(prop)), + _scores(py::len(phi_0), 0.0), + _task_sizes(python_conv_utils::from_list<int>(task_sizes)), + _start_gen(1, 0), + _feature_space_file("feature_space/selected_features.txt"), + _feature_space_summary_file("feature_space/SIS_summary.txt"), + _project_type(project_type), + _mpi_comm(mpi_setup::comm), + _cross_cor_max(cross_corr_max), + _l_bound(min_abs_feat_val), + _u_bound(max_abs_feat_val), + _max_phi(max_phi), + _n_sis_select(n_sis_select), + _n_feat(py::len(phi_0)), + _n_rung_store(max_store_rung), + _n_rung_generate(n_rung_generate), + _n_samp(_phi[0]->n_samp()) +{ + initialize_fs(); +} + +FeatureSpace::FeatureSpace( + py::list phi_0, + py::list allowed_ops, + np::ndarray prop, + py::list task_sizes, + std::string project_type, + int max_phi, + int n_sis_select, + int max_store_rung, + int n_rung_generate, + double cross_corr_max, + double min_abs_feat_val, + double max_abs_feat_val +): + _phi(python_conv_utils::shared_ptr_vec_from_list<Node, FeatureNode>(phi_0)), + _phi_0(_phi), + _allowed_ops(python_conv_utils::from_list<std::string>(allowed_ops)), + _prop(python_conv_utils::from_ndarray<double>(prop)), + _scores(py::len(phi_0), 0.0), + _task_sizes(python_conv_utils::from_list<int>(task_sizes)), + _start_gen(1, 0), + 
_feature_space_file("feature_space/selected_features.txt"), + _feature_space_summary_file("feature_space/SIS_summary.txt"), + _project_type(project_type), + _mpi_comm(mpi_setup::comm), + _cross_cor_max(cross_corr_max), + _l_bound(min_abs_feat_val), + _u_bound(max_abs_feat_val), + _max_phi(max_phi), + _n_sis_select(n_sis_select), + _n_feat(py::len(phi_0)), + _n_rung_store(max_store_rung), + _n_rung_generate(n_rung_generate), + _n_samp(_phi[0]->n_samp()) +{ + initialize_fs(); +} +#endif FeatureSpace::FeatureSpace( std::string feature_file, py::list phi_0,