diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 4408e53bbd86fce56149bf10ac57fa3ae1f5f229..4447c0dc984588c28131c0af8d7cfa095cb17a13 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -68,7 +68,8 @@ test-intel-googletest: - export PATH=$INTEL_COMP_ROOT/bin/:$INTEL_COMP_ROOT/bin/intel64:$I_MPI_ROOT/bin:$PATH - export OMP_NUM_THREADS=2 - export OMP_PLACES=cores - - tests/googletest/sisso_test + - cd tests/ + - googletest/sisso_test test-intel-bin: stage: bin_test @@ -154,7 +155,8 @@ test-gnu-googletest: - source cpp_sisso_env/bin/activate - export OMP_NUM_THREADS=2 - export OMP_PLACES=cores - - tests/googletest/sisso_test + - cd tests/ + - googletest/sisso_test test-gnu-bin: stage: bin_test diff --git a/src/descriptor_identifier/solver/SISSOClassifier.cpp b/src/descriptor_identifier/solver/SISSOClassifier.cpp index 2872659935b56823a59cddafb4e595c9a6d38a7f..f5f355f0376fb3232e24abe12af8ab882c23e347 100644 --- a/src/descriptor_identifier/solver/SISSOClassifier.cpp +++ b/src/descriptor_identifier/solver/SISSOClassifier.cpp @@ -22,42 +22,17 @@ #include "descriptor_identifier/solver/SISSOClassifier.hpp" SISSOClassifier::SISSOClassifier( - const std::shared_ptr<FeatureSpace> feat_space, - const std::string prop_label, - const Unit prop_unit, - const std::vector<double> prop, - const std::vector<double> prop_test, - const std::vector<int> task_sizes_train, - const std::vector<int> task_sizes_test, - const std::vector<int> leave_out_inds, - const int n_dim, - const int n_residual, - const int n_models_store, - const std::vector<std::string> sample_ids_train, - const std::vector<std::string> sample_ids_test, - const std::vector<std::string> task_names + const InputParser inputs, const std::shared_ptr<FeatureSpace> feat_space ): - SISSOSolver( - "classification", - feat_space, - prop_label, - prop_unit, - prop, - prop_test, - task_sizes_train, - task_sizes_test, - leave_out_inds, - n_dim, - n_residual, - n_models_store, - sample_ids_train, - sample_ids_test, - task_names, - 
false - ), + SISSOSolver(inputs, feat_space), _c(1000.0), _width(1.0e-5) { + if(_fix_intercept) + { + std::cerr << "For classification the bias term can't be fixed at 0. Changing it now." << std::endl; + _fix_intercept = false; + } setup_d_mat_transfer(); } diff --git a/src/descriptor_identifier/solver/SISSOClassifier.hpp b/src/descriptor_identifier/solver/SISSOClassifier.hpp index 16ab12d0022799f759f197ddd5a77c9654f5cddc..4becb807f8ea69a6c5342f84ac27fd86fe4d1655 100644 --- a/src/descriptor_identifier/solver/SISSOClassifier.hpp +++ b/src/descriptor_identifier/solver/SISSOClassifier.hpp @@ -51,38 +51,17 @@ protected: int _n_class; //!< The number of classes in the calculation public: - /** - * @brief Create a SISSOClassifier object + + // DocString: sisso_class_init + /** + * @brief Constructor for the classifier * + * @param inputs The InputParser object for the calculation * @param feat_space Feature Space for the problem - * @param prop_label The label for the property - * @param prop_unit The unit of the property - * @param prop The value of the property to evaluate the loss function against for the training set - * @param prpo_test The value of the property to evaluate the loss function against for the test set - * @param task_sizes_train Number of training samples per task - * @param task_sizes_test Number of testing samples per task - * @param leave_out_inds List of indexes from the initial data file in the test set - * @param n_dim The maximum number of features allowed in the linear model - * @param n_residual Number of residuals to pass to the next SIS operation - * @param n_models_store The number of models to output to files - * @param sample_ids_train A list storing all sample ids for the training samples - * @param sample_ids_test A list storing all sample ids for the test samples - * @param task_names A list storing the ID of the task names */ SISSOClassifier( - const std::shared_ptr<FeatureSpace> feat_space, - const std::string prop_label, Unit 
prop_unit, - const std::vector<double> prop, - const std::vector<double> prop_test, - const std::vector<int> task_sizes_train, - const std::vector<int> task_sizes_test, - const std::vector<int> leave_out_inds, - const int n_dim, - const int n_residual, - const int n_models_store, - const std::vector<std::string> sample_ids_train, - const std::vector<std::string> sample_ids_test, - const std::vector<std::string> task_names + const InputParser inputs, + const std::shared_ptr<FeatureSpace> feat_space ); /** @@ -136,78 +115,6 @@ public: // Python interface functions #ifdef PY_BINDINGS - // DocString: sisso_class_init_arr - /** - * @brief Create a SISSOClassifier object - * - * @param feat_space (FeatureSpace) Feature Space for the problem - * @param prop_label (str) The label for the property - * @param prop_unit (Unit) The unit of the property - * @param prop (np.ndarray) The value of the property to evaluate the loss function against for the training set - * @param prpo_test (np.ndarray) The value of the property to evaluate the loss function against for the test set - * @param task_sizes_train (list) Number of training samples per task - * @param task_sizes_test (list) Number of testing samples per task - * @param leave_out_inds (list) List of indexes from the initial data file in the test set - * @param n_dim (int) The maximum number of features allowed in the linear model - * @param n_residual (int) Number of residuals to pass to the next SIS operation - * @param n_models_store (int) The number of models to output to files - * @param sample_ids_train A list storing all sample ids for the training samples - * @param sample_ids_test A list storing all sample ids for the test samples - * @param task_names A list storing the ID of the task names - */ - SISSOClassifier( - std::shared_ptr<FeatureSpace> feat_space, - std::string prop_label, - Unit prop_unit, - np::ndarray prop, - np::ndarray prop_test, - py::list task_sizes_train, - py::list task_sizes_test, - py::list 
leave_out_inds, - int n_dim, - int n_residual, - int n_models_store, - py::list sample_ids_train, - py::list sample_ids_test, - py::list task_names - ); - - // DocString: sisso_class_init_list - /** - * @brief Create a SISSOClassifier object - * - * @param feat_space (FeatureSpace) Feature Space for the problem - * @param prop_label (str) The label for the property - * @param prop_unit (Unit) The unit of the property - * @param prop (list) The value of the property to evaluate the loss function against for the training set - * @param prpo_test (list) The value of the property to evaluate the loss function against for the test set - * @param task_sizes_train (list) Number of training samples per task - * @param task_sizes_test (list) Number of testing samples per task - * @param leave_out_inds (list) List of indexes from the initial data file in the test set - * @param n_dim (int) The maximum number of features allowed in the linear model - * @param n_residual (int) Number of residuals to pass to the next SIS operation - * @param n_models_store (int) The number of models to output to files - * @param sample_ids_train A list storing all sample ids for the training samples - * @param sample_ids_test A list storing all sample ids for the test samples - * @param task_names A list storing the ID of the task names - */ - SISSOClassifier( - std::shared_ptr<FeatureSpace> feat_space, - std::string prop_label, - Unit prop_unit, - py::list prop, - py::list prop_test, - py::list task_sizes_train, - py::list task_sizes_test, - py::list leave_out_inds, - int n_dim, - int n_residual, - int n_models_store, - py::list sample_ids_train, - py::list sample_ids_test, - py::list task_names - ); - // DocString: sisso_class_models_py /** * @brief The selected models (n_dim, n_models_store) diff --git a/src/descriptor_identifier/solver/SISSOLogRegressor.cpp b/src/descriptor_identifier/solver/SISSOLogRegressor.cpp index 
9d5029662ee0b1403c0258e9ae87e2d3c7bede5c..c2c634262348ec5fda749d342c1499940a11d1d7 100644 --- a/src/descriptor_identifier/solver/SISSOLogRegressor.cpp +++ b/src/descriptor_identifier/solver/SISSOLogRegressor.cpp @@ -22,44 +22,14 @@ #include "descriptor_identifier/solver/SISSOLogRegressor.hpp" SISSOLogRegressor::SISSOLogRegressor( - const std::shared_ptr<FeatureSpace> feat_space, - const std::string prop_label, - const Unit prop_unit, - const std::vector<double> prop, - const std::vector<double> prop_test, - const std::vector<int> task_sizes_train, - const std::vector<int> task_sizes_test, - const std::vector<int> leave_out_inds, - const int n_dim, - const int n_residual, - const int n_models_store, - const std::vector<std::string> sample_ids_train, - const std::vector<std::string> sample_ids_test, - const std::vector<std::string> task_names, - const bool fix_intercept + const InputParser inputs, const std::shared_ptr<FeatureSpace> feat_space ): - SISSORegressor( - feat_space, - prop_label, - prop_unit, - prop, - prop_test, - task_sizes_train, - task_sizes_test, - leave_out_inds, - n_dim, - n_residual, - n_models_store, - sample_ids_train, - sample_ids_test, - task_names, - fix_intercept - ) + SISSORegressor(inputs, feat_space) { - std::vector<double> log_prop(prop); - std::vector<double> log_prop_test(prop_test); - std::transform(prop.begin(), prop.end(), log_prop.begin(), [](double p){return std::log(p);}); - std::transform(prop_test.begin(), prop_test.end(), log_prop_test.begin(), [](double p){return std::log(p);}); + std::vector<double> log_prop(inputs.prop_train().size(), 0.0); + std::vector<double> log_prop_test(inputs.prop_test().size(), 0.0); + std::transform(inputs.prop_train().begin(), inputs.prop_train().end(), log_prop.begin(), [](double p){return std::log(p);}); + std::transform(inputs.prop_test().begin(), inputs.prop_test().end(), log_prop_test.begin(), [](double p){return std::log(p);}); _loss = loss_function_util::get_loss_function( "log_regression", 
diff --git a/src/descriptor_identifier/solver/SISSOLogRegressor.hpp b/src/descriptor_identifier/solver/SISSOLogRegressor.hpp index 34c63511f94cf4fab4e89f1aef933e567e356c26..21ab986d7676340910796e7306f34734156c1a6b 100644 --- a/src/descriptor_identifier/solver/SISSOLogRegressor.hpp +++ b/src/descriptor_identifier/solver/SISSOLogRegressor.hpp @@ -41,41 +41,17 @@ private: std::vector<std::vector<ModelLogRegressor>> _models; //!< List of models public: + + // DocString: sisso_log_reg_init /** - * @brief Create a SISSOLogRegressor object + * @brief Constructor for the log regressor * + * @param inputs The InputParser object for the calculation * @param feat_space Feature Space for the problem - * @param prop_label The label for the property - * @param prop_unit The unit of the property - * @param prop The value of the property to evaluate the loss function against for the training set - * @param prpo_test The value of the property to evaluate the loss function against for the test set - * @param task_sizes_train Number of training samples per task - * @param task_sizes_test Number of testing samples per task - * @param leave_out_inds List of indexes from the initial data file in the test set - * @param n_dim The maximum number of features allowed in the linear model - * @param n_residual Number of residuals to pass to the next SIS operation - * @param n_models_store The number of models to output to files - * @param sample_ids_train A list storing all sample ids for the training samples - * @param sample_ids_test A list storing all sample ids for the test samples - * @param task_names A list storing the ID of the task names - * @param fix_intrecept If true the bias term is fixed at 0 */ SISSOLogRegressor( - const std::shared_ptr<FeatureSpace> feat_space, - const std::string prop_label, - const Unit prop_unit, - const std::vector<double> prop, - const std::vector<double> prop_test, - const std::vector<int> task_sizes_train, - const std::vector<int> task_sizes_test, - 
const std::vector<int> leave_out_inds, - const int n_dim, - const int n_residual, - const int n_models_store, - const std::vector<std::string> sample_ids_train, - const std::vector<std::string> sample_ids_test, - const std::vector<std::string> task_names, - const bool fix_intercept=false + const InputParser inputs, + const std::shared_ptr<FeatureSpace> feat_space ); /** @@ -104,81 +80,6 @@ public: // Python interface functions #ifdef PY_BINDINGS - // DocString: sisso_log_reg_init_arr - /** - * @brief Create a SISSOLogRegressor object - * - * @param feat_space (FeatureSpace) Feature Space for the problem - * @param prop_label (str) The label for the property - * @param prop_unit (Unit) The unit of the property - * @param prop (np.ndarray) The value of the property to evaluate the loss function against for the training set - * @param prpo_test (np.ndarray) The value of the property to evaluate the loss function against for the test set - * @param task_sizes_train (list) Number of training samples per task - * @param task_sizes_test (list) Number of testing samples per task - * @param leave_out_inds (list) List of indexes from the initial data file in the test set - * @param n_dim (int) The maximum number of features allowed in the linear model - * @param n_residual (int) Number of residuals to pass to the next SIS operation - * @param n_models_store (int) The number of models to output to files - * @param sample_ids_train A list storing all sample ids for the training samples - * @param sample_ids_test A list storing all sample ids for the test samples - * @param task_names A list storing the ID of the task names - * @param fix_intrecept (bool) If true the bias term is fixed at 0 - */ - SISSOLogRegressor( - std::shared_ptr<FeatureSpace> feat_space, - std::string prop_label, - Unit prop_unit, - np::ndarray prop, - np::ndarray prop_test, - py::list task_sizes_train, - py::list task_sizes_test, - py::list leave_out_inds, - int n_dim, - int n_residual, - int 
n_models_store, - py::list sample_ids_train, - py::list sample_ids_test, - py::list task_names, - bool fix_intercept=false - ); - - // DocString: sisso_log_reg_init_list - /** - * @brief Create a SISSOLogRegressor object - * - * @param feat_space (FeatureSpace) Feature Space for the problem - * @param prop_label (str) The label for the property - * @param prop_unit (Unit) The unit of the property - * @param prop (list) The value of the property to evaluate the loss function against for the training set - * @param prpo_test (list) The value of the property to evaluate the loss function against for the test set - * @param task_sizes_train (list) Number of training samples per task - * @param task_sizes_test (list) Number of testing samples per task - * @param leave_out_inds (list) List of indexes from the initial data file in the test set - * @param n_dim (int) The maximum number of features allowed in the linear model - * @param n_residual (int) Number of residuals to pass to the next SIS operation - * @param n_models_store (int) The number of models to output to files - * @param sample_ids_train A list storing all sample ids for the training samples - * @param sample_ids_test A list storing all sample ids for the test samples - * @param task_names A list storing the ID of the task names - * @param fix_intrecept (bool) If true the bias term is fixed at 0 - */ - SISSOLogRegressor( - std::shared_ptr<FeatureSpace> feat_space, - std::string prop_label, - Unit prop_unit, - py::list prop, - py::list prop_test, - py::list task_sizes_train, - py::list task_sizes_test, - py::list leave_out_inds, - int n_dim, - int n_residual, - int n_models_store, - py::list sample_ids_train, - py::list sample_ids_test, - py::list task_names, - bool fix_intercept=false - ); // DocString: sisso_log_reg_models_py /** diff --git a/src/descriptor_identifier/solver/SISSORegressor.cpp b/src/descriptor_identifier/solver/SISSORegressor.cpp index 
e6853bdc93cfc67a99e00894c57b7d70b86b5c12..b49be9d2feb3e10eba3e3bc944aadbc63bcd6a1b 100644 --- a/src/descriptor_identifier/solver/SISSORegressor.cpp +++ b/src/descriptor_identifier/solver/SISSORegressor.cpp @@ -22,40 +22,9 @@ #include "descriptor_identifier/solver/SISSORegressor.hpp" SISSORegressor::SISSORegressor( - const std::shared_ptr<FeatureSpace> feat_space, - const std::string prop_label, - const Unit prop_unit, - const std::vector<double> prop, - const std::vector<double> prop_test, - const std::vector<int> task_sizes_train, - const std::vector<int> task_sizes_test, - const std::vector<int> leave_out_inds, - const int n_dim, - const int n_residual, - const int n_models_store, - const std::vector<std::string> sample_ids_train, - const std::vector<std::string> sample_ids_test, - const std::vector<std::string> task_names, - const bool fix_intercept + const InputParser inputs, const std::shared_ptr<FeatureSpace> feat_space ): - SISSOSolver( - "regression", - feat_space, - prop_label, - prop_unit, - prop, - prop_test, - task_sizes_train, - task_sizes_test, - leave_out_inds, - n_dim, - n_residual, - n_models_store, - sample_ids_train, - sample_ids_test, - task_names, - fix_intercept - ) + SISSOSolver(inputs, feat_space) {} void SISSORegressor::add_models(const std::vector<std::vector<int>> indexes) diff --git a/src/descriptor_identifier/solver/SISSORegressor.hpp b/src/descriptor_identifier/solver/SISSORegressor.hpp index c7f8ff71040e82a153075963564be1e687e665d9..eb18bc837e5a7d1cf548b2f0f7f4842f2f9e79bc 100644 --- a/src/descriptor_identifier/solver/SISSORegressor.hpp +++ b/src/descriptor_identifier/solver/SISSORegressor.hpp @@ -41,41 +41,16 @@ private: std::vector<std::vector<ModelRegressor>> _models; //!< List of models public: + // DocString: sisso_reg_init /** - * @brief Create a SISSORegressor object + * @brief Constructor for the regressor * + * @param inputs The InputParser object for the calculation * @param feat_space Feature Space for the problem - * 
@param prop_label The label for the property - * @param prop_unit The unit of the property - * @param prop The value of the property to evaluate the loss function against for the training set - * @param prpo_test The value of the property to evaluate the loss function against for the test set - * @param task_sizes_train Number of training samples per task - * @param task_sizes_test Number of testing samples per task - * @param leave_out_inds List of indexes from the initial data file in the test set - * @param n_dim The maximum number of features allowed in the linear model - * @param n_residual Number of residuals to pass to the next SIS operation - * @param n_models_store The number of models to output to files - * @param sample_ids_train A list storing all sample ids for the training samples - * @param sample_ids_test A list storing all sample ids for the test samples - * @param task_names A list storing the ID of the task names - * @param fix_intrecept If true the bias term is fixed at 0 */ SISSORegressor( - const std::shared_ptr<FeatureSpace> feat_space, - const std::string prop_label, - const Unit prop_unit, - const std::vector<double> prop, - const std::vector<double> prop_test, - const std::vector<int> task_sizes_train, - const std::vector<int> task_sizes_test, - const std::vector<int> leave_out_inds, - const int n_dim, - const int n_residual, - const int n_models_store, - const std::vector<std::string> sample_ids_train, - const std::vector<std::string> sample_ids_test, - const std::vector<std::string> task_names, - const bool fix_intercept=false + const InputParser inputs, + const std::shared_ptr<FeatureSpace> feat_space ); /** @@ -111,82 +86,6 @@ public: // Python interface functions #ifdef PY_BINDINGS - // DocString: sisso_reg_init_arr - /** - * @brief Create a SISSORegressor object - * - * @param feat_space (FeatureSpace) Feature Space for the problem - * @param prop_label (str) The label for the property - * @param prop_unit (Unit) The unit of the 
property - * @param prop (np.ndarray) The value of the property to evaluate the loss function against for the training set - * @param prpo_test (np.ndarray) The value of the property to evaluate the loss function against for the test set - * @param task_sizes_train (list) Number of training samples per task - * @param task_sizes_test (list) Number of testing samples per task - * @param leave_out_inds (list) List of indexes from the initial data file in the test set - * @param n_dim (int) The maximum number of features allowed in the linear model - * @param n_residual (int) Number of residuals to pass to the next SIS operation - * @param n_models_store (int) The number of models to output to files - * @param sample_ids_train A list storing all sample ids for the training samples - * @param sample_ids_test A list storing all sample ids for the test samples - * @param task_names A list storing the ID of the task names - * @param fix_intrecept (bool) If true the bias term is fixed at 0 - */ - SISSORegressor( - std::shared_ptr<FeatureSpace> feat_space, - std::string prop_label, - Unit prop_unit, - np::ndarray prop, - np::ndarray prop_test, - py::list task_sizes_train, - py::list task_sizes_test, - py::list leave_out_inds, - int n_dim, - int n_residual, - int n_models_store, - py::list sample_ids_train, - py::list sample_ids_test, - py::list task_names, - bool fix_intercept=false - ); - - // DocString: sisso_reg_init_list - /** - * @brief Create a SISSORegressor object - * - * @param feat_space (FeatureSpace) Feature Space for the problem - * @param prop_label (str) The label for the property - * @param prop_unit (Unit) The unit of the property - * @param prop (list) The value of the property to evaluate the loss function against for the training set - * @param prpo_test (list) The value of the property to evaluate the loss function against for the test set - * @param task_sizes_train (list) Number of training samples per task - * @param task_sizes_test (list) Number of 
testing samples per task - * @param leave_out_inds (list) List of indexes from the initial data file in the test set - * @param n_dim (int) The maximum number of features allowed in the linear model - * @param n_residual (int) Number of residuals to pass to the next SIS operation - * @param n_models_store (int) The number of models to output to files - * @param sample_ids_train A list storing all sample ids for the training samples - * @param sample_ids_test A list storing all sample ids for the test samples - * @param task_names A list storing the ID of the task names - * @param fix_intrecept (bool) If true the bias term is fixed at 0 - */ - SISSORegressor( - std::shared_ptr<FeatureSpace> feat_space, - std::string prop_label, - Unit prop_unit, - py::list prop, - py::list prop_test, - py::list task_sizes_train, - py::list task_sizes_test, - py::list leave_out_inds, - int n_dim, - int n_residual, - int n_models_store, - py::list sample_ids_train, - py::list sample_ids_test, - py::list task_names, - bool fix_intercept=false - ); - // DocString: sisso_reg_models_py /** * @brief The selected models (n_dim, n_models_store) diff --git a/src/descriptor_identifier/solver/SISSOSolver.cpp b/src/descriptor_identifier/solver/SISSOSolver.cpp index 33019c72e0fd554c0cf54d1864cb90b533778017..eeab2fc0fc789a8ffa4483e6020d71ba1dea4270 100644 --- a/src/descriptor_identifier/solver/SISSOSolver.cpp +++ b/src/descriptor_identifier/solver/SISSOSolver.cpp @@ -22,44 +22,29 @@ #include "descriptor_identifier/solver/SISSOSolver.hpp" SISSOSolver::SISSOSolver( - const std::string loss_type, - const std::shared_ptr<FeatureSpace> feat_space, - const std::string prop_label, - const Unit prop_unit, - const std::vector<double> prop, - const std::vector<double> prop_test, - const std::vector<int> task_sizes_train, - const std::vector<int> task_sizes_test, - const std::vector<int> leave_out_inds, - const int n_dim, - const int n_residual, - const int n_models_store, - const std::vector<std::string> 
sample_ids_train, - const std::vector<std::string> sample_ids_test, - const std::vector<std::string> task_names, - const bool fix_intercept + const InputParser inputs, const std::shared_ptr<FeatureSpace> feat_space ): - _sample_ids_train(sample_ids_train), - _sample_ids_test(sample_ids_test), - _task_names(task_names), - _task_sizes_train(task_sizes_train), - _task_sizes_test(task_sizes_test), - _leave_out_inds(leave_out_inds), - _prop_label(prop_label), - _prop_unit(prop_unit), + _sample_ids_train(inputs.sample_ids_train()), + _sample_ids_test(inputs.sample_ids_test()), + _task_names(inputs.task_names()), + _task_sizes_train(inputs.task_sizes_train()), + _task_sizes_test(inputs.task_sizes_test()), + _leave_out_inds(inputs.leave_out_inds()), + _prop_label(inputs.prop_label()), + _prop_unit(inputs.prop_unit()), _feat_space(feat_space), _mpi_comm(feat_space->mpi_comm()), - _n_task(task_sizes_train.size()), - _n_samp(prop.size()), - _n_dim(n_dim), - _n_residual(n_residual), - _n_models_store(n_models_store), - _fix_intercept(fix_intercept) + _n_task(inputs.task_sizes_train().size()), + _n_samp(inputs.prop_train().size()), + _n_dim(inputs.n_dim()), + _n_residual(inputs.n_residual()), + _n_models_store(inputs.n_models_store()), + _fix_intercept(inputs.fix_intercept()) { _loss = loss_function_util::get_loss_function( - loss_type, - prop, - prop_test, + inputs.calc_type(), + inputs.prop_train(), + inputs.prop_test(), _task_sizes_train, _task_sizes_test, _fix_intercept diff --git a/src/descriptor_identifier/solver/SISSOSolver.hpp b/src/descriptor_identifier/solver/SISSOSolver.hpp index 939b8285c53d5688eb05f3c0c02f0cfd8f26cc9d..44955e67619b708cf37a39f9de17812186aea850 100644 --- a/src/descriptor_identifier/solver/SISSOSolver.hpp +++ b/src/descriptor_identifier/solver/SISSOSolver.hpp @@ -59,44 +59,17 @@ protected: const int _n_residual; //!< Number of residuals to pass to the next sis model const int _n_models_store; //!< The number of models to output to files - const bool 
_fix_intercept; //!< If true the bias term is fixed at 0 + bool _fix_intercept; //!< If true the bias term is fixed at 0 public: /** * @brief Constructor for the Solver * + * @param inputs The InputParser object for the calculation * @param feat_space Feature Space for the problem - * @param prop_label The label for the property - * @param prop_unit The unit of the property - * @param prop The value of the property to evaluate the loss function against for the training set - * @param prpo_test The value of the property to evaluate the loss function against for the test set - * @param task_sizes_train Number of training samples per task - * @param task_sizes_test Number of testing samples per task - * @param leave_out_inds List of indexes from the initial data file in the test set - * @param n_dim The maximum number of features allowed in the linear model - * @param n_residual Number of residuals to pass to the next SIS operation - * @param n_models_store The number of models to output to files - * @param sample_ids_train A vector storing all sample ids for the training samples - * @param sample_ids_test A vector storing all sample ids for the test samples - * @param task_names A vector storing the ID of the task names - * @param fix_intrecept If true the bias term is fixed at 0 */ SISSOSolver( - const std::string loss_type, - const std::shared_ptr<FeatureSpace> feat_space, - const std::string prop_label, - const Unit prop_unit, - const std::vector<double> prop, - const std::vector<double> prop_test, - const std::vector<int> task_sizes_train, - const std::vector<int> task_sizes_test, - const std::vector<int> leave_out_inds, - const int n_dim, - const int n_residual, - const int n_models_store, - const std::vector<std::string> sample_ids_train, - const std::vector<std::string> sample_ids_test, - const std::vector<std::string> task_names, - const bool fix_intercept=false + const InputParser inputs, + const std::shared_ptr<FeatureSpace> feat_space ); /** @@ -119,7 
+92,7 @@ public: /** * @brief The property vector for all of the training samples */ - inline std::vector<double> prop() const {return _loss->prop_train();} + inline std::vector<double> prop_train() const {return _loss->prop_train();} /** * @brief The property vector for all of the test samples @@ -158,89 +131,11 @@ public: // Python interface functions #ifdef PY_BINDINGS - // DocString: sisso_di_init_arr - /** - * @brief Constructor for the solver that takes in python objects - * - * @param feat_space Feature Space for the problem - * @param prop_label The label for the property - * @param prop_unit The unit of the property - * @param prop The value of the property to evaluate the loss function against for the training set - * @param prpo_test The value of the property to evaluate the loss function against for the test set - * @param task_sizes_train Number of training samples per task - * @param task_sizes_test Number of testing samples per task - * @param leave_out_inds List of indexes from the initial data file in the test set - * @param n_dim The maximum number of features allowed in the linear model - * @param n_residual Number of residuals to pass to the next SIS operation - * @param n_models_store The number of models to output to files - * @param sample_ids_train A list storing all sample ids for the training samples - * @param sample_ids_test A list storing all sample ids for the test samples - * @param task_names A list storing the ID of the task names - * @param fix_intrecept If true the bias term is fixed at 0 - */ - SISSOSolver( - std::string loss_type, - std::shared_ptr<FeatureSpace> feat_space, - std::string prop_label, - Unit prop_unit, - np::ndarray prop, - np::ndarray prop_test, - py::list task_sizes_train, - py::list task_sizes_test, - py::list leave_out_inds, - int n_dim, - int n_residual, - int n_models_store, - py::list sample_ids_train, - py::list sample_ids_test, - py::list task_names, - bool fix_intercept=false - ); - - // DocString: 
sisso_di_init_list - /** - * @brief Constructor for the solver that takes in python objects - * - * @param feat_space Feature Space for the problem - * @param prop_label The label for the property - * @param prop_unit The unit of the property - * @param prop The value of the property to evaluate the loss function against for the training set - * @param prpo_test The value of the property to evaluate the loss function against for the test set - * @param task_sizes_train Number of training samples per task - * @param task_sizes_test Number of testing samples per task - * @param leave_out_inds List of indexes from the initial data file in the test set - * @param n_dim The maximum number of features allowed in the linear model - * @param n_residual Number of residuals to pass to the next SIS operation - * @param n_models_store The number of models to output to files - * @param sample_ids_train A list storing all sample ids for the training samples - * @param sample_ids_test A list storing all sample ids for the test samples - * @param task_names A list storing the ID of the task names - * @param fix_intrecept If true the bias term is fixed at 0 - */ - SISSOSolver( - std::string loss_type, - std::shared_ptr<FeatureSpace> feat_space, - std::string prop_label, - Unit prop_unit, - py::list prop, - py::list prop_test, - py::list task_sizes_train, - py::list task_sizes_test, - py::list leave_out_inds, - int n_dim, - int n_residual, - int n_models_store, - py::list sample_ids_train, - py::list sample_ids_test, - py::list task_names, - bool fix_intercept=false - ); - - // DocString: sisso_di_prop_py + // DocString: sisso_di_prop_train_py /** * @brief The property vector for all of the training samples as a numpy array */ - inline np::ndarray prop_py(){return python_conv_utils::to_ndarray<double>(_loss->prop_train());} + inline np::ndarray prop_train_py(){return python_conv_utils::to_ndarray<double>(_loss->prop_train());} // DocString: sisso_di_prop_test_py /** diff --git 
a/src/feature_creation/feature_space/FeatureSpace.cpp b/src/feature_creation/feature_space/FeatureSpace.cpp index 72eb25f83e2fc5d2472f01c5bfc49584f661ab62..471e639ec572d914343ac8b74ee8f7f4e41c9e24 100644 --- a/src/feature_creation/feature_space/FeatureSpace.cpp +++ b/src/feature_creation/feature_space/FeatureSpace.cpp @@ -58,97 +58,34 @@ BOOST_CLASS_EXPORT_GUID(AbsParamNode, "AbsParamNode") BOOST_CLASS_EXPORT_GUID(InvParamNode, "InvParamNode") BOOST_CLASS_EXPORT_GUID(SinParamNode, "SinParamNode") BOOST_CLASS_EXPORT_GUID(CosParamNode, "CosParamNode") +#endif -FeatureSpace::FeatureSpace( - std::shared_ptr<MPI_Interface> mpi_comm, - std::vector<node_ptr> phi_0, - std::vector<std::string> allowed_ops, - std::vector<std::string> allowed_param_ops, - std::vector<double> prop, - std::vector<int> task_sizes, - std::string project_type, - int max_rung, - int n_sis_select, - int n_rung_store, - int n_rung_generate, - double cross_corr_max, - double min_abs_feat_val, - double max_abs_feat_val, - int max_param_depth, - bool reparam_residual -): - _phi(phi_0), - _phi_0(phi_0), - _end_no_params(1, phi_0.size()), +FeatureSpace::FeatureSpace(InputParser inputs): + _phi_0(inputs.phi_0_ptrs()), + _phi(inputs.phi_0_ptrs()), + _end_no_params(1, inputs.phi_0().size()), _start_rung_reparam(1, 0), - _allowed_param_ops(allowed_param_ops), - _allowed_ops(allowed_ops), - _prop(prop), - _scores(phi_0.size(), 0.0), - _task_sizes(task_sizes), + _allowed_param_ops(inputs.allowed_param_ops_copy()), + _allowed_ops(inputs.allowed_ops_copy()), + _prop_train(inputs.prop_train_copy()), + _scores( inputs.phi_0().size()), + _task_sizes_train(inputs.task_sizes_train_copy()), _start_rung(1, 0), + _project_type(inputs.calc_type()), _feature_space_file("feature_space/selected_features.txt"), _feature_space_summary_file("feature_space/SIS_summary.txt"), - _project_type(project_type), - _mpi_comm(mpi_comm), - _cross_cor_max(cross_corr_max), - _l_bound(min_abs_feat_val), - _u_bound(max_abs_feat_val), - 
_max_rung(max_rung), - _n_sis_select(n_sis_select), - _n_samp(phi_0[0]->n_samp()), - _n_feat(phi_0.size()), - _n_rung_store(n_rung_store), - _n_rung_generate(n_rung_generate), - _max_param_depth(max_param_depth), - _reparam_residual(reparam_residual) -{ - initialize_fs(); -} -#else -FeatureSpace::FeatureSpace( - std::shared_ptr<MPI_Interface> mpi_comm, - std::vector<node_ptr> phi_0, - std::vector<std::string> allowed_ops, - std::vector<double> prop, - std::vector<int> task_sizes, - std::string project_type, - int max_rung, - int n_sis_select, - int n_rung_store, - int n_rung_generate, - double cross_corr_max, - double min_abs_feat_val, - double max_abs_feat_val -): - _phi(phi_0), - _phi_0(phi_0), - _allowed_ops(allowed_ops), - _prop(prop), - _scores(phi_0.size(), 0.0), - _task_sizes(task_sizes), - _start_rung(1, 0), - _feature_space_file("feature_space/selected_features.txt"), - _feature_space_summary_file("feature_space/SIS_summary.txt"), - _project_type(project_type), - _mpi_comm(mpi_comm), - _cross_cor_max(cross_corr_max), - _l_bound(min_abs_feat_val), - _u_bound(max_abs_feat_val), - _max_rung(max_rung), - _n_sis_select(n_sis_select), - _n_samp(phi_0[0]->n_samp()), - _n_feat(phi_0.size()), - _n_rung_store(n_rung_store), - _n_rung_generate(n_rung_generate), - _max_param_depth(0), - _reparam_residual(false) -{ - initialize_fs(); -} -#endif - -void FeatureSpace::initialize_fs() + _mpi_comm(inputs.mpi_comm()), + _cross_cor_max(inputs.cross_cor_max()), + _l_bound(inputs.l_bound()), + _u_bound(inputs.u_bound()), + _n_rung_store(inputs.n_rung_store()), + _n_feat(inputs.phi_0().size()), + _max_rung(inputs.max_rung()), + _n_sis_select(inputs.n_sis_select()), + _n_samp_train(inputs.n_samp_train()), + _n_rung_generate(inputs.n_rung_generate()), + _max_param_depth(inputs.max_param_depth()), + _reparam_residual(inputs.reparam_residual()) { #ifdef PARAMETERIZE if(_max_param_depth == -1) @@ -167,7 +104,7 @@ void FeatureSpace::initialize_fs() if(_phi[ff]->is_const()) { 
std::cerr << "WARNING: Primary Feature " << _phi[ff]->expr() << " is constant. Removing it from the feature space" << std::endl; - std::fill_n(_phi[ff]->value_ptr(), _n_samp, -1.0); + std::fill_n(_phi[ff]->value_ptr(), _n_samp_train, -1.0); _phi.erase(_phi.begin() + ff); } else @@ -196,7 +133,7 @@ void FeatureSpace::initialize_fs() throw std::logic_error("Requesting to store more rungs than what can be pre-generated."); } - node_value_arrs::set_task_sz_train(_task_sizes); + node_value_arrs::set_task_sz_train(_task_sizes_train); int n_max_ops = 0; for(int rr = 0; rr < _max_rung - _n_rung_store; ++rr) { @@ -204,7 +141,7 @@ void FeatureSpace::initialize_fs() } initialize_fs_output_files(); - comp_feats::set_is_valid_fxn(_project_type, _cross_cor_max, _n_samp, _is_valid, _is_valid_feat_list); + comp_feats::set_is_valid_fxn(_project_type, _cross_cor_max, _n_samp_train, _is_valid, _is_valid_feat_list); set_op_lists(); double start = omp_get_wtime(); @@ -261,6 +198,7 @@ void FeatureSpace::set_op_lists() void FeatureSpace::initialize_fs_output_files() const { + node_value_arrs::initialize_d_matrix_arr(); if(_mpi_comm->rank() == 0) { std::ofstream out_file_stream = std::ofstream(); @@ -385,8 +323,8 @@ void FeatureSpace::generate_reparam_feature_set(const std::vector<double>& prop) #pragma omp parallel firstprivate(feat_ind, l_bound, u_bound) { std::vector<node_ptr> next_phi_private; - std::shared_ptr<NLOptimizer> optimizer_param = nlopt_wrapper::get_optimizer(_project_type, _task_sizes, _prop, _max_rung, _max_param_depth); - std::shared_ptr<NLOptimizer> optimizer_reparam = nlopt_wrapper::get_optimizer(_project_type, _task_sizes, prop, _max_rung, _max_param_depth); + std::shared_ptr<NLOptimizer> optimizer_param = nlopt_wrapper::get_optimizer(_project_type, _task_sizes_train, _prop_train, _max_rung, _max_param_depth); + std::shared_ptr<NLOptimizer> optimizer_reparam = nlopt_wrapper::get_optimizer(_project_type, _task_sizes_train, prop, _max_rung, _max_param_depth); #ifdef 
OMP45 #pragma omp for schedule(monotonic: dynamic) @@ -473,7 +411,7 @@ void FeatureSpace::generate_reparam_feature_set(const std::vector<double>& prop) // Remove identical features std::vector<double> scores(_phi_reparam.size()); _mpi_comm->barrier(); - project_funcs::project_r(_prop.data(), scores.data(), _phi_reparam, _task_sizes, 1); + project_funcs::project_r(_prop_train.data(), scores.data(), _phi_reparam, _task_sizes_train, 1); scores.erase(scores.begin(), scores.begin() + _start_rung_reparam.back()); inds = util_funcs::argsort<double>(scores); @@ -496,7 +434,7 @@ void FeatureSpace::generate_reparam_feature_set(const std::vector<double>& prop) util_funcs::r( _phi_reparam[_start_rung_reparam.back() + inds[sc]]->value_ptr(), _phi_reparam[_start_rung_reparam.back() + inds[sc]]->value_ptr(), - _n_samp + _n_samp_train ) ); for(int sc2 = sc + 1; sc2 < scores.size(); ++sc2) @@ -506,7 +444,7 @@ void FeatureSpace::generate_reparam_feature_set(const std::vector<double>& prop) util_funcs::r( _phi_reparam[_start_rung_reparam.back() + inds[sc]]->value_ptr(), _phi_reparam[_start_rung_reparam.back() + inds[sc2]]->value_ptr(0, true), - _n_samp + _n_samp_train ) ) ); @@ -523,7 +461,7 @@ void FeatureSpace::generate_reparam_feature_set(const std::vector<double>& prop) util_funcs::r( _phi_reparam[_start_rung_reparam.back() + inds[sc]]->value_ptr(), _phi_reparam[_start_rung_reparam.back() + inds[sc]]->value_ptr(), - _n_samp + _n_samp_train ) ); double comp = std::abs( @@ -531,7 +469,7 @@ void FeatureSpace::generate_reparam_feature_set(const std::vector<double>& prop) util_funcs::r( _phi_reparam[_start_rung_reparam.back() + inds[sc]]->value_ptr(), _phi_reparam[_start_rung_reparam.back() + inds[sc + 1]]->value_ptr(0, true), - _n_samp + _n_samp_train ) ) ); @@ -619,7 +557,7 @@ void FeatureSpace::generate_feature_space() #pragma omp parallel firstprivate(feat_ind, l_bound, u_bound) { std::vector<node_ptr> next_phi_private; - std::shared_ptr<NLOptimizer> optimizer = 
nlopt_wrapper::get_optimizer(_project_type, _task_sizes, _prop, _max_rung, _max_param_depth); + std::shared_ptr<NLOptimizer> optimizer = nlopt_wrapper::get_optimizer(_project_type, _task_sizes_train, _prop_train, _max_rung, _max_param_depth); #ifdef OMP45 #pragma omp for schedule(monotonic: dynamic) @@ -714,7 +652,7 @@ void FeatureSpace::generate_feature_space() // Remove identical features _scores.resize(_phi.size()); _mpi_comm->barrier(); - project_funcs::project_r(_prop.data(), _scores.data(), _phi, _task_sizes, 1); + project_funcs::project_r(_prop_train.data(), _scores.data(), _phi, _task_sizes_train, 1); _scores.erase(_scores.begin(), _scores.begin() + _start_rung[_start_rung.size() - 1]); inds = util_funcs::argsort<double>(_scores); @@ -737,7 +675,7 @@ void FeatureSpace::generate_feature_space() util_funcs::r( _phi[_start_rung.back() + inds[sc]]->value_ptr(), _phi[_start_rung.back() + inds[sc]]->value_ptr(), - _n_samp + _n_samp_train ) ); for(int sc2 = sc + 1; sc2 < _scores.size(); ++sc2) @@ -747,7 +685,7 @@ void FeatureSpace::generate_feature_space() util_funcs::r( _phi[_start_rung.back() + inds[sc]]->value_ptr(), _phi[_start_rung.back() + inds[sc2]]->value_ptr(0, true), - _n_samp + _n_samp_train ) ) ); @@ -764,7 +702,7 @@ void FeatureSpace::generate_feature_space() util_funcs::r( _phi[_start_rung.back() + inds[sc]]->value_ptr(), _phi[_start_rung.back() + inds[sc]]->value_ptr(), - _n_samp + _n_samp_train ) ); double comp = std::abs( @@ -772,7 +710,7 @@ void FeatureSpace::generate_feature_space() util_funcs::r( _phi[_start_rung.back() + inds[sc]]->value_ptr(), _phi[_start_rung.back() + inds[sc + 1]]->value_ptr(0, true), - _n_samp + _n_samp_train ) ) ); @@ -1012,12 +950,12 @@ void FeatureSpace::generate_and_project(std::shared_ptr<LossFunction> loss, std: int index_base = _phi.size() + _n_sis_select * (omp_get_thread_num() + _mpi_comm->size()); #ifdef PARAMETERIZE - std::shared_ptr<NLOptimizer> optimizer = nlopt_wrapper::get_optimizer(_project_type, 
_task_sizes, _prop, _max_rung, _max_param_depth); + std::shared_ptr<NLOptimizer> optimizer = nlopt_wrapper::get_optimizer(_project_type, _task_sizes_train, _prop_train, _max_rung, _max_param_depth); std::shared_ptr<NLOptimizer> reparam_optimizer; if(_reparam_residual) { std::vector<double> prop_vec(loss_copy->prop_project()); - reparam_optimizer = nlopt_wrapper::get_optimizer(_project_type, _task_sizes, prop_vec, _max_rung, _max_param_depth); + reparam_optimizer = nlopt_wrapper::get_optimizer(_project_type, _task_sizes_train, prop_vec, _max_rung, _max_param_depth); } else { @@ -1073,7 +1011,7 @@ void FeatureSpace::generate_and_project(std::shared_ptr<LossFunction> loss, std: double cur_score = scores[inds[ii]]; bool valid_feat = _is_valid( generated_phi[inds[ii]]->value_ptr(0), - _n_samp, + _n_samp_train, _cross_cor_max, scores_sel_all, cur_score, @@ -1082,7 +1020,7 @@ void FeatureSpace::generate_and_project(std::shared_ptr<LossFunction> loss, std: ); valid_feat = valid_feat && _is_valid_feat_list( generated_phi[inds[ii]]->value_ptr(0), - _n_samp, + _n_samp_train, _cross_cor_max, phi_sel_private, scores_sel_private, @@ -1119,7 +1057,7 @@ void FeatureSpace::generate_and_project(std::shared_ptr<LossFunction> loss, std: { if( ((phi_sel.size() < _n_sis_select) || (scores_sel_private[sc] < scores_sel[worst_score_ind])) && - _is_valid_feat_list(phi_sel_private[sc]->value_ptr(), _n_samp, _cross_cor_max, phi_sel, scores_sel, scores_sel_private[sc]) + _is_valid_feat_list(phi_sel_private[sc]->value_ptr(), _n_samp_train, _cross_cor_max, phi_sel, scores_sel, scores_sel_private[sc]) ) { if(phi_sel.size() == _n_sis_select) @@ -1154,8 +1092,8 @@ void FeatureSpace::sis(const std::vector<double>& prop) _project_type, prop, {}, - _task_sizes, - std::vector<int>(_task_sizes.size(), 0), + _task_sizes_train, + std::vector<int>(_task_sizes_train.size(), 0), false ) ); @@ -1223,14 +1161,13 @@ void FeatureSpace::sis(std::shared_ptr<LossFunction> loss) if(node_value_arrs::N_SELECTED > 
_n_sis_select) { project_funcs::project_loss(loss, _phi_selected, scores_sel_all.data()); - // _project(prop.data(), scores_sel_all.data(), _phi_selected, _task_sizes, prop.size() / _n_samp); } // Get the best features currently generated on this rank start_time = omp_get_wtime(); while((cur_feat_local != _n_sis_select) && (ii < _scores.size())) { - if(_is_valid(_phi[inds[ii]]->value_ptr(), _n_samp, _cross_cor_max, scores_sel_all, _scores[inds[ii]], cur_feat + cur_feat_local, 0)) + if(_is_valid(_phi[inds[ii]]->value_ptr(), _n_samp_train, _cross_cor_max, scores_sel_all, _scores[inds[ii]], cur_feat + cur_feat_local, 0)) { scores_sel[cur_feat_local] = _scores[inds[ii]]; scores_sel_all[cur_feat + cur_feat_local] = _scores[inds[ii]]; diff --git a/src/feature_creation/feature_space/FeatureSpace.hpp b/src/feature_creation/feature_space/FeatureSpace.hpp index 43d8bd9c6138098a4e318a82f8495bf30e745582..8d3eabd449072218fbdad2767239e90e358b6118 100644 --- a/src/feature_creation/feature_space/FeatureSpace.hpp +++ b/src/feature_creation/feature_space/FeatureSpace.hpp @@ -27,6 +27,7 @@ #include <utility> #include "feature_creation/node/utils.hpp" +#include "inputs/InputParser.hpp" #include "mpi_interface/MPI_Interface.hpp" #include "mpi_interface/MPI_Ops.hpp" @@ -48,7 +49,7 @@ class FeatureSpace { std::vector<node_ptr> _phi_selected; //!< A vector containing all of the selected features std::vector<node_ptr> _phi; //!< A vector containing all features generated (Not including those created on the Fly during SIS) - const std::vector<node_ptr> _phi_0; //!< A vector containing all of the Primary features + std::vector<node_ptr> _phi_0; //!< A vector containing all of the Primary features #ifdef PARAMETERIZE std::vector<node_ptr> _phi_reparam; //!< A vector containing the features created when reparameterizating using the residuals @@ -66,10 +67,10 @@ class FeatureSpace std::vector<bin_op_node_gen> _com_bin_operators; //!< Vector containing all commutable binary operators 
std::vector<bin_op_node_gen> _bin_operators; //!< Vector containing all binary operators - std::vector<double> _prop; //!< The value of the property vector for each training sample + std::vector<double> _prop_train; //!< The value of the property vector for each training sample std::vector<double> _scores; //!< The projection scores for each feature - const std::vector<int> _task_sizes; //!< Number of training samples per task + const std::vector<int> _task_sizes_train; //!< Number of training samples per task std::vector<int> _start_rung; //!< Vector containing the indexes where each rung starts in _phi const std::string _project_type; //!< The type of LossFunction to use when projecting the features onto a property const std::string _feature_space_file; //!< File to output the computer readable representation of the selected features to @@ -89,7 +90,7 @@ class FeatureSpace int _max_rung; //!< Maximum rung for the feature creation const int _n_sis_select; //!< Number of features to select during each SIS iteration - const int _n_samp; //!< Number of samples in the training set + const int _n_samp_train; //!< Number of samples in the training set const int _n_rung_generate; //!< Either 0 or 1, and is the number of rungs to generate on the fly during SIS int _max_param_depth; //!< The maximum depth in the binary expression tree to set non-linear optimization @@ -97,83 +98,13 @@ class FeatureSpace public: - #ifdef PARAMETERIZE + // DocString: feat_space_init /** - * @brief FeatureSpace constructor given a set of primary features and operators + * @brief Construct a FeatureSpace using an InputParser object * - * @param mpi_comm MPI communicator for the calculations - * @param phi_0 The set of primary features - * @param allowed_ops The list of allowed operators - * @param allowed_param_ops The list of allowed operators to be used with non-linear optimization - * @param prop List containing the property vector (training data only) - * @param task_sizes The number of 
samples per task - * @param project_type The type of loss function/projection operator to use - * @param max_rung The maximum rung of the feature (Height of the binary expression tree -1) - * @param n_sis_select The number of features to select during each SIS step - * @param n_rung_store The number of rungs whose feature's data is always stored in memory - * @param n_rung_generate Either 0 or 1, and is the number of rungs to generate on the fly during SIS - * @param cross_corr_max The maximum allowed cross-correlation value between selected features - * @param min_abs_feat_val The minimum allowed absolute feature value for a feature - * @param max_abs_feat_val The maximum allowed absolute feature value for a feature - * @param max_param_depth The maximum depth in the binary expression tree to set non-linear optimization - * @param reparam_residual If True then reparameterize features using the residuals of each model - */ - FeatureSpace( - std::shared_ptr<MPI_Interface> mpi_comm, - std::vector<node_ptr> phi_0, - std::vector<std::string> allowed_ops, - std::vector<std::string> allowed_param_ops, - std::vector<double> prop, - std::vector<int> task_sizes, - std::string project_type="regression", - int max_rung=1, - int n_sis_select=1, - int n_rung_store=-1, - int n_rung_generate=0, - double cross_corr_max=1.0, - double min_abs_feat_val=1e-50, - double max_abs_feat_val=1e50, - int max_param_depth=-1, - bool reparam_residual=false - ); - #else - /** - * @brief FeatureSpace constructor given a set of primary features and operators - * - * @param mpi_comm MPI communicator for the calculations - * @param phi_0 The set of primary features - * @param allowed_ops The list of allowed operators - * @param prop List containing the property vector (training data only) - * @param task_sizes The number of samples per task - * @param project_type The type of loss function/projection operator to use - * @param max_rung The maximum rung of the feature (Height of the binary expression 
tree -1) - * @param n_sis_select The number of features to select during each SIS step - * @param n_rung_store The number of rungs whose feature's data is always stored in memory - * @param n_rung_generate Either 0 or 1, and is the number of rungs to generate on the fly during SIS - * @param cross_corr_max The maximum allowed cross-correlation value between selected features - * @param min_abs_feat_val The minimum allowed absolute feature value for a feature - * @param max_abs_feat_val The maximum allowed absolute feature value for a feature - */ - FeatureSpace( - std::shared_ptr<MPI_Interface> mpi_comm, - std::vector<node_ptr> phi_0, - std::vector<std::string> allowed_ops, - std::vector<double> prop, - std::vector<int> task_sizes, - std::string project_type="regression", - int max_rung=1, - int n_sis_select=1, - int n_rung_store=-1, - int n_rung_generate=0, - double cross_corr_max=1.0, - double min_abs_feat_val=1e-50, - double max_abs_feat_val=1e50 - ); - #endif - /** - * @brief Initialize members of the FeatureSpace using _prop + * @param inputs InputParser object used to build the FeatureSpace */ - void initialize_fs(); + FeatureSpace(InputParser inputs); /** * @brief Populate the operator lists using _allowed_ops and _allowed_param_ops @@ -218,7 +149,7 @@ public: /** * @brief Number of training samples per task */ - inline std::vector<int> task_sizes() const {return _task_sizes;} + inline std::vector<int> task_sizes_train() const {return _task_sizes_train;} // DocString: feat_space_feature_space_file /** @@ -256,11 +187,11 @@ public: */ inline int n_sis_select() const {return _n_sis_select;} - // DocString: feat_space_n_samp + // DocString: feat_space_n_samp_train /** * @brief The nuumber of samples in the training set */ - inline int n_samp() const {return _n_samp;} + inline int n_samp_train() const {return _n_samp_train;} // DocString: feat_space_n_feat /** @@ -537,7 +468,7 @@ public: * @param feature_file (str) The file containing the postfix expressions of 
all features in the FeatureSpace * @param phi_0 (list) The set of primary features * @param prop (np.ndarray) List containing the property vector (training data only) - * @param task_sizes (list) The number of samples in the training data per task + * @param task_sizes_train (list) The number of samples in the training data per task * @param project_type (str) The type of loss function/projection operator to use * @param n_sis_select (int) The number of features to select during each SIS step * @param cross_corr_max (double) The maximum allowed cross-correlation value between selected features @@ -546,7 +477,7 @@ public: std::string feature_file, py::list phi_0, np::ndarray prop, - py::list task_sizes, + py::list task_sizes_train, std::string project_type="regression", int n_sis_select=1, double cross_corr_max=1.0 @@ -559,7 +490,7 @@ public: * @param feature_file (str) The file containing the postfix expressions of all features in the FeatureSpace * @param phi_0 (list) The set of primary features * @param prop (list) List containing the property vector (training data only) - * @param task_sizes (list) The number of samples in the training data per task + * @param task_sizes_train (list) The number of samples in the training data per task * @param project_type (str) The type of loss function/projection operator to use * @param n_sis_select (int) The number of features to select during each SIS step * @param cross_corr_max (double) The maximum allowed cross-correlation value between selected features @@ -568,7 +499,7 @@ public: std::string feature_file, py::list phi_0, py::list prop, - py::list task_sizes, + py::list task_sizes_train, std::string project_type="regression", int n_sis_select=1, double cross_corr_max=1.0 @@ -622,11 +553,11 @@ public: */ inline np::ndarray scores_py(){return python_conv_utils::to_ndarray<double>(_scores);}; - // DocString: feat_space_task_sizes_py + // DocString: feat_space_task_sizes_train_py /** * @brief A list of the number of samples 
in each task for the training data */ - inline py::list task_sizes_py(){return python_conv_utils::to_list<int>(_task_sizes);}; + inline py::list task_sizes_train_py(){return python_conv_utils::to_list<int>(_task_sizes_train);}; // DocString: feat_space_allowed_ops_py /** diff --git a/src/feature_creation/node/FeatureNode.cpp b/src/feature_creation/node/FeatureNode.cpp index 4422d0f02a8ab9f797a173aab3010a3c50a86054..72b62298d60d2aaf1c8e5f1f71f9cbef5d7378b4 100644 --- a/src/feature_creation/node/FeatureNode.cpp +++ b/src/feature_creation/node/FeatureNode.cpp @@ -35,9 +35,30 @@ FeatureNode::FeatureNode(const unsigned long int feat_ind, const std::string exp { if(set_val) { + // Automatically resize the storage arrays + if(node_value_arrs::N_STORE_FEATURES == 0) + { + node_value_arrs::initialize_values_arr(_n_samp, _n_samp_test, 1); + } + else if((_n_samp != node_value_arrs::N_SAMPLES) || (_n_samp_test != node_value_arrs::N_SAMPLES_TEST)) + { + throw std::logic_error( + "Number of samples in current feature is not the same as the others, (" + + std::to_string(_n_samp) + + " and " + std::to_string(_n_samp_test) + + " vs. 
" + + std::to_string(node_value_arrs::N_SAMPLES) + + " and " + + std::to_string(node_value_arrs::N_SAMPLES_TEST) + + ")" + ); + } + else if(feat_ind >= node_value_arrs::N_STORE_FEATURES) + { + node_value_arrs::resize_values_arr(0, node_value_arrs::N_STORE_FEATURES + 1); + } set_value(); set_test_value(); - } } diff --git a/src/feature_creation/node/value_storage/nodes_value_containers.cpp b/src/feature_creation/node/value_storage/nodes_value_containers.cpp index d58f6256f3f2fc07aeb3fd47f8bd099bd3d4b8e9..c7fa9e35088f3e2a52ae9510727dedcf3d8d7532 100644 --- a/src/feature_creation/node/value_storage/nodes_value_containers.cpp +++ b/src/feature_creation/node/value_storage/nodes_value_containers.cpp @@ -50,50 +50,17 @@ std::vector<double> node_value_arrs::TEMP_STORAGE_TEST_ARR; void node_value_arrs::initialize_values_arr( const int n_samples, const int n_samples_test, - const int n_primary_feat, - const int max_rung, - const bool set_task_sz, - const bool use_params + const int n_primary_feat ) { - if(max_rung < 0) - { - throw std::logic_error("Maximum rung of the features is less than 0"); - } - - if(max_rung == 0) - { - std::cerr << "Warning requested calculation has a maximum rung of 0" << std::endl; - } - - if(set_task_sz) - { - TASK_SZ_TRAIN = {n_samples}; - TASK_START_TRAIN = {0}; - TASK_SZ_TEST = {n_samples_test}; - } - N_SAMPLES = n_samples; N_SAMPLES_TEST = n_samples_test; N_RUNGS_STORED = 0; N_STORE_FEATURES = n_primary_feat; N_PRIMARY_FEATURES = n_primary_feat; - MAX_RUNG = max_rung; - N_OP_SLOTS = 2 * (static_cast<int>(std::pow(2, max_rung)) - 1); VALUES_ARR = std::vector<double>(N_STORE_FEATURES * N_SAMPLES); TEST_VALUES_ARR = std::vector<double>(N_STORE_FEATURES * N_SAMPLES_TEST); - - TEMP_STORAGE_ARR = std::vector<double>(MAX_N_THREADS * (N_OP_SLOTS * N_PRIMARY_FEATURES + 1) * N_SAMPLES); - TEMP_STORAGE_REG = std::vector<int>(MAX_N_THREADS * (N_OP_SLOTS * N_PRIMARY_FEATURES + 1), -1); - - TEMP_STORAGE_TEST_ARR = std::vector<double>(MAX_N_THREADS * 
(N_OP_SLOTS * N_PRIMARY_FEATURES + 1) * N_SAMPLES_TEST); - TEMP_STORAGE_TEST_REG = std::vector<int>(MAX_N_THREADS * (N_OP_SLOTS * N_PRIMARY_FEATURES + 1), -1); - - if(use_params) - { - initialize_param_storage(); - } } void node_value_arrs::initialize_values_arr( @@ -104,19 +71,16 @@ void node_value_arrs::initialize_values_arr( const bool use_params ) { - TASK_SZ_TRAIN = task_sz_train; - TASK_SZ_TEST = task_sz_test; - TASK_START_TRAIN = std::vector<int>(TASK_SZ_TRAIN.size(), 0); - std::copy_n(TASK_SZ_TRAIN.begin(), TASK_SZ_TRAIN.size() - 1, &TASK_START_TRAIN[1]); - initialize_values_arr( std::accumulate(task_sz_train.begin(), task_sz_train.end(), 0), std::accumulate(task_sz_test.begin(), task_sz_test.end(), 0), - n_primary_feat, - max_rung, - false, - use_params + n_primary_feat ); + + set_task_sz_train(task_sz_train); + set_task_sz_test(task_sz_test); + + set_max_rung(max_rung, use_params); } void node_value_arrs::initialize_param_storage() @@ -127,22 +91,62 @@ void node_value_arrs::initialize_param_storage() PARAM_STORAGE_TEST_ARR = std::vector<double>(N_SAMPLES_TEST * (N_PARAM_OP_SLOTS + 1) * MAX_N_THREADS); } +void node_value_arrs::set_max_rung(const int max_rung, bool use_params) +{ + if(max_rung < 0) + { + throw std::logic_error("Maximum rung of the features is less than 0"); + } + + if(max_rung == 0) + { + std::cerr << "Warning requested calculation has a maximum rung of 0" << std::endl; + } + + MAX_RUNG = max_rung; + N_OP_SLOTS = 2 * (static_cast<int>(std::pow(2, max_rung)) - 1); + + TEMP_STORAGE_ARR = std::vector<double>(MAX_N_THREADS * (N_OP_SLOTS * N_PRIMARY_FEATURES + 1) * N_SAMPLES); + TEMP_STORAGE_REG = std::vector<int>(MAX_N_THREADS * (N_OP_SLOTS * N_PRIMARY_FEATURES + 1), -1); + + TEMP_STORAGE_TEST_ARR = std::vector<double>(MAX_N_THREADS * (N_OP_SLOTS * N_PRIMARY_FEATURES + 1) * N_SAMPLES_TEST); + TEMP_STORAGE_TEST_REG = std::vector<int>(MAX_N_THREADS * (N_OP_SLOTS * N_PRIMARY_FEATURES + 1), -1); + + if(use_params || (N_PARAM_OP_SLOTS > 0)) + { + 
initialize_param_storage(); + } +} + void node_value_arrs::set_task_sz_train(const std::vector<int> task_sz_train) { - if(std::accumulate(task_sz_train.begin(), task_sz_train.end(), 0) != N_SAMPLES) + if((N_SAMPLES > 0) && (std::accumulate(task_sz_train.begin(), task_sz_train.end(), 0) != N_SAMPLES)) { - throw std::logic_error("The total number of samples has changed, task_sz_train is wrong."); + int n_samp_new = std::accumulate(task_sz_train.begin(), task_sz_train.end(), 0); + throw std::logic_error("The total number of samples has changed from " + std::to_string(N_SAMPLES) + " to " + std::to_string(n_samp_new) + ", task_sz_train is wrong."); } + else if(N_SAMPLES == 0) + { + N_SAMPLES = std::accumulate(task_sz_train.begin(), task_sz_train.end(), 0); + } + TASK_SZ_TRAIN = task_sz_train; + TASK_START_TRAIN = std::vector<int>(TASK_SZ_TRAIN.size(), 0); std::copy_n(TASK_SZ_TRAIN.begin(), TASK_SZ_TRAIN.size() - 1, &TASK_START_TRAIN[1]); } void node_value_arrs::set_task_sz_test(const std::vector<int> task_sz_test) { - if(std::accumulate(task_sz_test.begin(), task_sz_test.end(), 0) != N_SAMPLES_TEST) + if((N_SAMPLES_TEST > 0) && (std::accumulate(task_sz_test.begin(), task_sz_test.end(), 0) != N_SAMPLES_TEST)) { - throw std::logic_error("The total number of test samples has changed, task_sz_test is wrong."); + int n_samp_new = std::accumulate(task_sz_test.begin(), task_sz_test.end(), 0); + throw std::logic_error("The total number of test samples has changed from " + std::to_string(N_SAMPLES_TEST) + " to " + std::to_string(n_samp_new) + ", task_sz_test is wrong."); } + else if(N_SAMPLES_TEST == 0) + { + N_SAMPLES_TEST = std::accumulate(task_sz_test.begin(), task_sz_test.end(), 0); + } + TASK_SZ_TEST = task_sz_test; } @@ -163,6 +167,9 @@ void node_value_arrs::resize_values_arr(const int n_dims, const int n_feat) VALUES_ARR.resize(N_STORE_FEATURES * N_SAMPLES); VALUES_ARR.shrink_to_fit(); + TEST_VALUES_ARR.resize(N_STORE_FEATURES * N_SAMPLES_TEST); + 
TEST_VALUES_ARR.shrink_to_fit(); + if(n_dims == 0) { N_PRIMARY_FEATURES = N_STORE_FEATURES; @@ -237,3 +244,32 @@ void node_value_arrs::resize_d_matrix_arr(const int n_select) D_MATRIX.resize(N_SELECTED * N_SAMPLES, 0.0); D_MATRIX.shrink_to_fit(); } + +void node_value_arrs::finialize_values_arr() +{ + N_SELECTED = 0; + N_SAMPLES = 0; + N_STORE_FEATURES = 0; + N_PRIMARY_FEATURES = 0; + N_RUNGS_STORED = 0; + N_SAMPLES_TEST = 0; + MAX_N_THREADS = omp_get_max_threads(); + N_OP_SLOTS = 0; + N_PARAM_OP_SLOTS = 0; + MAX_RUNG = 0; + + TEMP_STORAGE_REG.resize(0); + TEMP_STORAGE_TEST_REG.resize(0); + + TASK_SZ_TRAIN.resize(0); + TASK_START_TRAIN.resize(0); + TASK_SZ_TEST.resize(0); + + PARAM_STORAGE_ARR.resize(0); + PARAM_STORAGE_TEST_ARR.resize(0); + D_MATRIX.resize(0); + VALUES_ARR.resize(0); + TEST_VALUES_ARR.resize(0); + TEMP_STORAGE_ARR.resize(0); + TEMP_STORAGE_TEST_ARR.resize(0); +} diff --git a/src/feature_creation/node/value_storage/nodes_value_containers.hpp b/src/feature_creation/node/value_storage/nodes_value_containers.hpp index e1b117b9b56f0424837dc75cbdb33f46ac552423..6967d67f50c6ef85cca1d9daec9664429be604a5 100644 --- a/src/feature_creation/node/value_storage/nodes_value_containers.hpp +++ b/src/feature_creation/node/value_storage/nodes_value_containers.hpp @@ -72,43 +72,24 @@ namespace node_value_arrs extern int N_OP_SLOTS; //!< The number of possible nodes of the binary expression tree that maybe calculated on the fly extern int N_PARAM_OP_SLOTS; //!< The number of possible non-leaf nodes of the binary expression tree + // DocString: node_vals_finalize /** - * @brief Initialize all central storage vectors/descriptive variables - * - * @param n_samples The number of training samples for each feature (Sum of all elements in TASK_SZ_TRAIN) - * @param n_samples_test The number of test samples for each feature (Sum of all elements in TASK_SZ_TEST) - * @param n_primary_feat The number of primary features - * @param max_rung The maximum rung for all features - * 
@param set_test_task_sz If True reset the task_sz vectors - * @param use_params If True set up parameterized feature storage as well + * @brief Resize all storage arrays to be empty */ - void initialize_values_arr( - const int n_samples, - const int n_samples_test, - const int n_primary_feat, - const int max_rung, - const bool set_task_sz, - const bool use_params - ); + void finialize_values_arr(); - // DocString: node_vals_init_no_ts /** - * @brief Initialize all central storage vectors/descriptive variables + * @brief Initialize all central storage vectors/descriptive variables without changing MAX_RUNG * * @param n_samples The number of training samples for each feature (Sum of all elements in TASK_SZ_TRAIN) * @param n_samples_test The number of test samples for each feature (Sum of all elements in TASK_SZ_TEST) * @param n_primary_feat The number of primary features - * @param max_rung The maximum rung for all features */ - inline void initialize_values_arr( + void initialize_values_arr( const int n_samples, const int n_samples_test, - const int n_primary_feat, - const int max_rung - ) - { - initialize_values_arr(n_samples, n_samples_test, n_primary_feat, max_rung, true, false); - } + const int n_primary_feat + ); /** * @brief Initialize all central storage vectors/descriptive variables @@ -170,6 +151,13 @@ namespace node_value_arrs */ void set_task_sz_test(const std::vector<int> task_sz_test); + /** + * @brief Set max_rung and initialize the temporary storage arrays + * + * @param max_rung The maximum rung for the calculation + */ + void set_max_rung(const int max_rung, bool use_params=false); + /** * @brief Get the operator slot associated with a given rung/offset * @@ -418,7 +406,7 @@ namespace node_value_arrs #ifdef PY_BINDINGS - // DocString: node_vals_ts_list + // DocString: node_vals_ts_list_no_params /** * @brief Initialize the node value arrays * @details Using the size of the initial feature space constructor the storage arrays @@ -444,7 +432,7 @@ 
namespace node_value_arrs ); } - // DocString: node_vals_ts_arr + // DocString: node_vals_ts_arr_no_params /** * @brief Initialize the node value arrays * @details Using the size of the initial feature space constructor the storage arrays @@ -469,6 +457,62 @@ namespace node_value_arrs false ); } + + // DocString: node_vals_ts_list + /** + * @brief Initialize the node value arrays + * @details Using the size of the initial feature space constructor the storage arrays + * + * @param task_sz_train (list): The number of training samples per task + * @param task_sz_test (list): The number of test sample per task + * @param n_primary_feat (int): The number of primary features + * @param max_rung (int): The maximum rung for all features + * @param use_params (bool): If true also initialize parameterized storage + */ + inline void initialize_values_arr( + py::list task_sz_train, + py::list task_sz_test, + int n_primary_feat, + int max_rung, + bool use_params + ) + { + initialize_values_arr( + python_conv_utils::from_list<int>(task_sz_train), + python_conv_utils::from_list<int>(task_sz_test), + n_primary_feat, + max_rung, + use_params + ); + } + + // DocString: node_vals_ts_arr + /** + * @brief Initialize the node value arrays + * @details Using the size of the initial feature space constructor the storage arrays + * + * @param task_sz_train (np.ndarray): The number of training samples per task + * @param task_sz_test (np.ndarray): The number of test sample per task + * @param n_primary_feat (int): The number of primary features + * @param max_rung (int): The maximum rung for all features + * @param use_params (bool): If true also initialize parameterized storage + */ + inline void initialize_values_arr( + np::ndarray task_sz_train, + np::ndarray task_sz_test, + int n_primary_feat, + int max_rung, + bool use_params + ) + { + initialize_values_arr( + python_conv_utils::from_ndarray<int>(task_sz_train), + python_conv_utils::from_ndarray<int>(task_sz_test), + n_primary_feat, + 
max_rung, + use_params + ); + } #endif } diff --git a/src/inputs/InputParser.cpp b/src/inputs/InputParser.cpp index 13f0183a00ad4f58694e35ee4eba711dcd5694ab..271c11ddd3e200893af92ea2a5cc92f03731a125 100644 --- a/src/inputs/InputParser.cpp +++ b/src/inputs/InputParser.cpp @@ -22,16 +22,50 @@ */ #include "inputs/InputParser.hpp" +InputParser::InputParser() : + _filename(""), + _data_file("data.csv"), + _prop_key("prop"), + _prop_label("prop"), + _task_key("Task"), + _calc_type("regression"), + _mpi_comm(mpi_setup::comm), + _cross_cor_max(1.0), + _l_bound(1e-50), + _u_bound(1e50), + _n_dim(1), + _max_rung(0), + _n_rung_store(0), + _n_rung_generate(0), + _n_sis_select(1), + _n_samp(0), + _n_samp_train(0), + _n_samp_test(0), + _n_residual(1), + _n_models_store(1), + _max_param_depth(-1), + _nlopt_seed(42), + _fix_intercept(false), + _global_param_opt(false), + _reparam_residual(false) +{ + if(_mpi_comm == nullptr) + { + mpi_setup::init_mpi_env(); + _mpi_comm = mpi_setup::comm; + } +} InputParser::InputParser(pt::ptree ip, std::string fn, std::shared_ptr<MPI_Interface> comm) : - _opset(as_vector<std::string>(ip, "opset")), - _param_opset(as_vector<std::string>(ip, "param_opset")), + _allowed_ops(as_vector<std::string>(ip, "opset")), + _allowed_param_ops(as_vector<std::string>(ip, "param_opset")), _filename(fn), _data_file(ip.get<std::string>("data_file", "data.csv")), _prop_key(ip.get<std::string>("property_key", "prop")), _task_key(ip.get<std::string>("task_key", "Task")), _calc_type(ip.get<std::string>("calc_type", "regression")), _leave_out_inds(as_vector<int>(ip, "leave_out_inds")), + _mpi_comm(comm), _cross_cor_max(ip.get<double>("max_feat_cross_correlation", 1.0)), _l_bound(ip.get<double>("min_abs_feat_val", 1e-50)), _u_bound(ip.get<double>("max_abs_feat_val", 1e50)), @@ -41,8 +75,8 @@ InputParser::InputParser(pt::ptree ip, std::string fn, std::shared_ptr<MPI_Inter _n_rung_store(ip.get<int>("n_rung_store", _max_rung - 1)), 
_n_rung_generate(ip.get<int>("n_rung_generate", 0)), _n_samp(0), - _n_residuals(ip.get<int>("n_residual", 1)), - _n_models_store(ip.get<int>("n_models_store", _n_residuals)), + _n_residual(ip.get<int>("n_residual", 1)), + _n_models_store(ip.get<int>("n_models_store", _n_residual)), _max_param_depth(ip.get<int>("max_feat_param_depth", _max_rung)), _nlopt_seed(ip.get<int>("nlopt_seed", 42)), _fix_intercept(ip.get<bool>("fix_intercept", false)), @@ -51,7 +85,7 @@ InputParser::InputParser(pt::ptree ip, std::string fn, std::shared_ptr<MPI_Inter { // Check if param ops are passed without being build with parameterized features #ifndef PARAMETERIZE - if(_param_opset.size() > 0) + if(_allowed_param_ops.size() > 0) { throw std::logic_error("To use parameterized operators please rebuild with -DBUILD_PARAMS=ON"); } @@ -167,9 +201,9 @@ InputParser::InputParser(pt::ptree ip, std::string fn, std::shared_ptr<MPI_Inter } // Broadcast the task sizes/leave out indexes - mpi::broadcast(*comm, _leave_out_inds, 0); - mpi::broadcast(*comm, _task_sizes_test, 0); - mpi::broadcast(*comm, _task_sizes_train, 0); + mpi::broadcast(*_mpi_comm, _leave_out_inds, 0); + mpi::broadcast(*_mpi_comm, _task_sizes_test, 0); + mpi::broadcast(*_mpi_comm, _task_sizes_train, 0); } else if(_leave_out_inds.size() > 0) { @@ -202,9 +236,9 @@ InputParser::InputParser(pt::ptree ip, std::string fn, std::shared_ptr<MPI_Inter _n_samp_test = std::accumulate(_task_sizes_test.begin(), _task_sizes_test.end(), 0); assert(_n_samp_train + _n_samp_test == _n_samp); - if((_opset.size() == 0) && (_param_opset.size() == 0)) + if((_allowed_ops.size() == 0) && (_allowed_param_ops.size() == 0)) { - _opset = { + _allowed_ops = { "exp", "neg_exp", "inv", @@ -226,15 +260,13 @@ InputParser::InputParser(pt::ptree ip, std::string fn, std::shared_ptr<MPI_Inter } // Generate a feature space from the data file - generate_feature_space(comm, headers, units, tasks, taskind); + generate_phi_0(headers, units, tasks, taskind); } -void 
InputParser::generate_feature_space( - std::shared_ptr<MPI_Interface> comm, +void InputParser::generate_phi_0( std::vector<std::string> headers, std::vector<Unit> units, - std::map<std::string, - std::vector<int>> tasks, + std::map<std::string, std::vector<int>> tasks, int taskind ) { @@ -415,55 +447,339 @@ void InputParser::generate_feature_space( // Initialize the central data storage area #ifdef PARAMETERIZE - node_value_arrs::initialize_values_arr(_task_sizes_train, _task_sizes_test, headers.size(), _max_rung, _param_opset.size() > 0); + node_value_arrs::initialize_values_arr(_task_sizes_train, _task_sizes_test, headers.size(), _max_rung, _allowed_param_ops.size() > 0); #else node_value_arrs::initialize_values_arr(_task_sizes_train, _task_sizes_test, headers.size(), _max_rung, false); #endif // Create \Phi_0 of primary features - std::vector<node_ptr> phi_0; for(int ff = 0; ff < headers.size(); ++ff) { - phi_0.push_back(std::make_shared<FeatureNode>(ff, headers[ff], data[ff], test_data[ff], units[ff])); + _phi_0.push_back(FeatureNode(ff, headers[ff], data[ff], test_data[ff], units[ff])); } +} - // Create the feature space - #ifdef PARAMETERIZE - _feat_space = std::make_shared<FeatureSpace>( - comm, - phi_0, - _opset, - _param_opset, - _prop_train, - _task_sizes_train, - _calc_type, - _max_rung, - _n_sis_select, - _n_rung_store, - _n_rung_generate, - _cross_cor_max, - _l_bound, - _u_bound, - _max_param_depth, - _reparam_residual +std::vector<node_ptr> InputParser::phi_0_ptrs() const +{ + if(_phi_0.size() == 0) + { + throw std::logic_error("Accessing an unset member (_phi_0)."); + } + std::vector<node_ptr> phi_0_node_ptr(_phi_0.size()); + std::transform( + _phi_0.begin(), + _phi_0.end(), + phi_0_node_ptr.begin(), + [](FeatureNode feat){return std::make_shared<FeatureNode>(feat);} ); - #else - _feat_space = std::make_shared<FeatureSpace>( - comm, - phi_0, - _opset, - _prop_train, - _task_sizes_train, - _calc_type, - _max_rung, - _n_sis_select, - _n_rung_store, - 
_n_rung_generate, - _cross_cor_max, - _l_bound, - _u_bound + return phi_0_node_ptr; +} + +void InputParser::set_task_sizes_train(std::vector<int> task_sizes_train) +{ + if((_task_sizes_test.size() > 0) && (_task_sizes_test.size() != task_sizes_train.size())) + { + throw std::logic_error("The updated task_sizes_train is not of the same size as the existing _task_sizes_test."); + } + else if(_task_sizes_test.size() == 0) + { + _task_sizes_test.resize(task_sizes_train.size(), 0); + _n_samp_test = 0; + } + + if((_task_names.size() > 0) && (_task_names.size() != task_sizes_train.size())) + { + throw std::logic_error("The updated task_sizes_train is not of the same size as the existing _task_names."); + } + + _task_sizes_train = task_sizes_train; + _n_samp_train = std::accumulate(task_sizes_train.begin(), task_sizes_train.end(), 0); + _n_samp = _n_samp_train + _n_samp_test; + + if((_prop_train.size() > 0) && (_prop_train.size() != _n_samp_train)) + { + throw std::logic_error("The total number of samples in the updated task size vector is not the same as the number of samples of the property vector for the training set."); + } + + for(auto& feat : _phi_0) + { + if(feat.n_samp() != _n_samp_train) + { + throw std::logic_error("The total number of samples in the updated task size vector is not the same as the number of samples of one of the features in the training set."); + } + } + + if((_sample_ids_train.size() > 0) && (_sample_ids_train.size() != _n_samp_train)) + { + throw std::logic_error("The total number of samples in the updated task size vector is not the same as the number of samples ids for the training set."); + } + node_value_arrs::set_task_sz_train(_task_sizes_train); +} + +void InputParser::set_task_sizes_test(std::vector<int> task_sizes_test) +{ + if((_task_sizes_train.size() > 0) && (_task_sizes_train.size() != task_sizes_test.size())) + { + throw std::logic_error("The updated task_sizes_test is not of the same size as the existing _task_sizes_train."); + } 
+ else if(_task_sizes_train.size() == 0) + { + _task_sizes_train.resize(task_sizes_test.size(), 0); + _n_samp_train = 0; + } + + if((_task_names.size() > 0) && (_task_names.size() != task_sizes_test.size())) + { + throw std::logic_error("The updated task_sizes_test is not of the same size as the existing _task_names."); + } + + _task_sizes_test = task_sizes_test; + _n_samp_test = std::accumulate(task_sizes_test.begin(), task_sizes_test.end(), 0); + _n_samp = _n_samp_train + _n_samp_test; + + if((_prop_test.size() > 0) && (_prop_test.size() != _n_samp_test)) + { + throw std::logic_error("The total number of samples in the updated task size vector is not the same as the number of samples of the property vector for the test set."); + } + + if((_leave_out_inds.size() > 0) && (_leave_out_inds.size() != _n_samp_test)) + { + throw std::logic_error("The total number of samples in the updated task size vector is not the same as the number of samples left out as the test set."); + } + + for(auto& feat : _phi_0) + { + if(feat.n_samp_test() != _n_samp_test) + { + throw std::logic_error("The total number of samples in the updated task size vector is not the same as the number of samples of one of the features in the test set."); + } + } + + if((_sample_ids_test.size() > 0) && (_sample_ids_test.size() != _n_samp_test)) + { + throw std::logic_error("The total number of samples in the updated task size vector is not the same as the number of samples ids for the test set."); + } + node_value_arrs::set_task_sz_test(_task_sizes_test); +} + +void InputParser::set_task_names(std::vector<std::string> task_names) +{ + if((_task_sizes_test.size() > 0) && (_task_sizes_test.size() != task_names.size())) + { + throw std::logic_error("The updated task_sizes_name is not of the same size as the existing _task_sizes_test."); + } + else if(_task_sizes_test.size() == 0) + { + _task_sizes_test.resize(task_names.size(), 0); + _n_samp_test = 0; + } + + if((_task_sizes_train.size() > 0) && 
(_task_sizes_train.size() != task_names.size())) + { + throw std::logic_error("The updated task_sizes_name is not of the same size as the existing _task_sizes_train."); + } + else if(_task_sizes_train.size() == 0) + { + _task_sizes_train.resize(task_names.size(), 0); + _n_samp_train = 0; + } + + _task_names = task_names; +} +void InputParser::set_prop_train(std::vector<double> prop_train) +{ + if((_n_samp_train != 0) && (prop_train.size() != _n_samp_train)) + { + throw std::logic_error("The number of samples in the property vector is not the same as the expected number of samples in the training set."); + } + + for(auto& feat : _phi_0) + { + if(feat.n_samp() != prop_train.size()) + { + throw std::logic_error("The number of samples in the property vector is not the same as the number of samples of one of the features in the training set."); + } + } + + if((_sample_ids_train.size() > 0) && (_sample_ids_train.size() != prop_train.size())) + { + throw std::logic_error("The number of samples in the property vector is not the same as the number of samples ids for the training set."); + } + + _prop_train = prop_train; +} + +void InputParser::set_prop_test(std::vector<double> prop_test) +{ + if((_n_samp_test != 0) && (prop_test.size() != _n_samp_test)) + { + throw std::logic_error("The number of samples in the property vector is not the same as the expected number of samples in the test set."); + } + + if((_leave_out_inds.size() > 0) && (_leave_out_inds.size() != prop_test.size())) + { + throw std::logic_error("The number of samples in the property vector is not the same as the number of samples left out as the test set."); + } + + for(auto& feat : _phi_0) + { + if(feat.n_samp_test() != prop_test.size()) + { + throw std::logic_error("The number of samples in the property vector is not the same as the number of samples of one of the features in the test set."); + } + } + + if((_sample_ids_test.size() > 0) && (_sample_ids_test.size() != prop_test.size())) + { + throw
std::logic_error("The number of samples in the property vector is not the same as the number of samples ids for the test set."); + } + + _prop_test = prop_test; +} + +void InputParser::set_leave_out_inds(std::vector<int> leave_out_inds) +{ + if((_n_samp_test != 0) && (leave_out_inds.size() != _n_samp_test)) + { + throw std::logic_error("The number of samples left out as the test set is not the same as the expected number of samples in the test set."); + } + + if((_prop_test.size() > 0) && (_prop_test.size() != leave_out_inds.size())) + { + throw std::logic_error("The number of samples left out as the test set is not the same as the number of samples of the property vector for the test set."); + } + + for(auto& feat : _phi_0) + { + if(feat.n_samp_test() != leave_out_inds.size()) + { + throw std::logic_error("The number of samples left out as the test set is not the same as the number of samples of one of the features in the test set."); + } + } + + if((_sample_ids_test.size() > 0) && (_sample_ids_test.size() != leave_out_inds.size())) + { + throw std::logic_error("The number of samples left out as the test set is not the same as the number of samples ids for the test set."); + } + + _leave_out_inds = leave_out_inds; +} + +void InputParser::set_sample_ids_train(std::vector<std::string> sample_ids_train) +{ + if((_n_samp_train != 0) && (sample_ids_train.size() != _n_samp_train)) + { + throw std::logic_error("The number of samples is not the same as the expected number of samples in the training set."); + } + + for(auto& feat : _phi_0) + { + if(feat.n_samp() != sample_ids_train.size()) + { + throw std::logic_error("The number of samples is not the same as the number of samples of one of the features in the training set."); + } + } + + if((_prop_train.size() > 0) && (_prop_train.size() != sample_ids_train.size())) + { + throw std::logic_error("The number of samples is not the same as the number of samples of the property vector for the training set."); + } + + 
_sample_ids_train = sample_ids_train; +} + +void InputParser::set_sample_ids_test(std::vector<std::string> sample_ids_test) +{ + if((_n_samp_test != 0) && (sample_ids_test.size() != _n_samp_test)) + { + throw std::logic_error("The number of sample ids is not the same as the expected number of samples in the test set."); + } + + if((_leave_out_inds.size() > 0) && (_leave_out_inds.size() != sample_ids_test.size())) + { + throw std::logic_error("The number of sample ids is not the same as the number of samples left out as the test set."); + } + + for(auto& feat : _phi_0) + { + if(feat.n_samp_test() != sample_ids_test.size()) + { + throw std::logic_error("The number of sample ids is not the same as the number of samples of one of the features in the test set."); + } + } + + if((_prop_test.size() > 0) && (_prop_test.size() != sample_ids_test.size())) + { + throw std::logic_error("The number of sample ids is not the same as the number of samples of the property vector for the test set."); + } + + _sample_ids_test = sample_ids_test; +} + +void InputParser::set_phi_0(std::vector<FeatureNode> phi_0) +{ + assert(phi_0.size() > 0); + + int n_samp_train_feats = phi_0[0].n_samp(); + int n_samp_test_feats = phi_0[0].n_samp_test(); + + for(int ff = 1; ff < phi_0.size(); ++ff) + { + if(phi_0[ff].n_samp() != n_samp_train_feats) + { + throw std::logic_error("Not all the features in phi_0 have the same number of training samples."); + } + + if(phi_0[ff].n_samp_test() != n_samp_test_feats) + { + throw std::logic_error("Not all the features in phi_0 have the same number of test samples."); + } + + } + + if((_n_samp_train != 0) && (n_samp_train_feats != _n_samp_train)) + { + throw std::logic_error("The total number of samples in the updated primary feature set is not the same as the expected number of samples in the training set."); + } + + if((_prop_train.size() > 0) && (_prop_train.size() != n_samp_train_feats)) + { + throw std::logic_error("The total number of samples in the updated 
primary feature set is not the same as the number of samples of the property vector for the training set."); + } + + if((_sample_ids_train.size() > 0) && (_sample_ids_train.size() != n_samp_train_feats)) + { + throw std::logic_error("The total number of samples in the updated primary feature set is not the same as the number of samples ids for the training set."); + } + + if((_n_samp_test != 0) && (n_samp_test_feats != _n_samp_test)) + { + throw std::logic_error("The total number of samples in the updated primary feature set is not the same as the expected number of samples in the test set."); + } + + if((_leave_out_inds.size() > 0) && (_leave_out_inds.size() != n_samp_test_feats)) + { + throw std::logic_error("The total number of samples in the updated primary feature set is not the same as the number of samples left out as the test set."); + } + + if((_prop_test.size() > 0) && (_prop_test.size() != n_samp_test_feats)) + { + throw std::logic_error("The total number of samples in the updated primary feature set is not the same as the number of samples of the property vector for the test set."); + } + + if((_sample_ids_test.size() > 0) && (_sample_ids_test.size() != n_samp_test_feats)) + { + throw std::logic_error("The total number of samples in the updated primary feature set is not the same as the number of samples ids for the test set."); + } + _phi_0 = phi_0; + node_value_arrs::initialize_values_arr( + _phi_0[0].n_samp(), _phi_0[0].n_samp_test(), _phi_0.size() ); - #endif + for(auto& feat : _phi_0) + { + feat.set_value(); + feat.set_test_value(); + } } void strip_comments(std::string& filename) diff --git a/src/inputs/InputParser.hpp b/src/inputs/InputParser.hpp index 21df1c8fdafa7a1add0e820d5fdd3fd754e08cab..3270238d65ec627b4156b44d8288b28292cf3c69 100644 --- a/src/inputs/InputParser.hpp +++ b/src/inputs/InputParser.hpp @@ -27,6 +27,7 @@ #include <boost/filesystem.hpp> #include <boost/property_tree/ptree.hpp> #include <boost/property_tree/json_parser.hpp> 
+#include <boost/serialization/vector.hpp> #include <iterator> #include <iostream> @@ -35,8 +36,16 @@ #include <random> #include <chrono> -#include "feature_creation/feature_space/FeatureSpace.hpp" -#include "feature_creation/node/value_storage/nodes_value_containers.hpp" +#include "feature_creation/node/FeatureNode.hpp" +#include "mpi_interface/MPI_Interface.hpp" +#include "nl_opt/NLOptWrapper.hpp" + +#ifdef PY_BINDINGS +#include "python/py_binding_cpp_def/conversion_utils.hpp" + +namespace np = boost::python::numpy; +namespace py = boost::python; +#endif namespace pt = boost::property_tree; @@ -47,13 +56,13 @@ namespace pt = boost::property_tree; */ class InputParser { -public: +private: std::vector<std::string> _sample_ids_train; //!< Vector storing all sample ids for the training samples std::vector<std::string> _sample_ids_test; //!< Vector storing all sample ids for the test samples std::vector<std::string> _task_names; //!< Vector storing the ID of the task names - std::vector<std::string> _param_opset; //!< Vector containing all allowed operators strings for operators with free parameters - std::vector<std::string> _opset; //!< Vector containing all allowed operators strings + std::vector<std::string> _allowed_param_ops; //!< Vector containing all allowed operators strings for operators with free parameters + std::vector<std::string> _allowed_ops; //!< Vector containing all allowed operators strings std::vector<double> _prop_train; //!< The value of the property to evaluate the loss function against for the training set std::vector<double> _prop_test; //!< The value of the property to evaluate the loss function against for the test set @@ -61,68 +70,919 @@ public: std::vector<int> _task_sizes_train; //!< Number of training samples per task std::vector<int> _task_sizes_test; //!< Number of testing samples per task + std::vector<FeatureNode> _phi_0; //!< A vector of FeatureNodes for the primary feature space + Unit _prop_unit; //!< The Unit of the property - 
const std::string _filename; //!< Name of the input file - const std::string _data_file; //!< Name of the data file - const std::string _prop_key; //!< Key used to find the property column in the data file + std::string _filename; //!< Name of the input file + std::string _data_file; //!< Name of the data file + std::string _prop_key; //!< Key used to find the property column in the data file std::string _prop_label; //!< The label of the property - const std::string _task_key; //!< Key used to find the task column in the data file - const std::string _calc_type; //!< The type of LossFunction to use when projecting the features onto a property + std::string _task_key; //!< Key used to find the task column in the data file + std::string _calc_type; //!< The type of LossFunction to use when projecting the features onto a property - std::shared_ptr<FeatureSpace> _feat_space; //!< shared_ptr to the FeatureSpace generated from the data file and the input file + std::shared_ptr<MPI_Interface> _mpi_comm; //!< The MPI communicator for the calculation double _cross_cor_max; //!< Maximum cross-correlation used for selecting features double _l_bound; //!< The lower bound for the maximum absolute value of the features double _u_bound; //!< The upper bound for the maximum absolute value of the features - const int _n_dim; //!< The maximum number of features allowed in the linear model - const int _max_rung; //!< Maximum rung for the feature creation - const int _n_rung_store; //!< The number of rungs to calculate and store the value of the features for all samples - const int _n_rung_generate; //!< Either 0 or 1, and is the number of rungs to generate on the fly during SIS - const int _n_sis_select; //!< Number of features to select during each SIS iteration + int _n_dim; //!< The maximum number of features allowed in the linear model + int _max_rung; //!< Maximum rung for the feature creation + int _n_rung_store; //!< The number of rungs to calculate and store the value of the 
features for all samples + int _n_rung_generate; //!< Either 0 or 1, and is the number of rungs to generate on the fly during SIS + int _n_sis_select; //!< Number of features to select during each SIS iteration int _n_samp; //!< Number of samples in the data set int _n_samp_train; //!< Number of samples in the training set int _n_samp_test; //!< Number of samples in the test set - const int _n_residuals; //!< Number of residuals to pass to the next sis model - const int _n_models_store; //!< The number of models to output to files - const int _max_param_depth; //!< The maximum depth in the binary expression tree to set non-linear optimization - const int _nlopt_seed; //!< The seed used for the nlOpt library + int _n_residual; //!< Number of residuals to pass to the next sis model + int _n_models_store; //!< The number of models to output to files + int _max_param_depth; //!< The maximum depth in the binary expression tree to set non-linear optimization + int _nlopt_seed; //!< The seed used for the nlOpt library - const bool _fix_intercept; //!< If true the bias term is fixed at 0 - const bool _global_param_opt; //!< True if global optimization is requested for non-linear optimization of parameters (Can break reproducibility) - const bool _reparam_residual; //!< If True then reparameterize features using the residuals of each model + bool _fix_intercept; //!< If true the bias term is fixed at 0 + bool _global_param_opt; //!< True if global optimization is requested for non-linear optimization of parameters (Can break reproducibility) + bool _reparam_residual; //!< If True then reparameterize features using the residuals of each model + +public: + // DocString: inputs_init_default + /** + * @brief Default constructor (Sets all values to the default values) + * @details Uses mpi_setup::comm as the MPI communicator, calling mpi_setup::init_mpi_env() first if it is not yet set + */ + InputParser(); /** * @brief Constructor of the InputParser * - * @param IP Property tree generated from json file * @param fn filename for the input file * @param comm MPI
communicator for the calculation */ - InputParser(pt::ptree ip, std::string fn, std::shared_ptr<MPI_Interface> comm); + InputParser(std::string fn, std::shared_ptr<MPI_Interface> comm); /** - * @brief The generated FeatureSpace + * @brief Constructor of the InputParser + * + * @param IP Property tree generated from json file + * @param fn filename for the input file + * @param comm MPI communicator for the calculation */ - inline std::shared_ptr<FeatureSpace> feat_space() const {return _feat_space;} + InputParser(pt::ptree ip, std::string fn, std::shared_ptr<MPI_Interface> comm); /** * @brief Generate the feature space from input files and parameters * - * @param comm MPI communicator for the calculation * @param headers column headers for all columns in the data file (expr of the Nodes) * @param units The units for the features * @param tasks map where keys are the task name and values are the number of samples in each task * @param taskind index in the columns that correspond the the task column */ - void generate_feature_space( - std::shared_ptr<MPI_Interface> comm, + void generate_phi_0( std::vector<std::string> headers, std::vector<Unit> units, std::map<std::string, std::vector<int>> tasks, int taskind ); + + /** + * @brief The MPI communicator for the calculation + */ + inline std::shared_ptr<MPI_Interface> mpi_comm(){return _mpi_comm;} + + /** + * @brief Vector storing all sample ids for the training samples + */ + inline const std::vector<std::string>& sample_ids_train() const + { + if(_sample_ids_train.size() == 0) + { + throw std::logic_error("Accessing an unset member (_sample_ids_train)."); + } + return _sample_ids_train; + } + + /** + * @brief Vector storing all sample ids for the training samples + */ + inline std::vector<std::string> sample_ids_train_copy() const + { + if(_sample_ids_train.size() == 0) + { + throw std::logic_error("Accessing an unset member (_sample_ids_train)."); + } + return _sample_ids_train; + } + + /** + * @brief Set Vector 
storing all sample ids for the training samples + */ + void set_sample_ids_train(std::vector<std::string> sample_ids_train); + + /** + * @brief Vector storing all sample ids for the test samples + */ + inline const std::vector<std::string>& sample_ids_test() const + { + if((_n_samp_test != 0) && (_sample_ids_test.size() != _n_samp_test)) + { + throw std::logic_error("Accessing an unset member (_sample_ids_test)."); + } + return _sample_ids_test; + } + + /** + * @brief Vector storing all sample ids for the test samples + */ + inline std::vector<std::string> sample_ids_test_copy() const + { + if((_n_samp_test != 0) && (_sample_ids_test.size() != _n_samp_test)) + { + throw std::logic_error("Accessing an unset member (_sample_ids_test)."); + } + return _sample_ids_test; + } + + /** + * @brief Set Vector storing all sample ids for the test samples + */ + void set_sample_ids_test(std::vector<std::string> sample_ids_test); + + /** + * @brief Vector storing the ID of the task names + */ + inline const std::vector<std::string>& task_names() const + { + if(_task_names.size() == 0) + { + throw std::logic_error("Accessing an unset member (_task_names)."); + } + return _task_names; + } + + /** + * @brief Vector storing the ID of the task names + */ + inline std::vector<std::string> task_names_copy() const + { + if(_task_names.size() == 0) + { + throw std::logic_error("Accessing an unset member (_task_names)."); + } + return _task_names; + } + + /** + * @brief Set Vector storing the ID of the task names + */ + void set_task_names(std::vector<std::string> task_names); + + /** + * @brief Vector containing all allowed operators strings for operators with free parameters + */ + inline const std::vector<std::string>& allowed_param_ops() const {return _allowed_param_ops;} + + /** + * @brief Vector containing all allowed operators strings for operators with free parameters + */ + inline std::vector<std::string> allowed_param_ops_copy() const {return _allowed_param_ops;} + + /** + * 
@brief Set Vector containing all allowed operators strings for operators with free parameters + */ + inline void set_allowed_param_ops(std::vector<std::string> allowed_param_ops) {_allowed_param_ops = allowed_param_ops;} + + /** + * @brief Vector containing all allowed operators strings + */ + inline const std::vector<std::string>& allowed_ops() const {return _allowed_ops;} + + /** + * @brief Vector containing all allowed operators strings + */ + inline std::vector<std::string> allowed_ops_copy() const {return _allowed_ops;} + + /** + * @brief Set Vector containing all allowed operators strings + */ + inline void set_allowed_ops(std::vector<std::string> allowed_ops) {_allowed_ops = allowed_ops;} + + /** + * @brief The value of the property to evaluate the loss function against for the training set + */ + inline const std::vector<double>& prop_train() const + { + if(_prop_train.size() == 0) + { + throw std::logic_error("Accessing an unset member (_prop_train)."); + } + return _prop_train; + } + + /** + * @brief The value of the property to evaluate the loss function against for the training set + */ + inline std::vector<double> prop_train_copy() const + { + if(_prop_train.size() == 0) + { + throw std::logic_error("Accessing an unset member (_prop_train)."); + } + return _prop_train; + } + + /** + * @brief Set The value of the property to evaluate the loss function against for the training set + */ + void set_prop_train(std::vector<double> prop_train); + + /** + * @brief The value of the property to evaluate the loss function against for the test set + */ + inline const std::vector<double>& prop_test() const + { + if(_prop_test.size() != _n_samp_test) + { + throw std::logic_error("Accessing an unset member (_prop_test)."); + } + return _prop_test; + } + + /** + * @brief The value of the property to evaluate the loss function against for the test set + */ + inline std::vector<double> prop_test_copy() const + { + if(_prop_test.size() != _n_samp_test) + { + throw 
std::logic_error("Accessing an unset member (_prop_test)."); + } + return _prop_test; + } + + /** + * @brief Set The value of the property to evaluate the loss function against for the test set + */ + void set_prop_test(std::vector<double> prop_test); + + /** + * @brief List of indexes from the initial data file in the test set + */ + inline const std::vector<int>& leave_out_inds() const + { + if(_leave_out_inds.size() != _n_samp_test) + { + throw std::logic_error("Accessing an unset member (_leave_out_inds)."); + } + return _leave_out_inds; + } + + /** + * @brief List of indexes from the initial data file in the test set + */ + inline std::vector<int> leave_out_inds_copy() const + { + if(_leave_out_inds.size() != _n_samp_test) + { + throw std::logic_error("Accessing an unset member (_leave_out_inds)."); + } + return _leave_out_inds; + } + + /** + * @brief Set List of indexes from the initial data file in the test set + */ + void set_leave_out_inds(std::vector<int> leave_out_inds); + + /** + * @brief Number of training samples per task + */ + inline const std::vector<int>& task_sizes_train() const + { + if(_task_sizes_train.size() == 0) + { + throw std::logic_error("Accessing an unset member (_task_sizes_train)."); + } + return _task_sizes_train; + } + + /** + * @brief Number of training samples per task + */ + inline std::vector<int> task_sizes_train_copy() const + { + if(_task_sizes_train.size() == 0) + { + throw std::logic_error("Accessing an unset member (_task_sizes_train)."); + } + return _task_sizes_train; + } + + /** + * @brief Set Number of training samples per task + */ + void set_task_sizes_train(std::vector<int> task_sizes_train); + + /** + * @brief Number of testing samples per task + */ + inline const std::vector<int>& task_sizes_test() const + { + if(_task_sizes_test.size() == 0) + { + throw std::logic_error("Accessing an unset member (_task_sizes_test)."); + } + return _task_sizes_test; + } + + /** + * @brief Number of testing samples per task + */ 
+ inline std::vector<int> task_sizes_test_copy() const + { + if(_task_sizes_test.size() == 0) + { + throw std::logic_error("Accessing an unset member (_task_sizes_test)."); + } + return _task_sizes_test; + } + + /** + * @brief Set Number of testing samples per task + */ + void set_task_sizes_test(std::vector<int> task_sizes_test); + + /** + * @brief A vector of FeatureNodes for the primary feature space + */ + inline const std::vector<FeatureNode>& phi_0() const + { + if(_phi_0.size() == 0) + { + throw std::logic_error("Accessing an unset member (_phi_0)."); + } + return _phi_0; + } + + /** + * @brief A vector of FeatureNodes for the primary feature space + */ + inline std::vector<FeatureNode> phi_0_copy() const + { + if(_phi_0.size() == 0) + { + throw std::logic_error("Accessing an unset member (_phi_0)."); + } + return _phi_0; + } + + /** + * @brief Return phi_0 as a vector of node_ptrs + * + */ + std::vector<node_ptr> phi_0_ptrs() const; + + /** + * @brief Set A vector of FeatureNodes for the primary feature space + */ + void set_phi_0(std::vector<FeatureNode> phi_0); + + + // DocString: inputs_get_prop_unit + /** + * @brief The Unit of the property + */ + inline Unit prop_unit() const {return _prop_unit;} + + // DocString: inputs_set_prop_unit + /** + * @brief Set The Unit of the property + */ + inline void set_prop_unit(Unit prop_unit) {_prop_unit = prop_unit;} + + // DocString: inputs_get_filename + /** + * @brief Name of the input file + */ + inline std::string filename() const {return _filename;} + + // DocString: inputs_set_filename + /** + * @brief Set Name of the input file + */ + inline void set_filename(const std::string filename) {_filename = filename;} + + // DocString: inputs_get_data_file + /** + * @brief Name of the data file + */ + inline std::string data_file() const {return _data_file;} + + // DocString: inputs_set_data_file + /** + * @brief Set Name of the data file + */ + inline void set_data_file(const std::string data_file) {_data_file = 
data_file;} + + // DocString: inputs_get_prop_key + /** + * @brief Key used to find the property column in the data file + */ + inline std::string prop_key() const {return _prop_key;} + + // DocString: inputs_set_prop_key + /** + * @brief Set Key used to find the property column in the data file + */ + inline void set_prop_key(const std::string prop_key) {_prop_key = prop_key;} + + // DocString: inputs_get_prop_label + /** + * @brief The label of the property + */ + inline std::string prop_label() const {return _prop_label;} + + // DocString: inputs_set_prop_label + /** + * @brief Set The label of the property + */ + inline void set_prop_label(std::string prop_label) {_prop_label = prop_label;} + + // DocString: inputs_get_task_key + /** + * @brief Key used to find the task column in the data file + */ + inline std::string task_key() const {return _task_key;} + + // DocString: inputs_set_task_key + /** + * @brief Set Key used to find the task column in the data file + */ + inline void set_task_key(const std::string task_key) {_task_key = task_key;} + + // DocString: inputs_get_calc_type + /** + * @brief The type of LossFunction to use when projecting the features onto a property + */ + inline std::string calc_type() const {return _calc_type;} + + // DocString: inputs_set_calc_type + /** + * @brief Set The type of LossFunction to use when projecting the features onto a property + */ + inline void set_calc_type(const std::string calc_type) {_calc_type = calc_type;} + + // DocString: inputs_get_cross_cor_max + /** + * @brief Maximum cross-correlation used for selecting features + */ + inline double cross_cor_max() const {return _cross_cor_max;} + + // DocString: inputs_set_cross_cor_max + /** + * @brief Set Maximum cross-correlation used for selecting features + */ + inline void set_cross_cor_max(double cross_cor_max) {_cross_cor_max = cross_cor_max;} + + // DocString: inputs_get_l_bound + /** + * @brief The lower bound for the maximum absolute value of the features + 
*/ + inline double l_bound() const {return _l_bound;} + + // DocString: inputs_set_l_bound + /** + * @brief Set The lower bound for the maximum absolute value of the features + */ + inline void set_l_bound(double l_bound) + { + if(l_bound > _u_bound) + { + throw std::logic_error("The new lower bound is larger than the current upper bound");; + } + _l_bound = l_bound; + } + + // DocString: inputs_get_u_bound + /** + * @brief The upper bound for the maximum absolute value of the features + */ + inline double u_bound() const {return _u_bound;} + + // DocString: inputs_set_u_bound + /** + * @brief Set The upper bound for the maximum absolute value of the features + */ + inline void set_u_bound(double u_bound) + { + if(u_bound < _l_bound) + { + throw std::logic_error("The new upper bound is smaller than the current lower bound");; + } + _u_bound = u_bound; + } + + // DocString: inputs_get_n_dim + /** + * @brief The maximum number of features allowed in the linear model + */ + inline int n_dim() const {return _n_dim;} + + // DocString: inputs_set_n_dim + /** + * @brief Set The maximum number of features allowed in the linear model + */ + inline void set_n_dim(const int n_dim) {_n_dim = n_dim;} + + // DocString: inputs_get_max_rung + /** + * @brief Maximum rung for the feature creation + */ + inline int max_rung() const {return _max_rung;} + + // DocString: inputs_set_max_rung + /** + * @brief Set Maximum rung for the feature creation + */ + inline void set_max_rung(const int max_rung) + { + _max_rung = max_rung; + node_value_arrs::set_max_rung(max_rung, _allowed_param_ops.size() > 0); + } + + // DocString: inputs_get_n_rung_store + /** + * @brief The number of rungs to calculate and store the value of the features for all samples + */ + inline int n_rung_store() const {return _n_rung_store;} + + // DocString: inputs_set_n_rung_store + /** + * @brief Set The number of rungs to calculate and store the value of the features for all samples + */ + inline void 
set_n_rung_store(const int n_rung_store) {_n_rung_store = n_rung_store;} + + // DocString: inputs_get_n_rung_generate + /** + * @brief Either 0 or 1, and is the number of rungs to generate on the fly during SIS + */ + inline int n_rung_generate() const {return _n_rung_generate;} + + // DocString: inputs_set_n_rung_generate + /** + * @brief Set Either 0 or 1, and is the number of rungs to generate on the fly during SIS + */ + inline void set_n_rung_generate(const int n_rung_generate) {_n_rung_generate = n_rung_generate;} + + // DocString: inputs_get_n_sis_select + /** + * @brief Number of features to select during each SIS iteration + */ + inline int n_sis_select() const {return _n_sis_select;} + + // DocString: inputs_set_n_sis_select + /** + * @brief Set Number of features to select during each SIS iteration + */ + inline void set_n_sis_select(const int n_sis_select) {_n_sis_select = n_sis_select;} + + // DocString: inputs_get_n_samp + /** + * @brief Number of samples in the data set + */ + inline int n_samp() const {return _n_samp;} + + // DocString: inputs_get_n_samp_train + /** + * @brief Number of samples in the training set + */ + inline int n_samp_train() const {return _n_samp_train;} + + // DocString: inputs_get_n_samp_test + /** + * @brief Number of samples in the test set + */ + inline int n_samp_test() const {return _n_samp_test;} + + // DocString: inputs_get_n_residual + /** + * @brief Number of residuals to pass to the next sis model + */ + inline int n_residual() const {return _n_residual;} + + // DocString: inputs_set_n_residual + /** + * @brief Set Number of residuals to pass to the next sis model + */ + inline void set_n_residual(const int n_residual) {_n_residual = n_residual;} + + // DocString: inputs_get_n_models_store + /** + * @brief The number of models to output to files + */ + inline int n_models_store() const {return _n_models_store;} + + // DocString: inputs_set_n_models_store + /** + * @brief Set The number of models to output to files + 
*/ + inline void set_n_models_store(const int n_models_store) {_n_models_store = n_models_store;} + + // DocString: inputs_get_max_param_depth + /** + * @brief The maximum depth in the binary expression tree to set non-linear optimization + */ + inline int max_param_depth() const {return _max_param_depth;} + + // DocString: inputs_set_max_param_depth + /** + * @brief Set The maximum depth in the binary expression tree to set non-linear optimization + */ + inline void set_max_param_depth(const int max_param_depth) {_max_param_depth = max_param_depth;} + + // DocString: inputs_get_nlopt_seed + /** + * @brief The seed used for the nlOpt library + */ + inline int nlopt_seed() const {return _nlopt_seed;} + + // DocString: inputs_set_nlopt_seed + /** + * @brief Set The seed used for the nlOpt library + */ + inline void set_nlopt_seed(const int nlopt_seed) {_nlopt_seed = nlopt_seed;} + + // DocString: inputs_get_fix_intercept + /** + * @brief If true the bias term is fixed at 0 + */ + inline bool fix_intercept() const {return _fix_intercept;} + + // DocString: inputs_set_fix_intercept + /** + * @brief Set If true the bias term is fixed at 0 + */ + inline void set_fix_intercept(const bool fix_intercept) {_fix_intercept = fix_intercept;} + + // DocString: inputs_get_global_param_opt + /** + * @brief True if global optimization is requested for non-linear optimization of parameters (Can break reproducibility) + */ + inline bool global_param_opt() const {return _global_param_opt;} + + // DocString: inputs_set_global_param_opt + /** + * @brief Set True if global optimization is requested for non-linear optimization of parameters (Can break reproducibility) + */ + inline void set_global_param_opt(const bool global_param_opt) {_global_param_opt = global_param_opt;} + + // DocString: inputs_get_reparam_residual + /** + * @brief If True then reparameterize features using the residuals of each model + */ + inline bool reparam_residual() const {return _reparam_residual;} + + // 
DocString: inputs_set_reparam_residual + /** + * @brief Set If True then reparameterize features using the residuals of each model + */ + inline void set_reparam_residual(const bool reparam_residual) {_reparam_residual = reparam_residual;} + +#ifdef PY_BINDINGS + // DocString: inputs_get_sample_ids_train_py + /** + * @brief Vector storing all sample ids for the training samples + */ + inline py::list sample_ids_train_py() const + { + return python_conv_utils::to_list<std::string>(sample_ids_train_copy()); + } + + // DocString: inputs_set_sample_ids_train_py + /** + * @brief Set Vector storing all sample ids for the training samples + */ + inline void set_sample_ids_train_py(py::list sample_ids_train) + { + set_sample_ids_train(python_conv_utils::from_list<std::string>(sample_ids_train)); + } + + // DocString: inputs_get_sample_ids_test_py + /** + * @brief Vector storing all sample ids for the test samples + */ + inline py::list sample_ids_test_py() const + { + return python_conv_utils::to_list<std::string>(sample_ids_test_copy()); + } + + // DocString: inputs_set_sample_ids_test_py + /** + * @brief Set Vector storing all sample ids for the test samples + */ + inline void set_sample_ids_test_py(py::list sample_ids_test) + { + set_sample_ids_test(python_conv_utils::from_list<std::string>(sample_ids_test)); + } + + // DocString: inputs_get_task_names_py + /** + * @brief Vector storing the ID of the task names + */ + inline py::list task_names_py() const + { + return python_conv_utils::to_list<std::string>(task_names_copy()); + } + + // DocString: inputs_set_task_names_py + /** + * @brief Set Vector storing the ID of the task names + */ + inline void set_task_names_py(py::list task_names) + { + set_task_names(python_conv_utils::from_list<std::string>(task_names)); + } + + // DocString: inputs_get_allowed_param_ops_py + /** + * @brief Vector containing all allowed operators strings for operators with free parameters + */ + inline py::list allowed_param_ops_py() const + 
{ + return python_conv_utils::to_list<std::string>(allowed_param_ops_copy()); + } + + // DocString: inputs_set_allowed_param_ops_py + /** + * @brief Set Vector containing all allowed operators strings for operators with free parameters + */ + inline void set_allowed_param_ops_py(py::list allowed_param_ops) + { + set_allowed_param_ops(python_conv_utils::from_list<std::string>(allowed_param_ops)); + } + + // DocString: inputs_get_allowed_ops_py + /** + * @brief Vector containing all allowed operators strings + */ + inline py::list allowed_ops_py() const + { + return python_conv_utils::to_list<std::string>(allowed_ops_copy()); + } + + // DocString: inputs_set_allowed_ops_py + /** + * @brief Set Vector containing all allowed operators strings + */ + inline void set_allowed_ops_py(py::list allowed_ops) + { + set_allowed_ops(python_conv_utils::from_list<std::string>(allowed_ops)); + } + + // DocString: inputs_get_prop_train_list + /** + * @brief The value of the property to evaluate the loss function against for the training set + */ + inline py::list prop_train_list() const + { + return python_conv_utils::to_list<double>(prop_train_copy()); + } + + // DocString: inputs_set_prop_train_list + /** + * @brief Set The value of the property to evaluate the loss function against for the training set + */ + inline void set_prop_train_list(py::list prop_train) + { + set_prop_train(python_conv_utils::from_list<double>(prop_train)); + } + + // DocString: inputs_get_prop_test_list + /** + * @brief The value of the property to evaluate the loss function against for the test set + */ + inline py::list prop_test_list() const + { + return python_conv_utils::to_list<double>(prop_test_copy()); + } + + // DocString: inputs_set_prop_test_list + /** + * @brief Set The value of the property to evaluate the loss function against for the test set + */ + inline void set_prop_test_list(py::list prop_test) + { + set_prop_test(python_conv_utils::from_list<double>(prop_test)); + } + + // DocString: 
inputs_get_prop_train_arr + /** + * @brief The value of the property to evaluate the loss function against for the training set + */ + inline np::ndarray prop_train_arr() const + { + return python_conv_utils::to_ndarray<double>(prop_train_copy()); + } + + // DocString: inputs_set_prop_train_arr + /** + * @brief Set The value of the property to evaluate the loss function against for the training set + */ + inline void set_prop_train_arr(np::ndarray prop_train) + { + set_prop_train(python_conv_utils::from_ndarray<double>(prop_train)); + } + + // DocString: inputs_get_prop_test_arr + /** + * @brief The value of the property to evaluate the loss function against for the test set + */ + inline np::ndarray prop_test_arr() const + { + return python_conv_utils::to_ndarray<double>(prop_test_copy()); + } + + // DocString: inputs_set_prop_test_arr + /** + * @brief Set The value of the property to evaluate the loss function against for the test set + */ + inline void set_prop_test_arr(np::ndarray prop_test) + { + set_prop_test(python_conv_utils::from_ndarray<double>(prop_test)); + } + + // DocString: inputs_get_leave_out_inds_py + /** + * @brief List of indexes from the initial data file in the test set + */ + inline py::list leave_out_inds_py() const + { + return python_conv_utils::to_list<int>(leave_out_inds_copy()); + } + + // DocString: inputs_set_leave_out_inds_py + /** + * @brief Set List of indexes from the initial data file in the test set + */ + inline void set_leave_out_inds_py(py::list leave_out_inds) + { + set_leave_out_inds(python_conv_utils::from_list<int>(leave_out_inds)); + } + + // DocString: inputs_get_task_sizes_train_py + /** + * @brief Number of training samples per task + */ + inline py::list task_sizes_train_py() const + { + return python_conv_utils::to_list<int>(task_sizes_train_copy()); + } + + // DocString: inputs_set_task_sizes_train_py + /** + * @brief Set Number of training samples per task + */ + inline void set_task_sizes_train_py(py::list 
task_sizes_train) + { + set_task_sizes_train(python_conv_utils::from_list<int>(task_sizes_train)); + } + + // DocString: inputs_get_task_sizes_test_py + /** + * @brief Number of testing samples per task + */ + inline py::list task_sizes_test_py() const + { + return python_conv_utils::to_list<int>(task_sizes_test_copy()); + } + + // DocString: inputs_set_task_sizes_test_py + /** + * @brief Set Number of testing samples per task + */ + inline void set_task_sizes_test_py(py::list task_sizes_test) + { + set_task_sizes_test(python_conv_utils::from_list<int>(task_sizes_test)); + } + + // DocString: inputs_get_phi_0_py + /** + * @brief A list of FeatureNodes for the primary feature space + */ + inline py::list phi_0_py() const + { + return python_conv_utils::to_list<FeatureNode>(phi_0_copy()); + } + + // DocString: inputs_set_phi_0_py + /** + * @brief Sets a list of FeatureNodes for the primary feature space + */ + inline void set_phi_0_py(py::list phi_0) + { + set_phi_0(python_conv_utils::from_list<FeatureNode>(phi_0)); + } +#endif + }; /** * @brief strips comments from the input file diff --git a/src/loss_function/LossFunctionConvexHull.cpp b/src/loss_function/LossFunctionConvexHull.cpp index 0c000c32382fc01d6a91ea8f401840ef2192396c..a28bff6fc1d878f35c668c4cb8db82c763260702 100644 --- a/src/loss_function/LossFunctionConvexHull.cpp +++ b/src/loss_function/LossFunctionConvexHull.cpp @@ -143,7 +143,7 @@ void LossFunctionConvexHull::setup_lp(bool initialize_sorted_d_mat) std::vector<int> inds(_task_sizes_train[tt]); std::iota(inds.begin(), inds.end(), task_start); - util_funcs::argsort<double>(inds.data(), inds.data() + inds.size(), &_prop_train[task_start]); + util_funcs::argsort<double>(inds.data(), inds.data() + inds.size(), _prop_train.data()); int cls_start = 0; _sample_inds_to_sorted_dmat_inds[inds[0]] = task_start; @@ -197,7 +197,7 @@ void LossFunctionConvexHull::setup_lp(bool initialize_sorted_d_mat) tt, _n_class, _n_feat, - std::accumulate(n_samp_per_class.begin(), 
n_samp_per_class.end(), 0), + std::accumulate(n_samp_per_class.begin() + tt * _n_class, n_samp_per_class.end(), 0), _width, n_samp_test_per_class, std::accumulate(n_samp_test_per_class.begin(), n_samp_test_per_class.end(), 0) diff --git a/src/main.cpp b/src/main.cpp index 2f85c88412ca47ed5dbea7c0e9f5a2b7db173a47..9e3418a0fb5ff2bede809922d6f8de7ac486a1b1 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -57,7 +57,7 @@ int main(int argc, char const *argv[]) boost::property_tree::json_parser::read_json(filename,propTree); double start = omp_get_wtime(); - InputParser ip(propTree, filename, mpi_setup::comm); + InputParser inputs(propTree, filename, mpi_setup::comm); if(mpi_setup::comm->rank() == 0) { boost::filesystem::remove(filename); @@ -66,39 +66,25 @@ int main(int argc, char const *argv[]) duration = omp_get_wtime() - start; if(mpi_setup::comm->rank() == 0) { - std::cout<< "time input_parsing/Feature space generation: " << duration << " s" << std::endl; + std::cout << "time input_parsing: " << duration << " s" << std::endl; } + std::shared_ptr<FeatureSpace> feat_space = std::make_shared<FeatureSpace>(inputs); + node_value_arrs::initialize_d_matrix_arr(); - if(ip._calc_type.compare("regression") == 0) + if(inputs.calc_type().compare("regression") == 0) { - SISSORegressor sisso( - ip._feat_space, - ip._prop_label, - ip._prop_unit, - ip._prop_train, - ip._prop_test, - ip._task_sizes_train, - ip._task_sizes_test, - ip._leave_out_inds, - ip._n_dim, - ip._n_residuals, - ip._n_models_store, - ip._sample_ids_train, - ip._sample_ids_test, - ip._task_names, - ip._fix_intercept - ); + SISSORegressor sisso(inputs, feat_space); sisso.fit(); if(mpi_setup::comm->rank() == 0) { for(int ii = 0; ii < sisso.models().size(); ++ii) { - std::cout << "Train RMSE: " << sisso.models()[ii][0].rmse() << " " << ip._prop_unit; - if(ip._prop_test.size() > 0) + std::cout << "Train RMSE: " << sisso.models()[ii][0].rmse() << " " << inputs.prop_unit(); + if(inputs.prop_test().size() > 0) { - 
std::cout << "; Test RMSE: " << sisso.models()[ii][0].test_rmse() << " " << ip._prop_unit << std::endl; + std::cout << "; Test RMSE: " << sisso.models()[ii][0].test_rmse() << " " << inputs.prop_unit() << std::endl; } else { @@ -108,25 +94,9 @@ int main(int argc, char const *argv[]) } } } - else if(ip._calc_type.compare("log_regression") == 0) + else if(inputs.calc_type().compare("log_regression") == 0) { - SISSOLogRegressor sisso( - ip._feat_space, - ip._prop_label, - ip._prop_unit, - ip._prop_train, - ip._prop_test, - ip._task_sizes_train, - ip._task_sizes_test, - ip._leave_out_inds, - ip._n_dim, - ip._n_residuals, - ip._n_models_store, - ip._sample_ids_train, - ip._sample_ids_test, - ip._task_names, - ip._fix_intercept - ); + SISSOLogRegressor sisso(inputs, feat_space); sisso.fit(); if(mpi_setup::comm->rank() == 0) @@ -134,7 +104,7 @@ int main(int argc, char const *argv[]) for(int ii = 0; ii < sisso.models().size(); ++ii) { std::cout << "Train RMSE: " << sisso.models()[ii][0].rmse(); - if(ip._prop_test.size() > 0) + if(inputs.prop_test().size() > 0) { std::cout << "; Test RMSE: " << sisso.models()[ii][0].test_rmse() << std::endl; } @@ -146,24 +116,9 @@ int main(int argc, char const *argv[]) } } } - else if(ip._calc_type.compare("classification") == 0) + else if(inputs.calc_type().compare("classification") == 0) { - SISSOClassifier sisso( - ip._feat_space, - ip._prop_label, - ip._prop_unit, - ip._prop_train, - ip._prop_test, - ip._task_sizes_train, - ip._task_sizes_test, - ip._leave_out_inds, - ip._n_dim, - ip._n_residuals, - ip._n_models_store, - ip._sample_ids_train, - ip._sample_ids_test, - ip._task_names - ); + SISSOClassifier sisso(inputs, feat_space); sisso.fit(); if(mpi_setup::comm->rank() == 0) @@ -171,7 +126,7 @@ int main(int argc, char const *argv[]) for(int ii = 0; ii < sisso.models().size(); ++ii) { std::cout << "Percent of training data in the convex overlap region: " << sisso.models()[ii][0].percent_train_error() << "%"; - if(ip._prop_test.size() > 
0) + if(inputs.prop_test().size() > 0) { std::cout << "; Percent of test data in the convex overlap region: " << sisso.models()[ii][0].percent_test_error() << "%" << std::endl; } diff --git a/src/python/py_binding_cpp_def/bindings_docstring_keyed.cpp b/src/python/py_binding_cpp_def/bindings_docstring_keyed.cpp index cbb996bc0f41cdd1205505fa21c49b45a24cbd1f..3cf68d68cdc681fb50810bbcb68573d9cd4ecb27 100644 --- a/src/python/py_binding_cpp_def/bindings_docstring_keyed.cpp +++ b/src/python/py_binding_cpp_def/bindings_docstring_keyed.cpp @@ -25,6 +25,7 @@ using namespace boost::python; void sisso::register_all() { + sisso::registerInputs(); sisso::descriptor_identifier::registerModel(); sisso::descriptor_identifier::registerModelRegressor(); sisso::descriptor_identifier::registerModelLogRegressor(); @@ -58,39 +59,58 @@ void sisso::register_all() sisso::feature_creation::node::registerSqrtNode(); sisso::feature_creation::node::registerSixPowNode(); - void (*init_val_ar)(int, int, int, int) = &node_value_arrs::initialize_values_arr; - void (*init_val_ar_list)(py::list, py::list, int, int) = &node_value_arrs::initialize_values_arr; - void (*init_val_ar_arr)(np::ndarray, np::ndarray, int, int) = &node_value_arrs::initialize_values_arr; - def( "phi_selected_from_file", &str2node::phi_selected_from_file_py, (arg("filename"), arg("phi_0")), "@DocString_node_utils_phi_sel_from_file@" ); + + void (*init_val_ar_list)(py::list, py::list, int, int, bool) = &node_value_arrs::initialize_values_arr; + void (*init_val_ar_arr)(np::ndarray, np::ndarray, int, int, bool) = &node_value_arrs::initialize_values_arr; + + void (*init_val_ar_list_no_params)(py::list, py::list, int, int) = &node_value_arrs::initialize_values_arr; + void (*init_val_ar_arr_no_params)(np::ndarray, np::ndarray, int, int) = &node_value_arrs::initialize_values_arr; + def( "initialize_values_arr", - init_val_ar, - (arg("n_samples"), arg("n_samples_test"), arg("n_primary_feat"), arg("max_rung")), - 
"@DocString_node_vals_init_no_ts@" + init_val_ar_list, + (arg("task_sz_train"), arg("task_sz_test"), arg("n_primary_feat"), arg("max_rung"), arg("use_params")), + "@DocString_node_vals_ts_list@" ); def( "initialize_values_arr", - init_val_ar_list, + init_val_ar_arr, + (arg("task_sz_train"), arg("task_sz_test"), arg("n_primary_feat"), arg("max_rung"), arg("use_params")), + "@DocString_node_vals_ts_arr@" + ); + + def( + "initialize_values_arr", + init_val_ar_list_no_params, (arg("task_sz_train"), arg("task_sz_test"), arg("n_primary_feat"), arg("max_rung")), - "@DocString_node_vals_ts_list@" + "@DocString_node_vals_ts_list_no_params@" ); + def( "initialize_values_arr", - init_val_ar_arr, + init_val_ar_arr_no_params, (arg("task_sz_train"), arg("task_sz_test"), arg("n_primary_feat"), arg("max_rung")), - "@DocString_node_vals_ts_arr@" + "@DocString_node_vals_ts_arr_no_params@" ); + def( "initialize_d_matrix_arr", &node_value_arrs::initialize_d_matrix_arr, - "@DocString_node_vlas_init_d_mat@" + "@DocString_node_vals_init_d_mat@" ); + + def( + "finalize_values_arr", + &node_value_arrs::finialize_values_arr, + "@DocString_node_vals_finalize@" + ); + def( "matlabify", &str_utils::matlabify, @@ -156,153 +176,70 @@ void sisso::register_all() #endif } +void sisso::registerInputs() +{ + class_<InputParser>( + "Inputs", + "@DocString_cls_input_parser@", + init<>(arg("self"), "@DocString_input_init_default@") + ) + .add_property("sample_ids_train", &InputParser::sample_ids_train_py, &InputParser::set_sample_ids_train_py, "@DocString_inputs_sample_ids_train_py@") + .add_property("sample_ids_test", &InputParser::sample_ids_test_py, &InputParser::set_sample_ids_test_py, "@DocString_inputs_sample_ids_test_py@") + .add_property("task_names", &InputParser::task_names_py, &InputParser::set_task_names_py, "@DocString_inputs_task_names_py@") + .add_property("allowed_param_ops", &InputParser::allowed_param_ops_py, &InputParser::set_allowed_param_ops_py, 
"@DocString_inputs_allowed_param_ops_py@") + .add_property("allowed_ops", &InputParser::allowed_ops_py, &InputParser::set_allowed_ops_py, "@DocString_inputs_allowed_ops_py@") + // .add_property("prop_train", &InputParser::prop_train_list, &InputParser::set_prop_train_list, "@DocString_inputs_prop_train_list@") + // .add_property("prop_test", &InputParser::prop_test_list, &InputParser::set_prop_test_list, "@DocString_inputs_prop_test_list@") + .add_property("prop_train", &InputParser::prop_train_arr, &InputParser::set_prop_train_arr, "@DocString_inputs_prop_train_arr@") + .add_property("prop_test", &InputParser::prop_test_arr, &InputParser::set_prop_test_arr, "@DocString_inputs_prop_test_arr@") + .add_property("leave_out_inds", &InputParser::leave_out_inds_py, &InputParser::set_leave_out_inds_py, "@DocString_inputs_leave_out_inds_py@") + .add_property("task_sizes_train", &InputParser::task_sizes_train_py, &InputParser::set_task_sizes_train_py, "@DocString_inputs_task_sizes_train_py@") + .add_property("task_sizes_test", &InputParser::task_sizes_test_py, &InputParser::set_task_sizes_test_py, "@DocString_inputs_task_sizes_test_py@") + .add_property("phi_0", &InputParser::phi_0_py, &InputParser::set_phi_0_py, "@DocString_inputs_phi_0_py@") + .add_property("prop_unit", &InputParser::prop_unit, &InputParser::set_prop_unit, "@DocString_inputs_prop_unit@") + .add_property("filename", &InputParser::filename, &InputParser::set_filename, "@DocString_inputs_filename@") + .add_property("data_file", &InputParser::data_file, &InputParser::set_data_file, "@DocString_inputs_data_file@") + .add_property("prop_key", &InputParser::prop_key, &InputParser::set_prop_key, "@DocString_inputs_prop_key@") + .add_property("prop_label", &InputParser::prop_label, &InputParser::set_prop_label, "@DocString_inputs_prop_label@") + .add_property("task_key", &InputParser::task_key, &InputParser::set_task_key, "@DocString_inputs_task_key@") + .add_property("calc_type", &InputParser::calc_type, 
&InputParser::set_calc_type, "@DocString_inputs_calc_type@") + .add_property("cross_cor_max", &InputParser::cross_cor_max, &InputParser::set_cross_cor_max, "@DocString_inputs_cross_cor_max@") + .add_property("l_bound", &InputParser::l_bound, &InputParser::set_l_bound, "@DocString_inputs_l_bound@") + .add_property("u_bound", &InputParser::u_bound, &InputParser::set_u_bound, "@DocString_inputs_u_bound@") + .add_property("n_dim", &InputParser::n_dim, &InputParser::set_n_dim, "@DocString_inputs_n_dim@") + .add_property("max_rung", &InputParser::max_rung, &InputParser::set_max_rung, "@DocString_inputs_max_rung@") + .add_property("n_rung_store", &InputParser::n_rung_store, &InputParser::set_n_rung_store, "@DocString_inputs_n_rung_store@") + .add_property("n_rung_generate", &InputParser::n_rung_generate, &InputParser::set_n_rung_generate, "@DocString_inputs_n_rung_generate@") + .add_property("n_sis_select", &InputParser::n_sis_select, &InputParser::set_n_sis_select, "@DocString_inputs_n_sis_select@") + .add_property("n_samp", &InputParser::n_samp, "@DocString_inputs_n_samp@") + .add_property("n_samp_train", &InputParser::n_samp_train, "@DocString_inputs_n_samp_train@") + .add_property("n_samp_test", &InputParser::n_samp_test, "@DocString_inputs_n_samp_test@") + .add_property("n_residual", &InputParser::n_residual, &InputParser::set_n_residual, "@DocString_inputs_n_residual@") + .add_property("n_models_store", &InputParser::n_models_store, &InputParser::set_n_models_store, "@DocString_inputs_n_models_store@") + .add_property("max_param_depth", &InputParser::max_param_depth, &InputParser::set_max_param_depth, "@DocString_inputs_max_param_depth@") + .add_property("nlopt_seed", &InputParser::nlopt_seed, &InputParser::set_nlopt_seed, "@DocString_inputs_nlopt_seed@") + .add_property("fix_intercept", &InputParser::fix_intercept, &InputParser::set_fix_intercept, "@DocString_inputs_fix_intercept@") + .add_property("global_param_opt", &InputParser::global_param_opt, 
&InputParser::set_global_param_opt, "@DocString_inputs_global_param_opt@") + .add_property("reparam_residual", &InputParser::reparam_residual, &InputParser::set_reparam_residual, "@DocString_inputs_reparam_residual@") + ; +} + void sisso::feature_creation::registerFeatureSpace() { void (FeatureSpace::*sis_list)(list) = &FeatureSpace::sis; void (FeatureSpace::*sis_ndarray)(np::ndarray) = &FeatureSpace::sis; - #ifdef PARAMETERIZE - class_<FeatureSpace>( - "FeatureSpace", - "@DocString_cls_feat_space@", - init<list, list, list, np::ndarray, optional<std::string, int, int, int, int, double, double, double, int, bool>>( - ( - arg("self"), - arg("phi_0"), - arg("allowed_ops"), - arg("allowed_param_ops"), - arg("prop"), - arg("project_type")="regression", - arg("max_rung")=1, - arg("n_sis_select")=1, - arg("n_rung_store")=-1, - arg("n_rung_generate")=0, - arg("cross_corr_max")=1.0, - arg("min_abs_feat_val")=1e-50, - arg("max_abs_feat_val")=1e50, - arg("max_param_depth")=-1, - arg("reparam_residual")=false - ), - "@DocString_feat_space_init_np_array@" - ) - ) - .def( - init<list, list, list, list, optional<std::string, int, int, int, int, double, double, double, int, bool>>( - ( - arg("self"), - arg("phi_0"), - arg("allowed_ops"), - arg("allowed_param_ops"), - arg("prop"), - arg("project_type")="regression", - arg("max_rung")=1, - arg("n_sis_select")=1, - arg("n_rung_store")=-1, - arg("n_rung_generate")=0, - arg("cross_corr_max")=1.0, - arg("min_abs_feat_val")=1e-50, - arg("max_abs_feat_val")=1e50, - arg("max_param_depth")=-1, - arg("reparam_residual")=false - ), - "@DocString_feat_space_init_py_list@" - ) - ) - .def( - init<std::string, list, list, list, optional<std::string, int, double>>( - ( - arg("self"), - arg("feature_file"), - arg("phi_0"), - arg("prop"), - arg("task_sizes"), - arg("project_type")="regression", - arg("n_sis_select")=1, - arg("cross_corr_max")=1.0 - ), - "@DocString_feat_space_init_file_py_list@" - ) - ) - .def( - init<std::string, list, 
np::ndarray, list, optional<std::string, int, double>>( - ( - arg("self"), - arg("feature_file"), - arg("phi_0"), - arg("prop"), - arg("task_sizes"), - arg("project_type")="regression", - arg("n_sis_select")=1, - arg("cross_corr_max")=1.0 - ), - "@DocString_feat_space_init_file_np_array@" - ) - ) - .def("sis", sis_list, (arg("self"), arg("prop")), "@DocString_feat_space_sis_list@") - .def("sis", sis_ndarray, (arg("self"), arg("prop")), "@DocString_feat_space_sis_arr@") - .def("feat_in_phi", &FeatureSpace::feat_in_phi, (arg("self"), arg("ind")), "@DocString_feat_space_feat_in_phi@") - .def("remove_feature", &FeatureSpace::remove_feature, (arg("self"), arg("ind")), "@DocString_feat_space_remove_feature@") - .def("get_feature", &FeatureSpace::get_feature, (arg("self"), arg("ind")), "@DocString_feat_space_get_feature@") - .add_property("phi_selected", &FeatureSpace::phi_selected_py, "@DocString_feat_space_phi_selected_py@") - .add_property("phi0", &FeatureSpace::phi0_py, "@DocString_feat_space_phi0_py@") - .add_property("phi", &FeatureSpace::phi_py, "@DocString_feat_space_phi_py@") - .add_property("scores", &FeatureSpace::scores_py, "@DocString_feat_space_scores_py@") - .add_property("task_sizes", &FeatureSpace::task_sizes_py, "@DocString_feat_space_task_sizes_py@") - .add_property("allowed_ops", &FeatureSpace::allowed_ops_py, "@DocString_feat_space_allowed_ops_py@") - .add_property("start_rung", &FeatureSpace::start_rung_py, "@DocString_feat_space_start_rung_py@") - .add_property("feature_space_file", &FeatureSpace::feature_space_file, "@DocString_feat_space_feature_space_file@") - .add_property("l_bound", &FeatureSpace::l_bound, "@DocString_feat_space_l_bound@") - .add_property("u_bound", &FeatureSpace::u_bound, "@DocString_feat_space_u_bound@") - .add_property("max_rung", &FeatureSpace::max_rung, "@DocString_feat_space_max_rung@") - .add_property("n_sis_select", &FeatureSpace::n_sis_select, "@DocString_feat_space_n_sis_select@") - .add_property("n_samp", 
&FeatureSpace::n_samp, "@DocString_feat_space_n_samp@") - .add_property("n_feat", &FeatureSpace::n_feat, "@DocString_feat_space_n_feat@") - .add_property("n_rung_store", &FeatureSpace::n_rung_store, "@DocString_feat_space_n_rung_store@") - .add_property("n_rung_generate", &FeatureSpace::n_rung_generate, "@DocString_feat_space_n_rung_generate@") - ; - #else class_<FeatureSpace>( "FeatureSpace", "@DocString_cls_feat_space@", - init<list, list, list, np::ndarray, optional<std::string, int, int, int, int, double, double, double, int>>( + init<InputParser>( ( arg("self"), - arg("phi_0"), - arg("allowed_ops"), - arg("prop"), - arg("project_type")="regression", - arg("max_rung")=1, - arg("n_sis_select")=1, - arg("n_rung_store")=-1, - arg("n_rung_generate")=0, - arg("cross_corr_max")=1.0, - arg("min_abs_feat_val")=1e-50, - arg("max_abs_feat_val")=1e50, - arg("max_param_depth")=-1, + arg("inputs") ), - "@DocString_feat_space_init_no_param_np_array@" + "@DocString_feat_space_init@" ) ) - .def( - init<list, list, list, list, optional<std::string, int, int, int, int, double, double, double, int>>( - ( - arg("self"), - arg("phi_0"), - arg("allowed_ops"), - arg("prop"), - arg("project_type")="regression", - arg("max_rung")=1, - arg("n_sis_select")=1, - arg("n_rung_store")=-1, - arg("n_rung_generate")=0, - arg("cross_corr_max")=1.0, - arg("min_abs_feat_val")=1e-50, - arg("max_abs_feat_val")=1e50, - arg("max_param_depth")=-1, - ), - "@DocString_feat_space_init_no_param_py_list@" - ) - ) .def( init<std::string, list, list, list, optional<std::string, int, double>>( ( @@ -342,7 +279,7 @@ void sisso::feature_creation::registerFeatureSpace() .add_property("phi0", &FeatureSpace::phi0_py, "@DocString_feat_space_phi0_py@") .add_property("phi", &FeatureSpace::phi_py, "@DocString_feat_space_phi_py@") .add_property("scores", &FeatureSpace::scores_py, "@DocString_feat_space_scores_py@") - .add_property("task_sizes", &FeatureSpace::task_sizes_py, "@DocString_feat_space_task_sizes_py@") + 
.add_property("task_sizes_train", &FeatureSpace::task_sizes_train_py, "@DocString_feat_space_task_sizes_py@") .add_property("allowed_ops", &FeatureSpace::allowed_ops_py, "@DocString_feat_space_allowed_ops_py@") .add_property("start_rung", &FeatureSpace::start_rung_py, "@DocString_feat_space_start_rung_py@") .add_property("feature_space_file", &FeatureSpace::feature_space_file, "@DocString_feat_space_feature_space_file@") @@ -350,12 +287,11 @@ void sisso::feature_creation::registerFeatureSpace() .add_property("u_bound", &FeatureSpace::u_bound, "@DocString_feat_space_u_bound@") .add_property("max_rung", &FeatureSpace::max_rung, "@DocString_feat_space_max_rung@") .add_property("n_sis_select", &FeatureSpace::n_sis_select, "@DocString_feat_space_n_sis_select@") - .add_property("n_samp", &FeatureSpace::n_samp, "@DocString_feat_space_n_samp@") + .add_property("n_samp_train", &FeatureSpace::n_samp_train, "@DocString_feat_space_n_samp@") .add_property("n_feat", &FeatureSpace::n_feat, "@DocString_feat_space_n_feat@") .add_property("n_rung_store", &FeatureSpace::n_rung_store, "@DocString_feat_space_n_rung_store@") .add_property("n_rung_generate", &FeatureSpace::n_rung_generate, "@DocString_feat_space_n_rung_generate@") ; - #endif } void sisso::feature_creation::registerUnit() @@ -1380,7 +1316,7 @@ void sisso::descriptor_identifier::registerModelClassifier() void sisso::descriptor_identifier::registerSISSOSolver() { class_<sisso::descriptor_identifier::SISSOSolver_Wrap, boost::noncopyable>("SISSOSolver", "@DocString_cls_sisso@", no_init) - .add_property("prop", &SISSOSolver::prop_py, "@DocString_sisso_di_prop_py@") + .add_property("prop_train", &SISSOSolver::prop_train_py, "@DocString_sisso_di_prop_train_py@") .add_property("prop_test", &SISSOSolver::prop_test_py, "@DocString_sisso_di_prop_test_py@") .add_property("n_samp", &SISSOSolver::n_samp, "@DocString_sisso_di_n_samp@") .add_property("n_dim", &SISSOSolver::n_dim, "@DocString_sisso_di_n_dim@") @@ -1397,49 +1333,11 @@ void 
sisso::descriptor_identifier::registerSISSORegressor() class_<SISSORegressor, bases<SISSOSolver>>( "SISSORegressor", "@DocString_cls_sisso_reg@", - init< - std::shared_ptr<FeatureSpace>, - std::string, - Unit, - np::ndarray, - np::ndarray, - py::list, - py::list, - py::list, - int, - int, - int, - py::list, - py::list, - py::list, - optional<bool> - >( - (arg("self"), arg("feat_space"), arg("prop_label"), arg("prop_unit"), arg("prop"), arg("prop_test"), arg("task_sizes_train"), arg("task_sizes_test"), arg("leave_out_inds"), arg("n_dim"), arg("n_residual"), arg("n_models_store"), arg("sample_ids_train"), arg("sample_ids_test"), arg("task_names"), arg("fix_intercept")), - "@DocString_sisso_reg_init_arr@" + init<InputParser, std::shared_ptr<FeatureSpace>>( + (arg("self"), arg("inputs"), arg("feat_space")), + "@DocString_sisso_reg_init@" ) ) - .def( - init< - std::shared_ptr<FeatureSpace>, - std::string, - Unit, - py::list, - py::list, - py::list, - py::list, - py::list, - int, - int, - int, - py::list, - py::list, - py::list, - optional<bool> - >( - (arg("self"), arg("feat_space"), arg("prop_label"), arg("prop_unit"), arg("prop"), arg("prop_test"), arg("task_sizes_train"), arg("task_sizes_test"), arg("leave_out_inds"), arg("n_dim"), arg("n_residual"), arg("n_models_store"), arg("sample_ids_train"), arg("sample_ids_test"), arg("task_names"), arg("fix_intercept")), - "@DocString_sisso_reg_init_list@" - ) - ) .def("fit", &SISSORegressor::fit, (arg("self")), "@DocString_sisso_reg_fit@") .add_property("models", &SISSORegressor::models_py, "@DocString_sisso_reg_models_py@") ; @@ -1450,49 +1348,11 @@ void sisso::descriptor_identifier::registerSISSOLogRegressor() class_<SISSOLogRegressor, bases<SISSORegressor>>( "SISSOLogRegressor", "@DocString_cls_sisso_log_reg@", - init< - std::shared_ptr<FeatureSpace>, - std::string, - Unit, - np::ndarray, - np::ndarray, - py::list, - py::list, - py::list, - int, - int, - int, - py::list, - py::list, - py::list, - optional<bool> - >( - 
(arg("self"), arg("feat_space"), arg("prop_label"), arg("prop_unit"), arg("prop"), arg("prop_test"), arg("task_sizes_train"), arg("task_sizes_test"), arg("leave_out_inds"), arg("n_dim"), arg("n_residual"), arg("n_models_store"), arg("sample_ids_train"), arg("sample_ids_test"), arg("task_names"), arg("fix_intercept")), + init<InputParser, std::shared_ptr<FeatureSpace>>( + (arg("self"), arg("inputs"), arg("feat_space")), "@DocString_sisso_log_reg_init_arr@" ) ) - .def( - init< - std::shared_ptr<FeatureSpace>, - std::string, - Unit, - py::list, - py::list, - py::list, - py::list, - py::list, - int, - int, - int, - py::list, - py::list, - py::list, - optional<bool> - >( - (arg("self"), arg("feat_space"), arg("prop_label"), arg("prop_unit"), arg("prop"), arg("prop_test"), arg("task_sizes_train"), arg("task_sizes_test"), arg("leave_out_inds"), arg("n_dim"), arg("n_residual"), arg("n_models_store"), arg("sample_ids_train"), arg("sample_ids_test"), arg("task_names"), arg("fix_intercept")), - "@DocString_sisso_log_reg_init_list@" - ) - ) .add_property("models", &SISSOLogRegressor::models_log_reg_py, "@DocString_sisso_log_reg_models_py@") ; } @@ -1502,17 +1362,11 @@ void sisso::descriptor_identifier::registerSISSOClassifier() class_<SISSOClassifier, bases<SISSOSolver>>( "SISSOClassifier", "@DocString_cls_sisso_class@", - init<std::shared_ptr<FeatureSpace>, std::string, Unit, np::ndarray, np::ndarray, py::list, py::list, py::list, int, int, int, py::list, py::list, py::list>( - (arg("self"), arg("feat_space"), arg("prop_label"), arg("prop_unit"), arg("prop"), arg("prop_test"), arg("task_sizes_train"), arg("task_sizes_test"), arg("leave_out_inds"), arg("n_dim"), arg("n_residual"), arg("n_models_store"), arg("sample_ids_train"), arg("sample_ids_test"), arg("task_names")), + init<InputParser, std::shared_ptr<FeatureSpace>>( + (arg("self"), arg("inputs"), arg("feat_space")), "@DocString_sisso_class_init_arr@" ) ) - .def( - init<std::shared_ptr<FeatureSpace>, std::string, Unit, 
py::list, py::list, py::list, py::list, py::list, int, int, int, py::list, py::list, py::list>( - (arg("self"), arg("feat_space"), arg("prop_label"), arg("prop_unit"), arg("prop"), arg("prop_test"), arg("task_sizes_train"), arg("task_sizes_test"), arg("leave_out_inds"), arg("n_dim"), arg("n_residual"), arg("n_models_store"), arg("sample_ids_train"), arg("sample_ids_test"), arg("task_names")), - "@DocString_sisso_class_init_list@" - ) - ) .def("fit", &SISSOClassifier::fit, (arg("self")), "@DocString_sisso_class_fit@") .add_property("models", &SISSOClassifier::models_py, "@DocString_sisso_class_models_py@") ; diff --git a/src/python/py_binding_cpp_def/bindings_docstring_keyed.hpp b/src/python/py_binding_cpp_def/bindings_docstring_keyed.hpp index d3dc20a225e616090007573548a36300cf806e43..67334054aaf05e72584ed0a91cc708ebfd73a725 100644 --- a/src/python/py_binding_cpp_def/bindings_docstring_keyed.hpp +++ b/src/python/py_binding_cpp_def/bindings_docstring_keyed.hpp @@ -44,6 +44,8 @@ namespace sisso */ void register_all(); + static void registerInputs(); + namespace feature_creation { static void registerFeatureSpace(); diff --git a/src/python/py_binding_cpp_def/descriptor_identifier/SISSOClassifier.cpp b/src/python/py_binding_cpp_def/descriptor_identifier/SISSOClassifier.cpp index 4ae3b8babdb20a0b4d503480cbf4fb9e06fd07a9..b2b4f6da8b47c09611b91e1504607cb1cbb4b376 100644 --- a/src/python/py_binding_cpp_def/descriptor_identifier/SISSOClassifier.cpp +++ b/src/python/py_binding_cpp_def/descriptor_identifier/SISSOClassifier.cpp @@ -21,88 +21,6 @@ #include "descriptor_identifier/solver/SISSOClassifier.hpp" -SISSOClassifier::SISSOClassifier( - std::shared_ptr<FeatureSpace> feat_space, - std::string prop_label, - Unit prop_unit, - np::ndarray prop, - np::ndarray prop_test, - py::list task_sizes_train, - py::list task_sizes_test, - py::list leave_out_inds, - int n_dim, - int n_residual, - int n_models_store, - py::list sample_ids_train, - py::list sample_ids_test, - py::list 
task_names -) : - SISSOSolver( - "classification", - feat_space, - prop_label, - prop_unit, - prop, - prop_test, - task_sizes_train, - task_sizes_test, - leave_out_inds, - n_dim, - n_residual, - n_models_store, - sample_ids_train, - sample_ids_test, - task_names, - false - ), - _c(100.0), - _width(1.0e-5), - _n_class(1) -{ - setup_d_mat_transfer(); -} - -SISSOClassifier::SISSOClassifier( - std::shared_ptr<FeatureSpace> feat_space, - std::string prop_label, - Unit prop_unit, - py::list prop, - py::list prop_test, - py::list task_sizes_train, - py::list task_sizes_test, - py::list leave_out_inds, - int n_dim, - int n_residual, - int n_models_store, - py::list sample_ids_train, - py::list sample_ids_test, - py::list task_names -) : - SISSOSolver( - "classification", - feat_space, - prop_label, - prop_unit, - prop, - prop_test, - task_sizes_train, - task_sizes_test, - leave_out_inds, - n_dim, - n_residual, - n_models_store, - sample_ids_train, - sample_ids_test, - task_names, - false - ), - _c(100.0), - _width(1.0e-5), - _n_class(1) -{ - setup_d_mat_transfer(); -} - py::list SISSOClassifier::models_py() { py::list model_list; diff --git a/src/python/py_binding_cpp_def/descriptor_identifier/SISSOLogRegressor.cpp b/src/python/py_binding_cpp_def/descriptor_identifier/SISSOLogRegressor.cpp index 53ccf1ac805c537c84631e687e8db40251f9ce29..13f96b85e9aeea5aa3918d73ddd087e173a67faf 100644 --- a/src/python/py_binding_cpp_def/descriptor_identifier/SISSOLogRegressor.cpp +++ b/src/python/py_binding_cpp_def/descriptor_identifier/SISSOLogRegressor.cpp @@ -21,108 +21,6 @@ #include "descriptor_identifier/solver/SISSOLogRegressor.hpp" -SISSOLogRegressor::SISSOLogRegressor( - std::shared_ptr<FeatureSpace> feat_space, - std::string prop_label, - Unit prop_unit, - np::ndarray prop, - np::ndarray prop_test, - py::list task_sizes_train, - py::list task_sizes_test, - py::list leave_out_inds, - int n_dim, - int n_residual, - int n_models_store, - py::list sample_ids_train, - py::list 
sample_ids_test, - py::list task_names, - bool fix_intercept -) : - SISSORegressor( - feat_space, - prop_label, - prop_unit, - prop, - prop_test, - task_sizes_train, - task_sizes_test, - leave_out_inds, - n_dim, - n_residual, - n_models_store, - sample_ids_train, - sample_ids_test, - task_names, - fix_intercept - ) -{ - std::vector<double> prop_vec = python_conv_utils::from_ndarray<double>(prop); - std::vector<double> prop_test_vec = python_conv_utils::from_ndarray<double>(prop_test); - - std::transform(prop_vec.begin(), prop_vec.end(), prop_vec.begin(), [](double p){return std::log(p);}); - std::transform(prop_test_vec.begin(), prop_test_vec.end(), prop_test_vec.begin(), [](double p){return std::log(p);}); - - _loss = loss_function_util::get_loss_function( - "log_regression", - prop_vec, - prop_test_vec, - _task_sizes_train, - _task_sizes_test, - _fix_intercept - ); -} - -SISSOLogRegressor::SISSOLogRegressor( - std::shared_ptr<FeatureSpace> feat_space, - std::string prop_label, - Unit prop_unit, - py::list prop, - py::list prop_test, - py::list task_sizes_train, - py::list task_sizes_test, - py::list leave_out_inds, - int n_dim, - int n_residual, - int n_models_store, - py::list sample_ids_train, - py::list sample_ids_test, - py::list task_names, - bool fix_intercept -) : - SISSORegressor( - feat_space, - prop_label, - prop_unit, - prop, - prop_test, - task_sizes_train, - task_sizes_test, - leave_out_inds, - n_dim, - n_residual, - n_models_store, - sample_ids_train, - sample_ids_test, - task_names, - fix_intercept - ) -{ - std::vector<double> prop_vec = python_conv_utils::from_list<double>(prop); - std::vector<double> prop_test_vec = python_conv_utils::from_list<double>(prop_test); - - std::transform(prop_vec.begin(), prop_vec.end(), prop_vec.begin(), [](double p){return std::log(p);}); - std::transform(prop_test_vec.begin(), prop_test_vec.end(), prop_test_vec.begin(), [](double p){return std::log(p);}); - - _loss = loss_function_util::get_loss_function( - 
"log_regression", - prop_vec, - prop_test_vec, - _task_sizes_train, - _task_sizes_test, - _fix_intercept - ); -} - py::list SISSOLogRegressor::models_log_reg_py() { py::list model_list; diff --git a/src/python/py_binding_cpp_def/descriptor_identifier/SISSORegressor.cpp b/src/python/py_binding_cpp_def/descriptor_identifier/SISSORegressor.cpp index 4b023f7501358f1d32897d648a9c2d8e06196532..f5dbd78b8ecc92189888f252deebd12fe231ef53 100644 --- a/src/python/py_binding_cpp_def/descriptor_identifier/SISSORegressor.cpp +++ b/src/python/py_binding_cpp_def/descriptor_identifier/SISSORegressor.cpp @@ -21,80 +21,6 @@ #include "descriptor_identifier/solver/SISSORegressor.hpp" -SISSORegressor::SISSORegressor( - std::shared_ptr<FeatureSpace> feat_space, - std::string prop_label, - Unit prop_unit, - np::ndarray prop, - np::ndarray prop_test, - py::list task_sizes_train, - py::list task_sizes_test, - py::list leave_out_inds, - int n_dim, - int n_residual, - int n_models_store, - py::list sample_ids_train, - py::list sample_ids_test, - py::list task_names, - bool fix_intercept -) : - SISSOSolver( - "regression", - feat_space, - prop_label, - prop_unit, - prop, - prop_test, - task_sizes_train, - task_sizes_test, - leave_out_inds, - n_dim, - n_residual, - n_models_store, - sample_ids_train, - sample_ids_test, - task_names, - fix_intercept - ) -{} - -SISSORegressor::SISSORegressor( - std::shared_ptr<FeatureSpace> feat_space, - std::string prop_label, - Unit prop_unit, - py::list prop, - py::list prop_test, - py::list task_sizes_train, - py::list task_sizes_test, - py::list leave_out_inds, - int n_dim, - int n_residual, - int n_models_store, - py::list sample_ids_train, - py::list sample_ids_test, - py::list task_names, - bool fix_intercept -) : - SISSOSolver( - "regression", - feat_space, - prop_label, - prop_unit, - prop, - prop_test, - task_sizes_train, - task_sizes_test, - leave_out_inds, - n_dim, - n_residual, - n_models_store, - sample_ids_train, - sample_ids_test, - task_names, - 
fix_intercept - ) -{} - py::list SISSORegressor::models_py() { py::list model_list; diff --git a/src/python/py_binding_cpp_def/descriptor_identifier/SISSOSolver.cpp b/src/python/py_binding_cpp_def/descriptor_identifier/SISSOSolver.cpp deleted file mode 100644 index 3d6ef29a217936f2e0d8855528927da2873e8f99..0000000000000000000000000000000000000000 --- a/src/python/py_binding_cpp_def/descriptor_identifier/SISSOSolver.cpp +++ /dev/null @@ -1,116 +0,0 @@ -// Copyright 2021 Thomas A. R. Purcell -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -/** @file python/py_bindings_cpp_def/descriptor_identifier/SISSOSolver.cpp - * @brief Implements the python based functionality of the base class for creating solvers using SISSO - * - * @author Thomas A. R. Purcell (tpurcell90) - * @bug No known bugs. 
- */ - -#include "descriptor_identifier/solver/SISSOSolver.hpp" - -SISSOSolver::SISSOSolver( - std::string loss_type, - std::shared_ptr<FeatureSpace> feat_space, - std::string prop_label, - Unit prop_unit, - np::ndarray prop, - np::ndarray prop_test, - py::list task_sizes_train, - py::list task_sizes_test, - py::list leave_out_inds, - int n_dim, - int n_residual, - int n_models_store, - py::list sample_ids_train, - py::list sample_ids_test, - py::list task_names, - bool fix_intercept -) : - _sample_ids_train(python_conv_utils::from_list<std::string>(sample_ids_train)), - _sample_ids_test(python_conv_utils::from_list<std::string>(sample_ids_test)), - _task_names(python_conv_utils::from_list<std::string>(task_names)), - _task_sizes_train(python_conv_utils::from_list<int>(task_sizes_train)), - _task_sizes_test(python_conv_utils::from_list<int>(task_sizes_test)), - _leave_out_inds(python_conv_utils::from_list<int>(leave_out_inds)), - _prop_label(prop_label), - _prop_unit(prop_unit), - _feat_space(feat_space), - _mpi_comm(feat_space->mpi_comm()), - _n_task(py::len(task_sizes_train)), - _n_samp(prop.shape(0)), - _n_dim(n_dim), - _n_residual(n_residual), - _n_models_store(n_models_store), - _fix_intercept(fix_intercept) -{ - node_value_arrs::initialize_d_matrix_arr(); - - _loss = loss_function_util::get_loss_function( - loss_type, - python_conv_utils::from_ndarray<double>(prop), - python_conv_utils::from_ndarray<double>(prop_test), - _task_sizes_train, - _task_sizes_test, - _fix_intercept - ); -} - -SISSOSolver::SISSOSolver( - std::string loss_type, - std::shared_ptr<FeatureSpace> feat_space, - std::string prop_label, - Unit prop_unit, - py::list prop, - py::list prop_test, - py::list task_sizes_train, - py::list task_sizes_test, - py::list leave_out_inds, - int n_dim, - int n_residual, - int n_models_store, - py::list sample_ids_train, - py::list sample_ids_test, - py::list task_names, - bool fix_intercept -) : - 
_sample_ids_train(python_conv_utils::from_list<std::string>(sample_ids_train)), - _sample_ids_test(python_conv_utils::from_list<std::string>(sample_ids_test)), - _task_names(python_conv_utils::from_list<std::string>(task_names)), - _task_sizes_train(python_conv_utils::from_list<int>(task_sizes_train)), - _task_sizes_test(python_conv_utils::from_list<int>(task_sizes_test)), - _leave_out_inds(python_conv_utils::from_list<int>(leave_out_inds)), - _prop_label(prop_label), - _prop_unit(prop_unit), - _feat_space(feat_space), - _mpi_comm(feat_space->mpi_comm()), - _n_task(py::len(task_sizes_train)), - _n_samp(py::len(prop)), - _n_dim(n_dim), - _n_residual(n_residual), - _n_models_store(n_models_store), - _fix_intercept(fix_intercept) -{ - node_value_arrs::initialize_d_matrix_arr(); - - _loss = loss_function_util::get_loss_function( - loss_type, - python_conv_utils::from_list<double>(prop), - python_conv_utils::from_list<double>(prop_test), - _task_sizes_train, - _task_sizes_test, - _fix_intercept - ); -} diff --git a/src/python/py_binding_cpp_def/feature_creation/FeatureNode.cpp b/src/python/py_binding_cpp_def/feature_creation/FeatureNode.cpp index 61b100fc2135105f814e2a3c9c06dada05dcd91d..eff20559676fb04fb88f44299dc3699252c7df3b 100644 --- a/src/python/py_binding_cpp_def/feature_creation/FeatureNode.cpp +++ b/src/python/py_binding_cpp_def/feature_creation/FeatureNode.cpp @@ -31,7 +31,7 @@ FeatureNode::FeatureNode(unsigned long int feat_ind, std::string expr, np::ndarr // Automatically resize the storage arrays if(node_value_arrs::N_STORE_FEATURES == 0) { - node_value_arrs::initialize_values_arr(_n_samp, _n_samp_test, 1, 0); + node_value_arrs::initialize_values_arr(_n_samp, _n_samp_test, 1); } else if((_n_samp != node_value_arrs::N_SAMPLES) || (_n_samp_test != node_value_arrs::N_SAMPLES_TEST)) { @@ -65,7 +65,7 @@ FeatureNode::FeatureNode(unsigned long int feat_ind, std::string expr, py::list // Automatically resize the storage arrays if(node_value_arrs::N_STORE_FEATURES 
== 0) { - node_value_arrs::initialize_values_arr(_n_samp, _n_samp_test, 1, 0); + node_value_arrs::initialize_values_arr(_n_samp, _n_samp_test, 1); } else if((_n_samp != node_value_arrs::N_SAMPLES) || (_n_samp_test != node_value_arrs::N_SAMPLES_TEST)) { diff --git a/src/python/py_binding_cpp_def/feature_creation/FeatureSpace.cpp b/src/python/py_binding_cpp_def/feature_creation/FeatureSpace.cpp index 54b9ffff698cd965475423394487f9fdd6e9cd0d..a721be1fdcb164518bdb586557acc2d3d6cb78df 100644 --- a/src/python/py_binding_cpp_def/feature_creation/FeatureSpace.cpp +++ b/src/python/py_binding_cpp_def/feature_creation/FeatureSpace.cpp @@ -21,175 +21,6 @@ #include "feature_creation/feature_space/FeatureSpace.hpp" -#ifdef PARAMETERIZE -FeatureSpace::FeatureSpace( - py::list phi_0, - py::list allowed_ops, - py::list allowed_param_ops, - py::list prop, - std::string project_type, - int max_rung, - int n_sis_select, - int n_rung_store, - int n_rung_generate, - double cross_corr_max, - double min_abs_feat_val, - double max_abs_feat_val, - int max_param_depth, - bool reparam_residual -): - _phi(python_conv_utils::shared_ptr_vec_from_list<Node, FeatureNode>(phi_0)), - _phi_0(_phi), - _end_no_params(1, 0), - _start_rung_reparam(1, 0), - _allowed_param_ops(python_conv_utils::from_list<std::string>(allowed_param_ops)), - _allowed_ops(python_conv_utils::from_list<std::string>(allowed_ops)), - _prop(python_conv_utils::from_list<double>(prop)), - _scores(py::len(phi_0), 0.0), - _task_sizes(node_value_arrs::TASK_SZ_TRAIN), - _start_rung(1, 0), - _feature_space_file("feature_space/selected_features.txt"), - _feature_space_summary_file("feature_space/SIS_summary.txt"), - _project_type(project_type), - _mpi_comm(mpi_setup::comm), - _cross_cor_max(cross_corr_max), - _l_bound(min_abs_feat_val), - _u_bound(max_abs_feat_val), - _max_rung(max_rung), - _n_sis_select(n_sis_select), - _n_feat(py::len(phi_0)), - _n_rung_store(n_rung_store), - _n_rung_generate(n_rung_generate), - 
_n_samp(_phi[0]->n_samp()), - _max_param_depth(max_param_depth), - _reparam_residual(reparam_residual) -{ - initialize_fs(); -} - -FeatureSpace::FeatureSpace( - py::list phi_0, - py::list allowed_ops, - py::list allowed_param_ops, - np::ndarray prop, - std::string project_type, - int max_rung, - int n_sis_select, - int n_rung_store, - int n_rung_generate, - double cross_corr_max, - double min_abs_feat_val, - double max_abs_feat_val, - int max_param_depth, - bool reparam_residual -): - _phi(python_conv_utils::shared_ptr_vec_from_list<Node, FeatureNode>(phi_0)), - _phi_0(_phi), - _end_no_params(1, 0), - _start_rung_reparam(1, 0), - _allowed_param_ops(python_conv_utils::from_list<std::string>(allowed_param_ops)), - _allowed_ops(python_conv_utils::from_list<std::string>(allowed_ops)), - _prop(python_conv_utils::from_ndarray<double>(prop)), - _scores(py::len(phi_0), 0.0), - _task_sizes(node_value_arrs::TASK_SZ_TRAIN), - _start_rung(1, 0), - _feature_space_file("feature_space/selected_features.txt"), - _feature_space_summary_file("feature_space/SIS_summary.txt"), - _project_type(project_type), - _mpi_comm(mpi_setup::comm), - _cross_cor_max(cross_corr_max), - _l_bound(min_abs_feat_val), - _u_bound(max_abs_feat_val), - _max_rung(max_rung), - _n_sis_select(n_sis_select), - _n_feat(py::len(phi_0)), - _n_rung_store(n_rung_store), - _n_rung_generate(n_rung_generate), - _n_samp(_phi[0]->n_samp()), - _max_param_depth(max_param_depth), - _reparam_residual(reparam_residual) -{ - initialize_fs(); -} -#else -FeatureSpace::FeatureSpace( - py::list phi_0, - py::list allowed_ops, - py::list prop, - std::string project_type, - int max_rung, - int n_sis_select, - int n_rung_store, - int n_rung_generate, - double cross_corr_max, - double min_abs_feat_val, - double max_abs_feat_val -): - _phi(python_conv_utils::shared_ptr_vec_from_list<Node, FeatureNode>(phi_0)), - _phi_0(_phi), - _allowed_ops(python_conv_utils::from_list<std::string>(allowed_ops)), - 
_prop(python_conv_utils::from_list<double>(prop)), - _scores(py::len(phi_0), 0.0), - _task_sizes(node_value_arrs::TASK_SZ_TRAIN), - _start_rung(1, 0), - _feature_space_file("feature_space/selected_features.txt"), - _feature_space_summary_file("feature_space/SIS_summary.txt"), - _project_type(project_type), - _mpi_comm(mpi_setup::comm), - _cross_cor_max(cross_corr_max), - _l_bound(min_abs_feat_val), - _u_bound(max_abs_feat_val), - _max_rung(max_rung), - _n_sis_select(n_sis_select), - _n_feat(py::len(phi_0)), - _n_rung_store(n_rung_store), - _n_rung_generate(n_rung_generate), - _n_samp(_phi[0]->n_samp()), - _max_param_depth(0), - _reparam_residual(false) -{ - initialize_fs(); -} - -FeatureSpace::FeatureSpace( - py::list phi_0, - py::list allowed_ops, - np::ndarray prop, - std::string project_type, - int max_rung, - int n_sis_select, - int n_rung_store, - int n_rung_generate, - double cross_corr_max, - double min_abs_feat_val, - double max_abs_feat_val -): - _phi(python_conv_utils::shared_ptr_vec_from_list<Node, FeatureNode>(phi_0)), - _phi_0(_phi), - _allowed_ops(python_conv_utils::from_list<std::string>(allowed_ops)), - _prop(python_conv_utils::from_ndarray<double>(prop)), - _scores(py::len(phi_0), 0.0), - _task_sizes(node_value_arrs::TASK_SZ_TRAIN), - _start_rung(1, 0), - _feature_space_file("feature_space/selected_features.txt"), - _feature_space_summary_file("feature_space/SIS_summary.txt"), - _project_type(project_type), - _mpi_comm(mpi_setup::comm), - _cross_cor_max(cross_corr_max), - _l_bound(min_abs_feat_val), - _u_bound(max_abs_feat_val), - _max_rung(max_rung), - _n_sis_select(n_sis_select), - _n_feat(py::len(phi_0)), - _n_rung_store(n_rung_store), - _n_rung_generate(n_rung_generate), - _n_samp(_phi[0]->n_samp()), - _max_param_depth(0), - _reparam_residual(false) -{ - initialize_fs(); -} -#endif FeatureSpace::FeatureSpace( std::string feature_file, py::list phi_0, @@ -200,10 +31,10 @@ FeatureSpace::FeatureSpace( double cross_corr_max ): 
_phi_0(python_conv_utils::shared_ptr_vec_from_list<Node, FeatureNode>(phi_0)), - _prop(python_conv_utils::from_ndarray<double>(prop)), + _prop_train(python_conv_utils::from_ndarray<double>(prop)), _scores(py::len(phi_0), 0.0), _start_rung(1, 0), - _task_sizes(python_conv_utils::from_list<int>(task_sizes)), + _task_sizes_train(python_conv_utils::from_list<int>(task_sizes)), _feature_space_file("feature_space/selected_features.txt"), _feature_space_summary_file("feature_space/SIS_summary.txt"), _project_type(project_type), @@ -215,11 +46,11 @@ FeatureSpace::FeatureSpace( _n_feat(py::len(phi_0)), _n_rung_store(0), _n_rung_generate(0), - _n_samp(_phi_0[0]->n_samp()), + _n_samp_train(_phi_0[0]->n_samp()), _max_param_depth(-1), _reparam_residual(false) { - comp_feats::set_is_valid_fxn(project_type, _cross_cor_max, _n_samp, _is_valid, _is_valid_feat_list); + comp_feats::set_is_valid_fxn(project_type, _cross_cor_max, _n_samp_train, _is_valid, _is_valid_feat_list); mpi_reduce_op::set_op(_project_type, _cross_cor_max, _n_sis_select); std::vector<node_ptr> phi_temp = str2node::phi_from_file(feature_file, _phi_0); @@ -349,10 +180,10 @@ FeatureSpace::FeatureSpace( double cross_corr_max ): _phi_0(python_conv_utils::shared_ptr_vec_from_list<Node, FeatureNode>(phi_0)), - _prop(python_conv_utils::from_list<double>(prop)), + _prop_train(python_conv_utils::from_list<double>(prop)), _scores(py::len(phi_0), 0.0), _start_rung(1, 0), - _task_sizes(python_conv_utils::from_list<int>(task_sizes)), + _task_sizes_train(python_conv_utils::from_list<int>(task_sizes)), _feature_space_file("feature_space/selected_features.txt"), _feature_space_summary_file("feature_space/SIS_summary.txt"), _mpi_comm(mpi_setup::comm), @@ -363,11 +194,11 @@ FeatureSpace::FeatureSpace( _n_feat(py::len(phi_0)), _n_rung_store(0), _n_rung_generate(0), - _n_samp(_phi_0[0]->n_samp()), + _n_samp_train(_phi_0[0]->n_samp()), _max_param_depth(-1), _reparam_residual(false) { - comp_feats::set_is_valid_fxn(project_type, 
_cross_cor_max, _n_samp, _is_valid, _is_valid_feat_list); + comp_feats::set_is_valid_fxn(project_type, _cross_cor_max, _n_samp_train, _is_valid, _is_valid_feat_list); mpi_reduce_op::set_op(_project_type, _cross_cor_max, _n_sis_select); std::vector<node_ptr> phi_temp = str2node::phi_from_file(feature_file, _phi_0); diff --git a/src/python/py_interface/__init__.py b/src/python/py_interface/__init__.py index ddd97016f5424a9ea831439f1b60dae1fe77d5f4..4c903f548377de438b85e77ece541a260c0b0bf4 100644 --- a/src/python/py_interface/__init__.py +++ b/src/python/py_interface/__init__.py @@ -27,5 +27,5 @@ read_csv: Create initial feature set from a csv file get_fs: Generate a FeatureSpace for the calculation get_fs_solver: Generate a FeatureSpace and SISSOSolver for the calculation """ -from sissopp.py_interface.import_dataframe import read_csv -from sissopp.py_interface.get_solver import get_fs, get_fs_solver +from sissopp.py_interface.import_dataframe import read_csv, create_inputs +from sissopp.py_interface.get_solver import get_fs_solver diff --git a/src/python/py_interface/get_solver.py b/src/python/py_interface/get_solver.py index b5dea564f6268feff7d6f660b9304b91320594eb..bb030a04c6918a3741613886037cb73dc8c74bd0 100644 --- a/src/python/py_interface/get_solver.py +++ b/src/python/py_interface/get_solver.py @@ -21,7 +21,6 @@ get_fs_solver: Generate a FeatureSpace and SISSOSolver for the calculation import numpy as np import pandas as pd -from sissopp.py_interface.import_dataframe import read_csv from sissopp._sisso import ( FeatureSpace, SISSORegressor, @@ -30,203 +29,30 @@ from sissopp._sisso import ( ) -def get_fs( - phi_0, - prop, - task_sizes_train, - allowed_ops, - allowed_param_ops, - loss_type, - max_rung, - n_sis_select, -): - """Generate a FeatureSpace for the calculation +def get_fs_solver(inputs): + """Generate a FeatureSpace and SISSOSolver for the calculation from an Inputs Object Args: - phi_0 (list of FeatureNodes): The list of primary features - prop 
(np.ndarray): The property values for the training data - task_sizes_train (list): The number of samples in the training data for each task - allowed_ops (list): List of operations used to combine the features - allowed_param_ops (dict): A dict describing the desired non-linear parameterization - loss_type (str): type of calculation regression or classification - max_rung (int): Maximum rung for the calculation - n_sis_select (int): number of features to select in each round of SIS - - Returns: - FeatureSpace: The FeatureSpace for the calculation - """ - - if allowed_ops == "all": - allowed_ops = [ - "add", - "sub", - "mult", - "div", - "abs_diff", - "inv", - "abs", - "cos", - "sin", - "exp", - "neg_exp", - "log", - "sq", - "sqrt", - "cb", - "cbrt", - "six_pow", - ] - - try: - return FeatureSpace( - phi_0, - allowed_ops, - allowed_param_ops, - prop, - loss_type, - max_rung, - n_sis_select, - ) - except Exception as e: - if not str(e).startswith("Python argument types in") or ( - len(allowed_param_ops) > 0 - ): - raise - - return FeatureSpace( - phi_0, - allowed_ops, - prop, - loss_type, - max_rung, - n_sis_select, - ) - - -def get_fs_solver( - df, - prop_key="prop", - allowed_ops="all", - allowed_param_ops=[], - max_rung=0, - n_sis_select=1, - max_dim=1, - cols="all", - loss_type="regression", - n_residuals=1, - n_model_store=1, - task_key=None, - leave_out_frac=0.0, - leave_out_inds=None, -): - """Generate a FeatureSpace and SISSOSolver for the calculation - - Args: - df (str): The csv file containing all of the data for the calculation - prop_key (str): The key corresponding to which column in the csv file the property is stored in - allowed_ops (list): List of operations used to combine the features - allowed_param_ops (dict): A dict describing the desired non-linear parameterization - max_rung (int): Maximum rung for the calculation - n_sis_select (int): number of features to select in each round of SIS - max_dim (int): Maximum dimension of the models to learn 
- cols (list or str): The columns to include in the initial feature set - loss_type (str): type of calculation regression, log_regression, or classification - n_residuals (int): number of residuals to use for the next SIS step when learning higher dimensional models - n_model_store (int): number of models to store as output files - task_key (str): The key corresponding to which column in the csv file the task differentiation is stored in - leave_out_frac (float): Fraction of samples to leave out - leave_out_inds (list): Indices to use as the test set + inputs (Inputs): The inputs for the calculation Returns: tuple: A tuple containing: - - fs (FeatureSpace): The FeatureSpace for the calculation + - feat_space (FeatureSpace): The FeatureSpace for the calculation - sr (SISSORegressor): The SISSORegressor for the calculation """ - ( - phi_0, - prop_label, - prop_unit, - prop, - prop_test, - task_sizes_train, - task_sizes_test, - leave_out_inds, - sample_ids_train, - sample_ids_test, - task_names, - ) = read_csv( - df, - prop_key, - cols=cols, - task_key=task_key, - leave_out_frac=leave_out_frac, - leave_out_inds=leave_out_inds, - max_rung=max_rung, - ) - - fs = get_fs( - phi_0, - prop, - task_sizes_train, - allowed_ops, - allowed_param_ops, - loss_type, - max_rung, - n_sis_select, - ) - - if loss_type.lower() == "regression": - print(sample_ids_train) - print(sample_ids_test) - print(task_names) - solver = SISSORegressor( - fs, - prop_label, - prop_unit, - prop, - prop_test, - task_sizes_train, - task_sizes_test, - leave_out_inds, - max_dim, - n_residuals, - n_model_store, - sample_ids_train, - sample_ids_test, - task_names, - ) - elif loss_type.lower() == "log_regression": - solver = SISSOLogRegressor( - fs, - prop_label, - prop_unit, - prop, - prop_test, - task_sizes_train, - task_sizes_test, - leave_out_inds, - max_dim, - n_residuals, - n_model_store, - sample_ids_train, - sample_ids_test, - task_names, - ) + print("fs gen") + feat_space = FeatureSpace(inputs) + + 
print("sr gen") + if inputs.calc_type.lower() == "regression": + solver = SISSORegressor(inputs, feat_space) + elif inputs.calc_type.lower() == "log_regression": + solver = SISSOLogRegressor(inputs, feat_space) + elif inputs.calc_type.lower() == "classification": + solver = SISSOClassifier(inputs, feat_space) else: - solver = SISSOClassifier( - fs, - prop_label, - prop_unit, - prop, - prop_test, - task_sizes_train, - task_sizes_test, - leave_out_inds, - max_dim, - n_residuals, - n_model_store, - sample_ids_train, - sample_ids_test, - task_names, + raise ValueError( + "The calculation type specified in inputs.calc_type is not supported" ) - return fs, solver + print("ret") + return feat_space, solver diff --git a/src/python/py_interface/import_dataframe.py b/src/python/py_interface/import_dataframe.py index a80a1a71a87453719df60e168704b0965f8adcba..a00f9b1cef3745858fcdde61ea166e192693a604 100644 --- a/src/python/py_interface/import_dataframe.py +++ b/src/python/py_interface/import_dataframe.py @@ -31,7 +31,7 @@ try: except ImportError: pass -from sissopp._sisso import Unit, FeatureNode +from sissopp._sisso import Unit, Inputs, FeatureNode, initialize_values_arr def get_unit(header): @@ -110,35 +110,27 @@ def extract_col(df, key, drop_col=True): def read_csv( df, prop_key, + inputs=Inputs(), cols="all", task_key=None, leave_out_frac=0.0, leave_out_inds=None, max_rung=None, ): - """Create initial feature set from a csv file + """Read a data.csv file and populate inputs with the relevant information Args: df (str or pandas.DataFrame): The DataFrame of csv file of the initial feature set prop_key (str): The key corresponding to which column in the csv file the property is stored in + inputs (Inputs): The inputs object for the calculation cols (list or str): The columns to include in the initial feature set task_key (str): The key corresponding to which column in the csv file the task differentiation is stored in leave_out_frac (float): The fraction (as a decimal) of 
indcies to leave out of the calculations leave_out_inds (list): List of indices to pull from the training data to act as a test set max_rung (int): Maximum rung of a feature + Returns: - tuple: A tuple containing: - - phi_0 (list of FeatureNodes): The list of primary features - - prop_label (str): The label used to describe the property - - prop_unit (Unit): The unit of the property - - prop_train (np.ndarray): The property values for the training data - - prop_test (np.ndarray): The property values for the test data - - task_sizes_train (list): The number of samples in the training data for each task - - task_sizes_test (list): The number of samples in the test data for each task - - leave_out_inds (list): Indices to use as the test set - - sample_ids_train (list): List of sample id's for the training data - - sample_ids_test (list): List of sample id's for the test data - - task_names (list): List of all task id names + inputs (Inputs): The updated inputs object for the calculation """ if not max_rung: raise ValueError("Maximum rung for the calculation is not defined.") @@ -220,16 +212,224 @@ def read_csv( ) ) - return ( - phi_0, - prop_label, - Unit(prop_unit), - prop[train_inds].flatten(), - prop[leave_out_inds].flatten(), - task_sizes_train, - task_sizes_test, - leave_out_inds, - list(df.index[train_inds].to_numpy().astype(str)), - list(df.index[leave_out_inds].to_numpy().astype(str)), - list(task_names), - ) + inputs.phi_0 = phi_0 + inputs.sample_ids_train = list(df.index[train_inds].to_numpy().astype(str)) + inputs.sample_ids_test = list(df.index[leave_out_inds].to_numpy().astype(str)) + inputs.prop_key = prop_key + inputs.prop_label = prop_label + inputs.prop_unit = Unit(prop_unit) + inputs.prop_train = prop[train_inds].flatten() + inputs.prop_test = prop[leave_out_inds].flatten() + inputs.task_names = list(task_names) + inputs.task_sizes_train = task_sizes_train + inputs.task_sizes_test = task_sizes_test + inputs.leave_out_inds = leave_out_inds + 
inputs.leave_out_frac = len(leave_out_inds) / len(prop) + + return inputs + + +def create_inputs( + df=None, + cols="all", + calc_type="regression", + phi_0=None, + sample_ids_train=None, + sample_ids_test=None, + task_names=None, + prop_key=None, + prop_label="Property", + prop_unit=Unit(), + prop_train=None, + prop_test=None, + task_key=None, + task_sizes_train=None, + task_sizes_test=None, + leave_out_frac=0.0, + leave_out_inds=None, + allowed_ops="all", + allowed_param_ops=None, + cross_cor_max=1.0, + l_bound=1e-50, + u_bound=1e50, + n_dim=1, + max_rung=0, + n_rung_store=0, + n_rung_generate=0, + n_sis_select=1, + n_residual=1, + n_models_store=1, + max_param_depth=None, + nlopt_seed=42, + fix_intercept=False, + global_param_opt=True, + reparam_residual=False, +): + """Create the Inputs object for the calculation (used for creating the FeatureSpace and SISSOSolver) + Args: + df (str or pandas.DataFrame): The DataFrame or csv file of the initial feature set + cols (list or str): The columns to include in the initial feature set + calc_type (str): The type of LossFunction to use when projecting the features onto a property + phi_0 (list of FeatureNodes): A list of FeatureNodes for the primary feature space + sample_ids_train (list): A list storing all sample ids for the training samples + sample_ids_test (list): A list storing all sample ids for the test samples + task_names (list): A list storing the ID of the task names + prop_key (str): Key used to find the property column in the data file + prop_label (str): The label of the property + prop_unit (Unit): The Unit of the property + prop_train (np.ndarray): The value of the property to evaluate the loss function against for the training set + prop_test (np.ndarray): The value of the property to evaluate the loss function against for the test set + task_key (str): Key used to find the task column in the data file + task_sizes_train (list): Number of training samples per task + task_sizes_test (list): Number of
testing samples per task + leave_out_frac (float): The fraction (as a decimal) of indices to leave out of the calculations + leave_out_inds (list): List of indexes from the initial data file in the test set + allowed_ops (list): A list containing all allowed operators strings + allowed_param_ops (list): A list containing all allowed operators strings for operators with free parameters + cross_cor_max (float): Maximum cross-correlation used for selecting features + l_bound (float): The lower bound for the maximum absolute value of the features + u_bound (float): The upper bound for the maximum absolute value of the features + n_dim (int): The maximum number of features allowed in the linear model + max_rung (int): Maximum rung for the feature creation + n_rung_store (int): The number of rungs to calculate and store the value of the features for all samples + n_rung_generate (int): Either 0 or 1, and is the number of rungs to generate on the fly during SIS + n_sis_select (int): Number of features to select during each SIS iteration + n_residual (int): Number of residuals to pass to the next sis model + n_models_store (int): The number of models to output to files + max_param_depth (int): The maximum depth in the binary expression tree to set non-linear optimization + nlopt_seed (int): The seed used for the nlOpt library + fix_intercept (bool): If true the bias term is fixed at 0 + global_param_opt (bool): True if global optimization is requested for non-linear optimization of parameters (Can break reproducibility) + reparam_residual (bool): If True then reparameterize features using the residuals of each model + + Returns: + inputs (Inputs): The updated inputs object for the calculation + """ + if allowed_ops == "all": + allowed_ops = [ + "add", + "sub", + "mult", + "div", + "abs_diff", + "inv", + "abs", + "cos", + "sin", + "exp", + "neg_exp", + "log", + "sq", + "sqrt", + "cb", + "cbrt", + "six_pow", + ] + inputs = Inputs() + + # Set values that have well defined
defaults + inputs.allowed_ops = allowed_ops + inputs.calc_type = calc_type + inputs.cross_cor_max = cross_cor_max + inputs.fix_intercept = fix_intercept + inputs.global_param_opt = global_param_opt + inputs.l_bound = l_bound + inputs.max_rung = max_rung + inputs.n_dim = n_dim + inputs.n_models_store = n_models_store + inputs.reparam_residual = reparam_residual + inputs.n_residual = n_residual + inputs.n_rung_store = n_rung_store + inputs.n_rung_generate = n_rung_generate + inputs.n_sis_select = n_sis_select + inputs.nlopt_seed = nlopt_seed + inputs.u_bound = u_bound + + # Set values with no stand alone defaults + if task_key is not None: + inputs.task_key = task_key + + if allowed_param_ops is not None: + inputs.allowed_param_ops = allowed_param_ops + + if max_param_depth is not None: + inputs.max_param_depth = max_param_depth + else: + inputs.max_param_depth = max_rung + + # Add items that can be read from a DataFrame or from the items passed here + if df: + inputs = read_csv( + df, + prop_key, + inputs, + cols, + task_key, + leave_out_inds=leave_out_inds, + max_rung=max_rung, + ) + else: + if not phi_0: + raise ValueError("If no DataFrame is passed then phi_0 must be passed") + + if not prop_train: + raise ValueError("If no DataFrame is passed then prop_train must be passed") + + if not task_sizes_train: + raise ValueError( + "If no DataFrame is passed then task_sizes_train must be passed" + ) + + n_samp_train = np.sum(task_sizes_train, dtype=np.int32) + if not sample_ids_train: + sample_ids_train = [str(ii) for ii in range(n_samp_train)] + + if not task_names: + task_names = [f"task_{ii}" for ii in range(len(task_sizes_train))] + else: + assert len(task_sizes_train) == len(task_names) + + if not task_sizes_test: + task_sizes_test = [0] * len(task_sizes_train) + assert len(sample_ids_test) == 0 + assert len(leave_out_inds) == 0 + else: + assert len(task_sizes_train) == len(task_sizes_test) + + n_samp_test = np.sum(task_sizes_test, dtype=np.int32) + if not 
sample_ids_test: + sample_ids_test = [ + str(ii) for ii in range(n_samp_train, n_samp_train + n_samp_test) + ] + else: + assert len(sample_ids_test) == n_samp_test + if not leave_out_inds: + leave_out_inds = [ + ii for ii in range(n_samp_train, n_samp_train + n_samp_test) + ] + else: + assert len(leave_out_inds) == n_samp_test + + if not prop_key: + prop_key = f"{prop_label} ({prop_unit})" + + if not prop_test and (n_samp_test == 0): + prop_test = np.zeros(0) + else: + assert len(prop_test) == n_samp_test + + inputs.phi_0 = phi_0 + inputs.sample_ids_train = sample_ids_train + inputs.sample_ids_test = sample_ids_test + inputs.prop_key = prop_key + inputs.prop_label = prop_label + inputs.prop_unit = prop_unit + inputs.prop_train = prop_train + inputs.prop_test = prop_test + inputs.task_names = task_names + inputs.task_sizes_train = task_sizes_train + inputs.task_sizes_test = task_sizes_test + inputs.leave_out_inds = leave_out_inds + inputs.leave_out_frac = n_samp_test / (n_samp_train + n_samp_test) + + return inputs diff --git a/tests/googletest/descriptor_identification/model/test_model_classifier.cc b/tests/googletest/descriptor_identification/model/test_model_classifier.cc index 71086ca7170b05590f0ac0b5007d124d4a5b9721..d09aff237f5f4b7b60916ad96a4c534defa717e5 100644 --- a/tests/googletest/descriptor_identification/model/test_model_classifier.cc +++ b/tests/googletest/descriptor_identification/model/test_model_classifier.cc @@ -43,13 +43,13 @@ namespace 1 ); - _task_keys = {"all"}; + task_names = {"all"}; _sample_ids_train = {"0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11"}; _sample_ids_test = {"20", "21"}; } std::vector<std::string> _sample_ids_train; std::vector<std::string> _sample_ids_test; - std::vector<std::string> _task_keys; + std::vector<std::string> task_names; std::vector<double> _prop; std::vector<double> _prop_test; @@ -72,7 +72,7 @@ namespace _leave_out_inds, _sample_ids_train, _sample_ids_test, - _task_keys + task_names ); 
EXPECT_STREQ(model.toString().c_str(), "[A]"); EXPECT_EQ(model.n_convex_overlap_train(), 0); diff --git a/tests/googletest/descriptor_identification/model/test_model_log_regressor.cc b/tests/googletest/descriptor_identification/model/test_model_log_regressor.cc index 07f37869ccb8d74325e58bfcb260b3060fd57a69..972a3761569cf897e0414b46c29995093587e537 100644 --- a/tests/googletest/descriptor_identification/model/test_model_log_regressor.cc +++ b/tests/googletest/descriptor_identification/model/test_model_log_regressor.cc @@ -43,13 +43,13 @@ namespace std::transform(value_1.begin(), value_1.end(), value_2.begin(), _prop.begin(), [](double v1, double v2){return std::log(0.001 * std::pow(v1, 0.1) * std::pow(v2, -2.1));}); std::transform(test_value_1.begin(), test_value_1.end(), test_value_2.begin(), _prop_test.begin(), [](double v1, double v2){return std::log(0.001 * std::pow(v1, 0.1) * std::pow(v2, -2.1));}); - _task_keys = {"all"}; + task_names = {"all"}; _sample_ids_train = {"0", "1", "2", "3", "4", "6", "7", "8", "9", "10"}; _sample_ids_test = {"5", "11"}; } std::vector<std::string> _sample_ids_train; std::vector<std::string> _sample_ids_test; - std::vector<std::string> _task_keys; + std::vector<std::string> task_names; std::vector<int> _leave_out_inds; std::vector<int> _task_sizes_train; @@ -81,7 +81,7 @@ namespace _leave_out_inds, _sample_ids_train, _sample_ids_test, - _task_keys + task_names ); EXPECT_STREQ(model.toString().c_str(), "exp(c0) * (A)^a0 * (B)^a1"); EXPECT_LT(model.rmse(), 1e-10); @@ -170,7 +170,7 @@ namespace _leave_out_inds, _sample_ids_train, _sample_ids_test, - _task_keys + task_names ); EXPECT_STREQ(model.toString().c_str(), "(A)^a0 * (B)^a1"); diff --git a/tests/googletest/descriptor_identification/model/test_model_regressor.cc b/tests/googletest/descriptor_identification/model/test_model_regressor.cc index d4eb901217760f76075f1ac745bbc6fed3affd66..72205435ef93a25b585933a437498bf12268d6d9 100644 --- 
a/tests/googletest/descriptor_identification/model/test_model_regressor.cc +++ b/tests/googletest/descriptor_identification/model/test_model_regressor.cc @@ -46,13 +46,13 @@ namespace std::transform(test_value_1.begin(), test_value_1.begin() + 1, test_value_2.begin(), _prop_test.begin(), [](double v1, double v2){return 0.001 + v1 + v2;}); std::transform(test_value_1.begin() + 1, test_value_1.end(), test_value_2.begin() + 1, _prop_test.begin() + 1, [](double v1, double v2){return -6.5 + 1.25 * v1 - 0.4 * v2;}); - _task_keys = {"task_1", "task_2"}; + task_names = {"task_1", "task_2"}; _sample_ids_train = {"0", "1", "2", "3", "4", "6", "7", "8", "9", "10"}; _sample_ids_test = {"5", "11"}; } std::vector<std::string> _sample_ids_train; std::vector<std::string> _sample_ids_test; - std::vector<std::string> _task_keys; + std::vector<std::string> task_names; std::vector<int> _leave_out_inds; std::vector<int> _task_sizes_train; @@ -84,7 +84,7 @@ namespace _leave_out_inds, _sample_ids_train, _sample_ids_test, - _task_keys + task_names ); EXPECT_STREQ(model.toString().c_str(), "c0 + a0 * A + a1 * B"); @@ -183,7 +183,7 @@ namespace _leave_out_inds, _sample_ids_train, _sample_ids_test, - _task_keys + task_names ); EXPECT_STREQ(model.toString().c_str(), "a0 * A + a1 * B"); diff --git a/tests/googletest/descriptor_identification/sisso_regressor/test_sisso_regressor.cc b/tests/googletest/descriptor_identification/sisso_regressor/test_sisso_regressor.cc deleted file mode 100644 index 788b73d1496196d59e3851884e391cf4cb1e194d..0000000000000000000000000000000000000000 --- a/tests/googletest/descriptor_identification/sisso_regressor/test_sisso_regressor.cc +++ /dev/null @@ -1,304 +0,0 @@ -// Copyright 2021 Thomas A. R. Purcell -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -#include <descriptor_identifier/solver/SISSORegressor.hpp> -#include <boost/filesystem.hpp> -#include "gtest/gtest.h" -#include <random> - -namespace -{ - class SISSORegressorTests : public ::testing::Test - { - protected: - void SetUp() override - { - allowed_op_maps::set_node_maps(); - node_value_arrs::initialize_d_matrix_arr(); - mpi_setup::init_mpi_env(); - - node_value_arrs::initialize_values_arr(90, 10, 3, 2, true, false); - - _task_sizes_train = {36, 54}; - _task_sizes_test = {4, 6}; - _leave_out_inds = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; - - _sample_ids_train.resize(90); - for(int ii = 0; ii < 90; ++ii) - { - _sample_ids_train[ii] = std::to_string(ii); - } - - _sample_ids_test.resize(10); - for(int ii = 0; ii < 10; ++ii) - { - _sample_ids_test[ii] = std::to_string(ii); - } - - std::vector<double> value_1(90, 0.0); - std::vector<double> value_2(90, 0.0); - std::vector<double> value_3(90, 0.0); - - std::vector<double> test_value_1(10, 0.0); - std::vector<double> test_value_2(10, 0.0); - std::vector<double> test_value_3(10, 0.0); - - std::default_random_engine generator; - std::uniform_real_distribution<double> distribution_feats(-50.0, 50.0); - std::uniform_real_distribution<double> distribution_params(-2.50, 2.50); - - for(int ii = 0; ii < 90; ++ii) - { - value_1[ii] = distribution_feats(generator); - value_2[ii] = distribution_feats(generator); - value_3[ii] = distribution_feats(generator); - } - - for(int ii = 0; ii < 10; ++ii) - { - test_value_1[ii] = distribution_feats(generator); - test_value_2[ii] = distribution_feats(generator); - 
test_value_3[ii] = distribution_feats(generator); - } - - node_ptr feat_1 = std::make_shared<FeatureNode>(0, "A", value_1, test_value_1, Unit("m")); - node_ptr feat_2 = std::make_shared<FeatureNode>(1, "B", value_2, test_value_2, Unit("m")); - node_ptr feat_3 = std::make_shared<FeatureNode>(2, "C", value_3, test_value_3, Unit("s")); - - _phi_0 ={feat_1, feat_2, feat_3}; - - double a00 = distribution_params(generator); - double a01 = distribution_params(generator); - - double a10 = distribution_params(generator); - double a11 = distribution_params(generator); - - double c00 = distribution_params(generator); - double c01 = distribution_params(generator); - - _prop = std::vector<double>(90, 0.0); - std::transform(value_1.begin(), value_1.begin() + _task_sizes_train[0], value_2.begin(), _prop.begin(), [&c00, &a00](double v1, double v2){return c00 + a00 * (v1 - v2) * (v1 - v2);}); - std::transform(value_1.begin() + _task_sizes_train[0], value_1.end(), value_2.begin() + _task_sizes_train[0], _prop.begin() + _task_sizes_train[0], [&c01, &a01](double v1, double v2){return c01 + a01 * (v1 - v2) * (v1 - v2);}); - - std::transform(value_3.begin(), value_3.begin() + _task_sizes_train[0], _prop.begin(), _prop.begin(), [&a10](double v3, double p){return p + a10 * v3;}); - std::transform(value_3.begin() + _task_sizes_train[0], value_3.end(), _prop.begin() + _task_sizes_train[0], _prop.begin() + _task_sizes_train[0], [&a11](double v3, double p){return p + a11 * v3;}); - - _prop_test = std::vector<double>(10, 0.0); - std::transform(test_value_1.begin(), test_value_1.begin() + _task_sizes_test[0], test_value_2.begin(), _prop_test.begin(), [&c00, &a00](double v1, double v2){return c00 + a00 * (v1 - v2) * (v1 - v2);}); - std::transform(test_value_1.begin() + _task_sizes_test[0], test_value_1.end(), test_value_2.begin() + _task_sizes_test[0], _prop_test.begin() + _task_sizes_test[0], [&c01, &a01](double v1, double v2){return c01 + a01 * (v1 - v2) * (v1 - v2);}); - - 
std::transform(test_value_3.begin(), test_value_3.begin() + _task_sizes_test[0], _prop_test.begin(), _prop_test.begin(), [&a10](double v3, double p){return p + a10 * v3;}); - std::transform(test_value_3.begin() + _task_sizes_test[0], test_value_3.end(), _prop_test.begin() + _task_sizes_test[0], _prop_test.begin() + _task_sizes_test[0], [&a11](double v3, double p){return p + a11 * v3;}); - - _prop_zero_int = std::vector<double>(90, 0.0); - std::transform(value_1.begin(), value_1.begin() + _task_sizes_train[0], value_2.begin(), _prop_zero_int.begin(), [&a00](double v1, double v2){return a00 * (v1 - v2) * (v1 - v2);}); - std::transform(value_1.begin() + _task_sizes_train[0], value_1.end(), value_2.begin() + _task_sizes_train[0], _prop_zero_int.begin() + _task_sizes_train[0], [&a01](double v1, double v2){return a01 * (v1 - v2) * (v1 - v2);}); - - std::transform(value_3.begin(), value_3.begin() + _task_sizes_train[0], _prop_zero_int.begin(), _prop_zero_int.begin(), [&a10](double v3, double p){return p + a10 * v3;}); - std::transform(value_3.begin() + _task_sizes_train[0], value_3.end(), _prop_zero_int.begin() + _task_sizes_train[0], _prop_zero_int.begin() + _task_sizes_train[0], [&a11](double v3, double p){return p + a11 * v3;}); - - _prop_test_zero_int = std::vector<double>(10, 0.0); - std::transform(test_value_1.begin(), test_value_1.begin() + _task_sizes_test[0], test_value_2.begin(), _prop_test_zero_int.begin(), [&a00](double v1, double v2){return a00 * (v1 - v2) * (v1 - v2);}); - std::transform(test_value_1.begin() + _task_sizes_test[0], test_value_1.end(), test_value_2.begin() + _task_sizes_test[0], _prop_test_zero_int.begin() + _task_sizes_test[0], [&a01](double v1, double v2){return a01 * (v1 - v2) * (v1 - v2);}); - - std::transform(test_value_3.begin(), test_value_3.begin() + _task_sizes_test[0], _prop_test_zero_int.begin(), _prop_test_zero_int.begin(), [&a10](double v3, double p){return p + a10 * v3;}); - std::transform(test_value_3.begin() + 
_task_sizes_test[0], test_value_3.end(), _prop_test_zero_int.begin() + _task_sizes_test[0], _prop_test_zero_int.begin() + _task_sizes_test[0], [&a11](double v3, double p){return p + a11 * v3;}); - - _allowed_ops = {"div", "sq", "cb", "sub"}; - _allowed_param_ops = {}; - - _task_keys = {"task_1", "task_2"}; - for(int ii = 10; ii < 100; ++ii) - { - _sample_ids_train.push_back(std::to_string(ii)); - } - - for(int ii = 0; ii < 10; ++ii) - { - _sample_ids_test.push_back(std::to_string(ii)); - } - } - std::vector<std::string> _sample_ids_train; - std::vector<std::string> _sample_ids_test; - std::vector<std::string> _task_keys; - - std::vector<std::string> _allowed_param_ops; - std::vector<std::string> _allowed_ops; - std::vector<node_ptr> _phi_0; - - std::vector<double> _prop; - std::vector<double> _prop_test; - - std::vector<double> _prop_zero_int; - std::vector<double> _prop_test_zero_int; - - std::vector<int> _task_sizes_train; - std::vector<int> _task_sizes_test; - std::vector<int> _leave_out_inds; - }; - - TEST_F(SISSORegressorTests, FixInterceptFalseTest) - { -#ifdef PARAMETERIZE - std::shared_ptr<FeatureSpace> feat_space = std::make_shared<FeatureSpace>( - mpi_setup::comm, - _phi_0, - _allowed_ops, - _allowed_param_ops, - _prop, - _task_sizes_train, - "regression", - 2, - 10, - 1, - 0, - 1.0, - 1e-50, - 1e50 - ); -#else - std::shared_ptr<FeatureSpace> feat_space = std::make_shared<FeatureSpace>( - mpi_setup::comm, - _phi_0, - _allowed_ops, - _prop, - _task_sizes_train, - "regression", - 2, - 10, - 1, - 0, - 1.0, - 1e-50, - 1e50 - );; -#endif - SISSORegressor sisso( - feat_space, - "Property", - Unit("m"), - _prop, - _prop_test, - _task_sizes_train, - _task_sizes_test, - _leave_out_inds, - 2, - 2, - 3, - _sample_ids_train, - _sample_ids_test, - _task_keys, - false - ); - std::vector<double> prop_comp(90, 0.0); - std::transform(_prop.begin(), _prop.end(), sisso.prop().begin(), prop_comp.begin(), [](double p1, double p2){return std::abs(p1 - p2);}); - 
EXPECT_FALSE(std::any_of(prop_comp.begin(), prop_comp.end(), [](double p){return p > 1e-10;})); - - std::transform(_prop_test.begin(), _prop_test.begin() + 2, sisso.prop_test().begin(), prop_comp.begin(), [](double p1, double p2){return std::abs(p1 - p2);}); - EXPECT_FALSE(std::any_of(prop_comp.begin(), prop_comp.begin() + 2, [](double p){return p > 1e-10;})); - - EXPECT_EQ(sisso.n_samp(), 90); - EXPECT_EQ(sisso.n_dim(), 2); - EXPECT_EQ(sisso.n_residual(), 2); - EXPECT_EQ(sisso.n_models_store(), 3); - - sisso.fit(); - - EXPECT_EQ(sisso.models().size(), 2); - EXPECT_EQ(sisso.models()[0].size(), 3); - - EXPECT_LT(sisso.models().back()[0].rmse(), 1e-10); - EXPECT_LT(sisso.models().back()[0].test_rmse(), 1e-10); - - boost::filesystem::remove_all("feature_space/"); - boost::filesystem::remove_all("models/"); - } - - TEST_F(SISSORegressorTests, FixInterceptTrueTest) - { -#ifdef PARAMETERIZE - std::shared_ptr<FeatureSpace> feat_space = std::make_shared<FeatureSpace>( - mpi_setup::comm, - _phi_0, - _allowed_ops, - _allowed_param_ops, - _prop_zero_int, - _task_sizes_train, - "regression", - 2, - 10, - 1, - 0, - 1.0, - 1e-50, - 1e50 - ); -#else - std::shared_ptr<FeatureSpace> feat_space = std::make_shared<FeatureSpace>( - mpi_setup::comm, - _phi_0, - _allowed_ops, - _prop_zero_int, - _task_sizes_train, - "regression", - 2, - 10, - 1, - 0, - 1.0, - 1e-50, - 1e50 - ); -#endif - SISSORegressor sisso( - feat_space, - "Property", - Unit("m"), - _prop_zero_int, - _prop_test_zero_int, - _task_sizes_train, - _task_sizes_test, - _leave_out_inds, - 2, - 2, - 3, - _sample_ids_train, - _sample_ids_test, - _task_keys, - true - ); - - std::vector<double> prop_comp(90, 0.0); - std::transform(_prop_zero_int.begin(), _prop_zero_int.end(), sisso.prop().begin(), prop_comp.begin(), [](double p1, double p2){return std::abs(p1 - p2);}); - EXPECT_FALSE(std::any_of(prop_comp.begin(), prop_comp.end(), [](double p){return p > 1e-10;})); - - std::transform(_prop_test_zero_int.begin(), 
_prop_test_zero_int.begin() + 2, sisso.prop_test().begin(), prop_comp.begin(), [](double p1, double p2){return std::abs(p1 - p2);}); - EXPECT_FALSE(std::any_of(prop_comp.begin(), prop_comp.begin() + 2, [](double p){return p > 1e-10;})); - - EXPECT_EQ(sisso.n_samp(), 90); - EXPECT_EQ(sisso.n_dim(), 2); - EXPECT_EQ(sisso.n_residual(), 2); - EXPECT_EQ(sisso.n_models_store(), 3); - - sisso.fit(); - - EXPECT_EQ(sisso.models().size(), 2); - EXPECT_EQ(sisso.models()[0].size(), 3); - - EXPECT_LT(sisso.models().back()[0].rmse(), 1e-10); - EXPECT_LT(sisso.models().back()[0].test_rmse(), 1e-10); - - boost::filesystem::remove_all("feature_space/"); - boost::filesystem::remove_all("models/"); - } -} diff --git a/tests/googletest/descriptor_identification/sisso_regressor/test_sisso_classifier.cc b/tests/googletest/descriptor_identification/solver/test_sisso_classifier.cc similarity index 53% rename from tests/googletest/descriptor_identification/sisso_regressor/test_sisso_classifier.cc rename to tests/googletest/descriptor_identification/solver/test_sisso_classifier.cc index cb5bef9e3f43401a2fb39d5af2bdb3ac9f8450a3..7c2c0c1b8590cb796ef7d7d45d51fe834980888e 100644 --- a/tests/googletest/descriptor_identification/sisso_regressor/test_sisso_classifier.cc +++ b/tests/googletest/descriptor_identification/solver/test_sisso_classifier.cc @@ -27,31 +27,32 @@ namespace node_value_arrs::initialize_d_matrix_arr(); mpi_setup::init_mpi_env(); - node_value_arrs::initialize_values_arr(80, 20, 2, 2, true, false); + std::vector<int> task_sizes_train = {80}; + std::vector<int> task_sizes_test = {20}; - _task_sizes_train = {80}; - _task_sizes_test = {20}; - _leave_out_inds = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + node_value_arrs::initialize_values_arr(task_sizes_train, task_sizes_test, 2, 2, false); - _sample_ids_train.resize(80); - for(int ii = 0; ii < 80; ++ii) + std::vector<std::string> sample_ids_train(task_sizes_train[0]); + for(int ii = 0; ii < task_sizes_train[0]; ++ii) { - _sample_ids_train[ii] 
= std::to_string(ii); + sample_ids_train[ii] = std::to_string(ii); } - _sample_ids_test.resize(20); - for(int ii = 0; ii < 20; ++ii) + std::vector<std::string> sample_ids_test(task_sizes_test[0]); + std::vector<int> leave_out_inds(task_sizes_test[0]); + for(int ii = 0; ii < task_sizes_test[0]; ++ii) { - _sample_ids_test[ii] = std::to_string(ii); + sample_ids_test[ii] = std::to_string(ii); + leave_out_inds[ii] = ii; } - _task_keys = {"task"}; + std::vector<std::string> task_names = {"all"}; - std::vector<double> value_1(80, 0.0); - std::vector<double> value_2(80, 0.0); + std::vector<double> value_1(task_sizes_train[0], 0.0); + std::vector<double> value_2(task_sizes_train[0], 0.0); - std::vector<double> test_value_1(20, 0.0); - std::vector<double> test_value_2(20, 0.0); + std::vector<double> test_value_1(task_sizes_test[0], 0.0); + std::vector<double> test_value_2(task_sizes_test[0], 0.0); std::default_random_engine generator; std::uniform_real_distribution<double> distribution_12_pos(1.0, 2.0); @@ -103,97 +104,65 @@ namespace test_value_2[ii] = distribution_12_neg(generator); } - node_ptr feat_1 = std::make_shared<FeatureNode>(0, "A", value_1, test_value_1, Unit("m")); - node_ptr feat_2 = std::make_shared<FeatureNode>(1, "B", value_2, test_value_2, Unit("m")); + FeatureNode feat_1(0, "A", value_1, test_value_1, Unit("m")); + FeatureNode feat_2(1, "B", value_2, test_value_2, Unit("m")); - _prop = std::vector<double>(80, 0.0); - _prop_test = std::vector<double>(20, 0.0); + std::vector<double> prop = std::vector<double>(task_sizes_train[0], 0.0); + std::vector<double> prop_test = std::vector<double>(task_sizes_test[0], 0.0); - std::fill_n(_prop.begin() + 20, 20, 1.0); - std::fill_n(_prop.begin() + 40, 20, 2.0); - std::fill_n(_prop.begin() + 60, 20, 3.0); - std::fill_n(_prop_test.begin() + 5, 5, 1.0); - std::fill_n(_prop_test.begin() + 10, 5, 2.0); - std::fill_n(_prop_test.begin() + 15, 5, 3.0); + std::fill_n(prop.begin() + 20, 20, 1.0); + std::fill_n(prop.begin() + 40, 
20, 2.0); + std::fill_n(prop.begin() + 60, 20, 3.0); + std::fill_n(prop_test.begin() + 5, 5, 1.0); + std::fill_n(prop_test.begin() + 10, 5, 2.0); + std::fill_n(prop_test.begin() + 15, 5, 3.0); - _phi_0 ={feat_1, feat_2}; + std::vector<FeatureNode> phi_0 ={feat_1, feat_2}; - _allowed_ops = {"sq", "cb", "sqrt", "cbrt", "six_pow", "inv"}; - _allowed_param_ops = {}; - } - std::vector<std::string> _sample_ids_train; - std::vector<std::string> _sample_ids_test; - std::vector<std::string> _task_keys; + std::vector<std::string> allowed_ops = {"sq", "cb", "sqrt", "cbrt", "six_pow", "inv"}; + std::vector<std::string> allowed_param_ops = {}; + + inputs.set_calc_type("classification"); + inputs.set_phi_0(phi_0); + inputs.set_prop_train(prop); + inputs.set_prop_test(prop_test); + + inputs.set_task_names(task_names); + inputs.set_task_sizes_train(task_sizes_train); + inputs.set_task_sizes_test(task_sizes_test); + inputs.set_leave_out_inds(leave_out_inds); + + inputs.set_sample_ids_train(sample_ids_train); + inputs.set_sample_ids_test(sample_ids_test); - std::vector<std::string> _allowed_param_ops; - std::vector<std::string> _allowed_ops; - std::vector<node_ptr> _phi_0; + inputs.set_allowed_param_ops(allowed_param_ops); + inputs.set_allowed_ops(allowed_ops); - std::vector<double> _prop; - std::vector<double> _prop_test; + inputs.set_max_rung(2); + inputs.set_n_sis_select(5); + inputs.set_n_rung_store(1); + inputs.set_n_rung_generate(0); - std::vector<int> _task_sizes_train; - std::vector<int> _task_sizes_test; - std::vector<int> _leave_out_inds; + inputs.set_prop_label("Class"); + inputs.set_prop_unit(Unit()); + inputs.set_n_dim(2); + inputs.set_n_residual(2); + inputs.set_n_models_store(3); + } + + InputParser inputs; }; TEST_F(SISSOClassifierTests, FixInterceptFalseTest) { -#ifdef PARAMETERIZE - std::shared_ptr<FeatureSpace> feat_space = std::make_shared<FeatureSpace>( - mpi_setup::comm, - _phi_0, - _allowed_ops, - _allowed_param_ops, - _prop, - _task_sizes_train, - 
"classification", - 2, - 5, - 1, - 0, - 1.0, - 1e-50, - 1e50 - ); -#else - std::shared_ptr<FeatureSpace> feat_space = std::make_shared<FeatureSpace>( - mpi_setup::comm, - _phi_0, - _allowed_ops, - _prop, - _task_sizes_train, - "classification", - 2, - 5, - 1, - 0, - 1.0, - 1e-50, - 1e50 - ); -#endif - SISSOClassifier sisso( - feat_space, - "Property", - Unit("m"), - _prop, - _prop_test, - _task_sizes_train, - _task_sizes_test, - _leave_out_inds, - 2, - 2, - 3, - _sample_ids_train, - _sample_ids_test, - _task_keys - ); + std::shared_ptr<FeatureSpace> feat_space = std::make_shared<FeatureSpace>(inputs); + SISSOClassifier sisso(inputs, feat_space); + std::vector<double> prop_comp(80, 0.0); - std::transform(_prop.begin(), _prop.end(), sisso.prop().begin(), prop_comp.begin(), [](double p1, double p2){return std::abs(p1 - p2);}); + std::transform(inputs.prop_train().begin(), inputs.prop_train().end(), sisso.prop_train().begin(), prop_comp.begin(), [](double p1, double p2){return std::abs(p1 - p2);}); EXPECT_FALSE(std::any_of(prop_comp.begin(), prop_comp.end(), [](double p){return p > 1e-10;})); - std::transform(_prop_test.begin(), _prop_test.begin() + 10, sisso.prop_test().begin(), prop_comp.begin(), [](double p1, double p2){return std::abs(p1 - p2);}); + std::transform(inputs.prop_test().begin(), inputs.prop_test().begin() + 10, sisso.prop_test().begin(), prop_comp.begin(), [](double p1, double p2){return std::abs(p1 - p2);}); EXPECT_FALSE(std::any_of(prop_comp.begin(), prop_comp.begin() + 10, [](double p){return p > 1e-10;})); EXPECT_EQ(sisso.n_samp(), 80); @@ -214,5 +183,8 @@ namespace boost::filesystem::remove_all("feature_space/"); boost::filesystem::remove_all("models/"); + + sisso.models().back()[0].to_file("sisso_classifier_train_model.dat", true); + sisso.models().back()[0].to_file("sisso_classifier_test_model.dat", false); } } diff --git a/tests/googletest/descriptor_identification/sisso_regressor/test_sisso_log_regressor.cc 
b/tests/googletest/descriptor_identification/solver/test_sisso_log_regressor.cc similarity index 54% rename from tests/googletest/descriptor_identification/sisso_regressor/test_sisso_log_regressor.cc rename to tests/googletest/descriptor_identification/solver/test_sisso_log_regressor.cc index 431e22aaeb2aad3991ca218506c8fde22de51b16..35995fcb574b8abb6fcec474d795792f6e7958e3 100644 --- a/tests/googletest/descriptor_identification/sisso_regressor/test_sisso_log_regressor.cc +++ b/tests/googletest/descriptor_identification/solver/test_sisso_log_regressor.cc @@ -27,163 +27,123 @@ namespace node_value_arrs::initialize_d_matrix_arr(); mpi_setup::init_mpi_env(); - node_value_arrs::initialize_values_arr(90, 10, 3, 2, true, false); + std::vector<int> task_sizes_train = {90}; + std::vector<int> task_sizes_test = {10}; - _task_sizes_train = {90}; - _task_sizes_test = {10}; - _leave_out_inds = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + node_value_arrs::initialize_values_arr(task_sizes_train, task_sizes_test, 3, 2, false); - _sample_ids_train.resize(90); - for(int ii = 0; ii < 90; ++ii) + std::vector<int> leave_out_inds = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + + std::vector<std::string> task_names = {"all"}; + std::vector<std::string> sample_ids_train(task_sizes_train[0]); + for(int ii = 0; ii < task_sizes_train[0]; ++ii) { - _sample_ids_train[ii] = std::to_string(ii); + sample_ids_train[ii] = std::to_string(ii); } - _sample_ids_test.resize(10); - for(int ii = 0; ii < 10; ++ii) + std::vector<std::string> sample_ids_test(task_sizes_test[0]); + for(int ii = 0; ii < task_sizes_test[0]; ++ii) { - _sample_ids_test[ii] = std::to_string(ii); + sample_ids_test[ii] = std::to_string(ii); } - std::vector<double> value_1(90, 0.0); - std::vector<double> value_2(90, 0.0); - std::vector<double> value_3(90, 0.0); + std::vector<double> value_1(task_sizes_train[0], 0.0); + std::vector<double> value_2(task_sizes_train[0], 0.0); + std::vector<double> value_3(task_sizes_train[0], 0.0); - std::vector<double> 
test_value_1(10, 0.0); - std::vector<double> test_value_2(10, 0.0); - std::vector<double> test_value_3(10, 0.0); + std::vector<double> test_value_1(task_sizes_test[0], 0.0); + std::vector<double> test_value_2(task_sizes_test[0], 0.0); + std::vector<double> test_value_3(task_sizes_test[0], 0.0); std::default_random_engine generator; std::uniform_real_distribution<double> distribution_feats(0.01, 100.0); std::uniform_real_distribution<double> distribution_params(0.9, 1.1); - for(int ii = 0; ii < 90; ++ii) + for(int ii = 0; ii < task_sizes_train[0]; ++ii) { value_1[ii] = distribution_feats(generator); value_2[ii] = distribution_feats(generator); value_3[ii] = distribution_feats(generator); } - for(int ii = 0; ii < 10; ++ii) + for(int ii = 0; ii < task_sizes_test[0]; ++ii) { test_value_1[ii] = distribution_feats(generator); test_value_2[ii] = distribution_feats(generator); test_value_3[ii] = distribution_feats(generator); } - node_ptr feat_1 = std::make_shared<FeatureNode>(0, "A", value_1, test_value_1, Unit("m")); - node_ptr feat_2 = std::make_shared<FeatureNode>(1, "B", value_2, test_value_2, Unit("m")); - node_ptr feat_3 = std::make_shared<FeatureNode>(2, "C", value_3, test_value_3, Unit("s")); + FeatureNode feat_1(0, "A", value_1, test_value_1, Unit("m")); + FeatureNode feat_2(1, "B", value_2, test_value_2, Unit("m")); + FeatureNode feat_3(2, "C", value_3, test_value_3, Unit("s")); - _phi_0 ={feat_1, feat_2, feat_3}; + std::vector<FeatureNode> phi_0 ={feat_1, feat_2, feat_3}; double a00 = distribution_params(generator); double a10 = distribution_params(generator); double c00 = distribution_feats(generator); - _prop = std::vector<double>(90, 0.0); + _prop = std::vector<double>(task_sizes_train[0], 0.0); std::transform(value_1.begin(), value_1.end(), value_2.begin(), _prop.begin(), [&c00, &a00, &a10](double v1, double v2){return c00 * std::pow(v1 * v1, a00) * std::pow(v2, a10);}); - _prop_test = std::vector<double>(10, 0.0); + _prop_test = 
std::vector<double>(task_sizes_test[0], 0.0); std::transform(test_value_1.begin(), test_value_1.end(), test_value_2.begin(), _prop_test.begin(), [&c00, &a00, &a10](double v1, double v2){return c00 * std::pow(v1 * v1, a00) * std::pow(v2, a10);}); - _prop_zero_int = std::vector<double>(90, 0.0); + _prop_zero_int = std::vector<double>(task_sizes_train[0], 0.0); std::transform(value_1.begin(), value_1.end(), value_2.begin(), _prop_zero_int.begin(), [&a00, &a10](double v1, double v2){return std::pow(v1 * v1, a00) * std::pow(v2, a10);}); - _prop_test_zero_int = std::vector<double>(10, 0.0); + _prop_test_zero_int = std::vector<double>(task_sizes_test[0], 0.0); std::transform(test_value_1.begin(), test_value_1.end(), test_value_2.begin(), _prop_test_zero_int.begin(), [&a00, &a10](double v1, double v2){return std::pow(v1 * v1, a00) * std::pow(v2, a10);}); - _allowed_ops = {"div", "add", "mult", "sub"}; - _allowed_param_ops = {}; - - _task_keys = {"all"}; - for(int ii = 10; ii < 100; ++ii) - { - _sample_ids_train.push_back(std::to_string(ii)); - } - - for(int ii = 0; ii < 10; ++ii) - { - _sample_ids_test.push_back(std::to_string(ii)); - } + std::vector<std::string> allowed_ops = {"div", "add", "mult", "sub"}; + std::vector<std::string> allowed_param_ops = {}; + + inputs.set_task_sizes_train(task_sizes_train); + inputs.set_task_sizes_test(task_sizes_test); + inputs.set_sample_ids_train(sample_ids_train); + inputs.set_sample_ids_test(sample_ids_test); + inputs.set_task_names(task_names); + inputs.set_allowed_param_ops(allowed_param_ops); + inputs.set_allowed_ops(allowed_ops); + inputs.set_phi_0(phi_0); + inputs.set_task_sizes_train(task_sizes_train); + inputs.set_task_sizes_test(task_sizes_test); + inputs.set_leave_out_inds(leave_out_inds); + inputs.set_calc_type("log_regression"); + inputs.set_max_rung(2); + inputs.set_n_sis_select(10); + inputs.set_n_rung_store(1); + inputs.set_n_rung_generate(0); + + inputs.set_prop_label("Property"); + inputs.set_prop_unit(Unit("m")); + 
inputs.set_n_dim(2); + inputs.set_n_residual(2); + inputs.set_n_models_store(3); } - std::vector<std::string> _sample_ids_train; - std::vector<std::string> _sample_ids_test; - std::vector<std::string> _task_keys; - - std::vector<std::string> _allowed_param_ops; - std::vector<std::string> _allowed_ops; - std::vector<node_ptr> _phi_0; + InputParser inputs; std::vector<double> _prop; std::vector<double> _prop_test; std::vector<double> _prop_zero_int; std::vector<double> _prop_test_zero_int; - - std::vector<int> _task_sizes_train; - std::vector<int> _task_sizes_test; - std::vector<int> _leave_out_inds; }; TEST_F(SISSOLogRegressorTests, FixInterceptFalseTest) { -#ifdef PARAMETERIZE - std::shared_ptr<FeatureSpace> feat_space = std::make_shared<FeatureSpace>( - mpi_setup::comm, - _phi_0, - _allowed_ops, - _allowed_param_ops, - _prop, - _task_sizes_train, - std::string("log_regression"), - 2, - 10, - 1, - 0, - 1.0, - 1e-50, - 1e50 - ); -#else - std::shared_ptr<FeatureSpace> feat_space = std::make_shared<FeatureSpace>( - mpi_setup::comm, - _phi_0, - _allowed_ops, - _prop, - _task_sizes_train, - std::string("log_regression"), - 2, - 10, - 1, - 0, - 1.0, - 1e-50, - 1e50 - ); -#endif - SISSOLogRegressor sisso( - feat_space, - "Property", - Unit("m"), - _prop, - _prop_test, - _task_sizes_train, - _task_sizes_test, - _leave_out_inds, - 2, - 2, - 3, - _sample_ids_train, - _sample_ids_test, - _task_keys, - false - ); + inputs.set_prop_train(_prop); + inputs.set_prop_test(_prop_test); + inputs.set_fix_intercept(false); + + std::shared_ptr<FeatureSpace> feat_space = std::make_shared<FeatureSpace>(inputs); + SISSOLogRegressor sisso(inputs, feat_space); + std::vector<double> prop_comp(90, 0.0); std::transform( _prop.begin(), _prop.end(), - sisso.prop().begin(), + sisso.prop_train().begin(), prop_comp.begin(), [](double p1, double p2){return std::abs(std::log(p1) - p2);} ); @@ -217,63 +177,18 @@ namespace TEST_F(SISSOLogRegressorTests, FixInterceptTrueTest) { -#ifdef PARAMETERIZE - 
std::shared_ptr<FeatureSpace> feat_space = std::make_shared<FeatureSpace>( - mpi_setup::comm, - _phi_0, - _allowed_ops, - _allowed_param_ops, - _prop_zero_int, - _task_sizes_train, - "log_regression", - 2, - 10, - 1, - 0, - 1.0, - 1e-50, - 1e50 - ); -#else - std::shared_ptr<FeatureSpace> feat_space = std::make_shared<FeatureSpace>( - mpi_setup::comm, - _phi_0, - _allowed_ops, - _prop_zero_int, - _task_sizes_train, - "log_regression", - 2, - 10, - 1, - 0, - 1.0, - 1e-50, - 1e50 - ); -#endif - SISSOLogRegressor sisso( - feat_space, - "Property", - Unit("m"), - _prop_zero_int, - _prop_test_zero_int, - _task_sizes_train, - _task_sizes_test, - _leave_out_inds, - 2, - 2, - 3, - _sample_ids_train, - _sample_ids_test, - _task_keys, - true - ); + inputs.set_prop_train(_prop_zero_int); + inputs.set_prop_test(_prop_test_zero_int); + inputs.set_fix_intercept(true); + + std::shared_ptr<FeatureSpace> feat_space = std::make_shared<FeatureSpace>(inputs); + SISSOLogRegressor sisso(inputs, feat_space); std::vector<double> prop_comp(90, 0.0); std::transform( _prop_zero_int.begin(), _prop_zero_int.end(), - sisso.prop().begin(), + sisso.prop_train().begin(), prop_comp.begin(), [](double p1, double p2){return std::abs(std::log(p1) - p2);} ); diff --git a/tests/googletest/descriptor_identification/solver/test_sisso_regressor.cc b/tests/googletest/descriptor_identification/solver/test_sisso_regressor.cc new file mode 100644 index 0000000000000000000000000000000000000000..03cdd564a9e1e7aa289208967d0e4b9bb5c5141c --- /dev/null +++ b/tests/googletest/descriptor_identification/solver/test_sisso_regressor.cc @@ -0,0 +1,224 @@ +// Copyright 2021 Thomas A. R. Purcell +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include <descriptor_identifier/solver/SISSORegressor.hpp> +#include <boost/filesystem.hpp> +#include "gtest/gtest.h" +#include <random> + +namespace +{ + class SISSORegressorTests : public ::testing::Test + { + protected: + void SetUp() override + { + allowed_op_maps::set_node_maps(); + node_value_arrs::initialize_d_matrix_arr(); + mpi_setup::init_mpi_env(); + + std::vector<int> task_sizes_train = {36, 54}; + std::vector<int> task_sizes_test = {4, 6}; + + int n_samp_train = std::accumulate(task_sizes_train.begin(), task_sizes_train.end(), 0); + int n_samp_test = std::accumulate(task_sizes_test.begin(), task_sizes_test.end(), 0); + + node_value_arrs::initialize_values_arr(task_sizes_train, task_sizes_test, 3, 2, false); + + std::vector<int> leave_out_inds = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + + std::vector<std::string> sample_ids_train(n_samp_train); + for(int ii = 0; ii < n_samp_train; ++ii) + { + sample_ids_train[ii] = std::to_string(ii); + } + + std::vector<std::string> sample_ids_test(n_samp_test); + for(int ii = 0; ii < n_samp_test; ++ii) + { + sample_ids_test[ii] = std::to_string(ii); + } + + std::vector<double> value_1(n_samp_train, 0.0); + std::vector<double> value_2(n_samp_train, 0.0); + std::vector<double> value_3(n_samp_train, 0.0); + + std::vector<double> test_value_1(n_samp_test, 0.0); + std::vector<double> test_value_2(n_samp_test, 0.0); + std::vector<double> test_value_3(n_samp_test, 0.0); + + std::default_random_engine generator; + std::uniform_real_distribution<double> distribution_feats(-50.0, 50.0); + 
std::uniform_real_distribution<double> distribution_params(-2.50, 2.50); + + for(int ii = 0; ii < n_samp_train; ++ii) + { + value_1[ii] = distribution_feats(generator); + value_2[ii] = distribution_feats(generator); + value_3[ii] = distribution_feats(generator); + } + + for(int ii = 0; ii < n_samp_test; ++ii) + { + test_value_1[ii] = distribution_feats(generator); + test_value_2[ii] = distribution_feats(generator); + test_value_3[ii] = distribution_feats(generator); + } + + FeatureNode feat_1(0, "A", value_1, test_value_1, Unit("m")); + FeatureNode feat_2(1, "B", value_2, test_value_2, Unit("m")); + FeatureNode feat_3(2, "C", value_3, test_value_3, Unit("s")); + + std::vector<FeatureNode> phi_0 ={feat_1, feat_2, feat_3}; + + double a00 = distribution_params(generator); + double a01 = distribution_params(generator); + + double a10 = distribution_params(generator); + double a11 = distribution_params(generator); + + double c00 = distribution_params(generator); + double c01 = distribution_params(generator); + + _prop = std::vector<double>(n_samp_train, 0.0); + std::transform(value_1.begin(), value_1.begin() + task_sizes_train[0], value_2.begin(), _prop.begin(), [&c00, &a00](double v1, double v2){return c00 + a00 * (v1 - v2) * (v1 - v2);}); + std::transform(value_1.begin() + task_sizes_train[0], value_1.end(), value_2.begin() + task_sizes_train[0], _prop.begin() + task_sizes_train[0], [&c01, &a01](double v1, double v2){return c01 + a01 * (v1 - v2) * (v1 - v2);}); + + std::transform(value_3.begin(), value_3.begin() + task_sizes_train[0], _prop.begin(), _prop.begin(), [&a10](double v3, double p){return p + a10 * v3;}); + std::transform(value_3.begin() + task_sizes_train[0], value_3.end(), _prop.begin() + task_sizes_train[0], _prop.begin() + task_sizes_train[0], [&a11](double v3, double p){return p + a11 * v3;}); + + _prop_test = std::vector<double>(n_samp_test, 0.0); + std::transform(test_value_1.begin(), test_value_1.begin() + task_sizes_test[0], test_value_2.begin(), 
_prop_test.begin(), [&c00, &a00](double v1, double v2){return c00 + a00 * (v1 - v2) * (v1 - v2);}); + std::transform(test_value_1.begin() + task_sizes_test[0], test_value_1.end(), test_value_2.begin() + task_sizes_test[0], _prop_test.begin() + task_sizes_test[0], [&c01, &a01](double v1, double v2){return c01 + a01 * (v1 - v2) * (v1 - v2);}); + + std::transform(test_value_3.begin(), test_value_3.begin() + task_sizes_test[0], _prop_test.begin(), _prop_test.begin(), [&a10](double v3, double p){return p + a10 * v3;}); + std::transform(test_value_3.begin() + task_sizes_test[0], test_value_3.end(), _prop_test.begin() + task_sizes_test[0], _prop_test.begin() + task_sizes_test[0], [&a11](double v3, double p){return p + a11 * v3;}); + + _prop_zero_int = std::vector<double>(n_samp_train, 0.0); + std::transform(value_1.begin(), value_1.begin() + task_sizes_train[0], value_2.begin(), _prop_zero_int.begin(), [&a00](double v1, double v2){return a00 * (v1 - v2) * (v1 - v2);}); + std::transform(value_1.begin() + task_sizes_train[0], value_1.end(), value_2.begin() + task_sizes_train[0], _prop_zero_int.begin() + task_sizes_train[0], [&a01](double v1, double v2){return a01 * (v1 - v2) * (v1 - v2);}); + + std::transform(value_3.begin(), value_3.begin() + task_sizes_train[0], _prop_zero_int.begin(), _prop_zero_int.begin(), [&a10](double v3, double p){return p + a10 * v3;}); + std::transform(value_3.begin() + task_sizes_train[0], value_3.end(), _prop_zero_int.begin() + task_sizes_train[0], _prop_zero_int.begin() + task_sizes_train[0], [&a11](double v3, double p){return p + a11 * v3;}); + + _prop_test_zero_int = std::vector<double>(n_samp_test, 0.0); + std::transform(test_value_1.begin(), test_value_1.begin() + task_sizes_test[0], test_value_2.begin(), _prop_test_zero_int.begin(), [&a00](double v1, double v2){return a00 * (v1 - v2) * (v1 - v2);}); + std::transform(test_value_1.begin() + task_sizes_test[0], test_value_1.end(), test_value_2.begin() + task_sizes_test[0], 
_prop_test_zero_int.begin() + task_sizes_test[0], [&a01](double v1, double v2){return a01 * (v1 - v2) * (v1 - v2);}); + + std::transform(test_value_3.begin(), test_value_3.begin() + task_sizes_test[0], _prop_test_zero_int.begin(), _prop_test_zero_int.begin(), [&a10](double v3, double p){return p + a10 * v3;}); + std::transform(test_value_3.begin() + task_sizes_test[0], test_value_3.end(), _prop_test_zero_int.begin() + task_sizes_test[0], _prop_test_zero_int.begin() + task_sizes_test[0], [&a11](double v3, double p){return p + a11 * v3;}); + + std::vector<std::string> task_names = {"task_1", "task_2"}; + std::vector<std::string> allowed_ops = {"div", "add", "mult", "sub", "sq", "cb"}; + std::vector<std::string> allowed_param_ops = {}; + + inputs.set_calc_type("regression"); + inputs.set_phi_0(phi_0); + + inputs.set_task_names(task_names); + inputs.set_task_sizes_train(task_sizes_train); + inputs.set_task_sizes_test(task_sizes_test); + inputs.set_leave_out_inds(leave_out_inds); + + inputs.set_sample_ids_train(sample_ids_train); + inputs.set_sample_ids_test(sample_ids_test); + + inputs.set_allowed_param_ops(allowed_param_ops); + inputs.set_allowed_ops(allowed_ops); + + inputs.set_max_rung(2); + inputs.set_n_sis_select(10); + inputs.set_n_rung_store(1); + inputs.set_n_rung_generate(0); + + inputs.set_prop_label("Property"); + inputs.set_prop_unit(Unit("m")); + inputs.set_n_dim(2); + inputs.set_n_residual(2); + inputs.set_n_models_store(3); + } + InputParser inputs; + + std::vector<double> _prop; + std::vector<double> _prop_test; + + std::vector<double> _prop_zero_int; + std::vector<double> _prop_test_zero_int; + }; + + TEST_F(SISSORegressorTests, FixInterceptFalseTest) + { + inputs.set_prop_train(_prop); + inputs.set_prop_test(_prop_test); + inputs.set_fix_intercept(false); + + std::shared_ptr<FeatureSpace> feat_space = std::make_shared<FeatureSpace>(inputs); + SISSORegressor sisso(inputs, feat_space); + + std::vector<double> prop_comp(90, 0.0); + 
std::transform(_prop.begin(), _prop.end(), sisso.prop_train().begin(), prop_comp.begin(), [](double p1, double p2){return std::abs(p1 - p2);}); + EXPECT_FALSE(std::any_of(prop_comp.begin(), prop_comp.end(), [](double p){return p > 1e-10;})); + + std::transform(_prop_test.begin(), _prop_test.begin() + 2, sisso.prop_test().begin(), prop_comp.begin(), [](double p1, double p2){return std::abs(p1 - p2);}); + EXPECT_FALSE(std::any_of(prop_comp.begin(), prop_comp.begin() + 2, [](double p){return p > 1e-10;})); + + EXPECT_EQ(sisso.n_samp(), 90); + EXPECT_EQ(sisso.n_dim(), 2); + EXPECT_EQ(sisso.n_residual(), 2); + EXPECT_EQ(sisso.n_models_store(), 3); + + sisso.fit(); + + EXPECT_EQ(sisso.models().size(), 2); + EXPECT_EQ(sisso.models()[0].size(), 3); + + EXPECT_LT(sisso.models().back()[0].rmse(), 1e-10); + EXPECT_LT(sisso.models().back()[0].test_rmse(), 1e-10); + + boost::filesystem::remove_all("feature_space/"); + boost::filesystem::remove_all("models/"); + } + + TEST_F(SISSORegressorTests, FixInterceptTrueTest) + { + inputs.set_prop_train(_prop_zero_int); + inputs.set_prop_test(_prop_test_zero_int); + inputs.set_fix_intercept(true); + + std::shared_ptr<FeatureSpace> feat_space = std::make_shared<FeatureSpace>(inputs); + SISSORegressor sisso(inputs, feat_space); + + std::vector<double> prop_comp(90, 0.0); + std::transform(_prop_zero_int.begin(), _prop_zero_int.end(), sisso.prop_train().begin(), prop_comp.begin(), [](double p1, double p2){return std::abs(p1 - p2);}); + EXPECT_FALSE(std::any_of(prop_comp.begin(), prop_comp.end(), [](double p){return p > 1e-10;})); + + std::transform(_prop_test_zero_int.begin(), _prop_test_zero_int.begin() + 2, sisso.prop_test().begin(), prop_comp.begin(), [](double p1, double p2){return std::abs(p1 - p2);}); + EXPECT_FALSE(std::any_of(prop_comp.begin(), prop_comp.begin() + 2, [](double p){return p > 1e-10;})); + + EXPECT_EQ(sisso.n_samp(), 90); + EXPECT_EQ(sisso.n_dim(), 2); + EXPECT_EQ(sisso.n_residual(), 2); + 
EXPECT_EQ(sisso.n_models_store(), 3); + + sisso.fit(); + + EXPECT_EQ(sisso.models().size(), 2); + EXPECT_EQ(sisso.models()[0].size(), 3); + + EXPECT_LT(sisso.models().back()[0].rmse(), 1e-10); + EXPECT_LT(sisso.models().back()[0].test_rmse(), 1e-10); + + boost::filesystem::remove_all("feature_space/"); + boost::filesystem::remove_all("models/"); + } +} diff --git a/tests/googletest/feature_creation/feature_generation/test_abs_diff_node.cc b/tests/googletest/feature_creation/feature_generation/test_abs_diff_node.cc index 2f00738828072bb5645479ebab07c21c93d62cec..15db40a58c96c5e629f5ccbf74569acf2d1e205d 100644 --- a/tests/googletest/feature_creation/feature_generation/test_abs_diff_node.cc +++ b/tests/googletest/feature_creation/feature_generation/test_abs_diff_node.cc @@ -25,7 +25,7 @@ namespace protected: void SetUp() override { - node_value_arrs::initialize_values_arr(4, 1, 4, 2); + node_value_arrs::initialize_values_arr({4}, {1}, 4, 2, false); std::vector<double> value_1 = {1.0, 2.0, 3.0, 4.0}; std::vector<double> test_value_1 = {5.0}; diff --git a/tests/googletest/feature_creation/feature_generation/test_abs_node.cc b/tests/googletest/feature_creation/feature_generation/test_abs_node.cc index d9550d0dab03551b18b17bad9614f0c811e8d2b4..24ec3033d94c4037efc74d44f2af8dc22a271ce0 100644 --- a/tests/googletest/feature_creation/feature_generation/test_abs_node.cc +++ b/tests/googletest/feature_creation/feature_generation/test_abs_node.cc @@ -24,7 +24,7 @@ namespace protected: void SetUp() override { - node_value_arrs::initialize_values_arr(4, 1, 4, 2); + node_value_arrs::initialize_values_arr({4}, {1}, 4, 2, false); std::vector<double> value_1 = {-1.0, -2.0, -3.0, -4.0}; std::vector<double> test_value_1 = {50.0}; diff --git a/tests/googletest/feature_creation/feature_generation/test_add_node.cc b/tests/googletest/feature_creation/feature_generation/test_add_node.cc index d979f111593695086fd610a9abd54d205c8ff9bd..7a721416b9952e6cb0f73808b1e09f1cd1cafe8f 100644 --- 
a/tests/googletest/feature_creation/feature_generation/test_add_node.cc +++ b/tests/googletest/feature_creation/feature_generation/test_add_node.cc @@ -24,7 +24,7 @@ namespace protected: void SetUp() override { - node_value_arrs::initialize_values_arr(4, 1, 4, 2); + node_value_arrs::initialize_values_arr({4}, {1}, 4, 2, false); std::vector<double> value_1 = {1.0, 2.0, 3.0, 4.0}; std::vector<double> test_value_1 = {5.0}; diff --git a/tests/googletest/feature_creation/feature_generation/test_cb_node.cc b/tests/googletest/feature_creation/feature_generation/test_cb_node.cc index efe0b392d7b016f8e3ddde599fb67c7a0c87e0e8..dcb9552d4ea84f4411a82c3b9942bf30290cb086 100644 --- a/tests/googletest/feature_creation/feature_generation/test_cb_node.cc +++ b/tests/googletest/feature_creation/feature_generation/test_cb_node.cc @@ -28,7 +28,7 @@ namespace protected: void SetUp() override { - node_value_arrs::initialize_values_arr(4, 1, 4, 2); + node_value_arrs::initialize_values_arr({4}, {1}, 4, 2, false); std::vector<double> value_1 = {1.0, 2.0, 3.0, 8.0}; std::vector<double> test_value_1 = {2.0}; diff --git a/tests/googletest/feature_creation/feature_generation/test_cbrt_node.cc b/tests/googletest/feature_creation/feature_generation/test_cbrt_node.cc index 0375b79a57423a38f6adbd138b993b798b03f3d9..6317a29e917b1836b2880422ce222ac83d306ba7 100644 --- a/tests/googletest/feature_creation/feature_generation/test_cbrt_node.cc +++ b/tests/googletest/feature_creation/feature_generation/test_cbrt_node.cc @@ -29,7 +29,7 @@ namespace protected: void SetUp() override { - node_value_arrs::initialize_values_arr(4, 1, 4, 2); + node_value_arrs::initialize_values_arr({4}, {1}, 4, 2, false); std::vector<double> value_1 = {1.0, 2.0, 3.0, 8.0}; std::vector<double> test_value_1 = {8.0}; diff --git a/tests/googletest/feature_creation/feature_generation/test_cos_node.cc b/tests/googletest/feature_creation/feature_generation/test_cos_node.cc index 
11208163a041e629d1db0f1e5dabae96d387027d..23da36c272f47ea70bc699b069854a3667ddf3fb 100644 --- a/tests/googletest/feature_creation/feature_generation/test_cos_node.cc +++ b/tests/googletest/feature_creation/feature_generation/test_cos_node.cc @@ -25,7 +25,7 @@ namespace protected: void SetUp() override { - node_value_arrs::initialize_values_arr(4, 1, 4, 2); + node_value_arrs::initialize_values_arr({4}, {1}, 4, 2, false); std::vector<double> value_1 = {0.0, 2.0, 3.0, 4.0}; std::vector<double> test_value_1 = {0.0}; diff --git a/tests/googletest/feature_creation/feature_generation/test_div_node.cc b/tests/googletest/feature_creation/feature_generation/test_div_node.cc index beabdaa28234832cd175f838cc2989ec24b9325d..5a4997ed351d7fd15bf1953f6bb4cd69bad200fa 100644 --- a/tests/googletest/feature_creation/feature_generation/test_div_node.cc +++ b/tests/googletest/feature_creation/feature_generation/test_div_node.cc @@ -25,7 +25,7 @@ namespace protected: void SetUp() override { - node_value_arrs::initialize_values_arr(4, 1, 4, 2); + node_value_arrs::initialize_values_arr({4}, {1}, 4, 2, false); std::vector<double> value_1 = {1.0, 2.0, 3.0, 4.0}; std::vector<double> test_value_1 = {5.0}; diff --git a/tests/googletest/feature_creation/feature_generation/test_exp_node.cc b/tests/googletest/feature_creation/feature_generation/test_exp_node.cc index 7e86b6336fd68525e9e448d5dc1eeca4f4330854..ea994d6994b84ec3de3791310495a5279df7bf3b 100644 --- a/tests/googletest/feature_creation/feature_generation/test_exp_node.cc +++ b/tests/googletest/feature_creation/feature_generation/test_exp_node.cc @@ -29,7 +29,7 @@ namespace protected: void SetUp() override { - node_value_arrs::initialize_values_arr(4, 1, 3, 2); + node_value_arrs::initialize_values_arr({4}, {1}, 3, 2, false); std::vector<double> value_1 = {0.0, 2.0, 3.0, 4.0}; std::vector<double> test_value_1 = {0.0}; diff --git a/tests/googletest/feature_creation/feature_generation/test_feat_node.cc 
b/tests/googletest/feature_creation/feature_generation/test_feat_node.cc index f6a29ca1d51f472f68f246265960f2d58ee02385..852a83952f7cce3d36ed75eb18e1d480b1f8e1e5 100644 --- a/tests/googletest/feature_creation/feature_generation/test_feat_node.cc +++ b/tests/googletest/feature_creation/feature_generation/test_feat_node.cc @@ -23,7 +23,7 @@ namespace protected: void SetUp() override { - node_value_arrs::initialize_values_arr(4, 1, 3, 0); + node_value_arrs::initialize_values_arr({4}, {1}, 3, 0, false); _value_1 = {1.0, 2.0, 3.0, 4.0}; _test_value_1 = {5.0}; diff --git a/tests/googletest/feature_creation/feature_generation/test_inv_node.cc b/tests/googletest/feature_creation/feature_generation/test_inv_node.cc index ec761238b2d235518bccab62c0d3dca927f17138..3d844655e9e701dcea4b64eb3195a0ce3686f657 100644 --- a/tests/googletest/feature_creation/feature_generation/test_inv_node.cc +++ b/tests/googletest/feature_creation/feature_generation/test_inv_node.cc @@ -29,7 +29,7 @@ namespace protected: void SetUp() override { - node_value_arrs::initialize_values_arr(4, 1, 4, 2); + node_value_arrs::initialize_values_arr({4}, {1}, 4, 2, false); std::vector<double> value_1 = {1.0, 2.0, 3.0, 8.0}; std::vector<double> test_value_1 = {2.0}; diff --git a/tests/googletest/feature_creation/feature_generation/test_log_node.cc b/tests/googletest/feature_creation/feature_generation/test_log_node.cc index 391708165bf7331cf2db23ac5ab16b17d84780f0..f7a948841804684aae885dc97274e7a3f635eb06 100644 --- a/tests/googletest/feature_creation/feature_generation/test_log_node.cc +++ b/tests/googletest/feature_creation/feature_generation/test_log_node.cc @@ -35,7 +35,7 @@ namespace protected: void SetUp() override { - node_value_arrs::initialize_values_arr(4, 1, 4, 2); + node_value_arrs::initialize_values_arr({4}, {1}, 4, 2, false); std::vector<double> value_1 = {1.0, 2.0, 3.0, 4.0}; std::vector<double> test_value_1 = {1.0}; diff --git 
a/tests/googletest/feature_creation/feature_generation/test_model_node.cc b/tests/googletest/feature_creation/feature_generation/test_model_node.cc index 52922540b9390e522388b435b929af16e5214a83..d1ad4c00dfc5f0b67ca9f02d0322f971bd7234a3 100644 --- a/tests/googletest/feature_creation/feature_generation/test_model_node.cc +++ b/tests/googletest/feature_creation/feature_generation/test_model_node.cc @@ -23,7 +23,7 @@ namespace protected: void SetUp() override { - node_value_arrs::initialize_values_arr(4, 1, 3, 0); + node_value_arrs::initialize_values_arr({4}, {1}, 3, 0, false); _value_1 = {1.0, 2.0, 3.0, 4.0}; _test_value_1 = {5.0}; diff --git a/tests/googletest/feature_creation/feature_generation/test_mult_node.cc b/tests/googletest/feature_creation/feature_generation/test_mult_node.cc index 46803bb5074512a8ef50258b5f4319ea9567cda2..ce8839c70318671da39a0656740f495bbe6ddf23 100644 --- a/tests/googletest/feature_creation/feature_generation/test_mult_node.cc +++ b/tests/googletest/feature_creation/feature_generation/test_mult_node.cc @@ -24,7 +24,7 @@ namespace protected: void SetUp() override { - node_value_arrs::initialize_values_arr(4, 1, 4, 2); + node_value_arrs::initialize_values_arr({4}, {1}, 4, 2, false); std::vector<double> value_1 = {1.0, 2.0, 3.0, 4.0}; std::vector<double> test_value_1 = {5.0}; diff --git a/tests/googletest/feature_creation/feature_generation/test_neg_exp_node.cc b/tests/googletest/feature_creation/feature_generation/test_neg_exp_node.cc index f106502bd9e0253ca94f5379e9dbc9fe44ee25c9..a894434d3e3cfed66de9430394de62bc03112fc7 100644 --- a/tests/googletest/feature_creation/feature_generation/test_neg_exp_node.cc +++ b/tests/googletest/feature_creation/feature_generation/test_neg_exp_node.cc @@ -29,7 +29,7 @@ namespace protected: void SetUp() override { - node_value_arrs::initialize_values_arr(4, 1, 3, 2); + node_value_arrs::initialize_values_arr({4}, {1}, 3, 2, false); std::vector<double> value_1 = {0.0, 2.0, 3.0, 4.0}; std::vector<double> 
test_value_1 = {0.0}; diff --git a/tests/googletest/feature_creation/feature_generation/test_sin_node.cc b/tests/googletest/feature_creation/feature_generation/test_sin_node.cc index 07409e51ccc1aca5f411409ccde8d13411935f45..5e3f541d355bf0ce22aa4ead3b88a5cc88637616 100644 --- a/tests/googletest/feature_creation/feature_generation/test_sin_node.cc +++ b/tests/googletest/feature_creation/feature_generation/test_sin_node.cc @@ -25,7 +25,7 @@ namespace protected: void SetUp() override { - node_value_arrs::initialize_values_arr(4, 1, 4, 2); + node_value_arrs::initialize_values_arr({4}, {1}, 4, 2, false); std::vector<double> value_1 = {0.0, 2.0, 3.0, 4.0}; std::vector<double> test_value_1 = {0.0}; diff --git a/tests/googletest/feature_creation/feature_generation/test_six_pow_node.cc b/tests/googletest/feature_creation/feature_generation/test_six_pow_node.cc index 4ac48a2a82226f32d2ab6a80161402e494aacc0d..f7ae6b719cb9e2a5ea1a22a03da7c08d2328ac68 100644 --- a/tests/googletest/feature_creation/feature_generation/test_six_pow_node.cc +++ b/tests/googletest/feature_creation/feature_generation/test_six_pow_node.cc @@ -30,7 +30,7 @@ namespace protected: void SetUp() override { - node_value_arrs::initialize_values_arr(4, 1, 4, 2); + node_value_arrs::initialize_values_arr({4}, {1}, 4, 2, false); std::vector<double> value_1 = {1.0, 2.0, 3.0, 4.0}; std::vector<double> test_value_1 = {2.0}; diff --git a/tests/googletest/feature_creation/feature_generation/test_sq_node.cc b/tests/googletest/feature_creation/feature_generation/test_sq_node.cc index f2e5367054745693e708f550f9ec2f9df8926149..ac56f17a6a831e20421cc96c319d0a9cbe7873ab 100644 --- a/tests/googletest/feature_creation/feature_generation/test_sq_node.cc +++ b/tests/googletest/feature_creation/feature_generation/test_sq_node.cc @@ -27,7 +27,7 @@ namespace protected: void SetUp() override { - node_value_arrs::initialize_values_arr(4, 1, 4, 2); + node_value_arrs::initialize_values_arr({4}, {1}, 4, 2, false); std::vector<double> 
value_1 = {1.0, 2.0, 3.0, 8.0}; std::vector<double> test_value_1 = {2.0}; diff --git a/tests/googletest/feature_creation/feature_generation/test_sqrt_node.cc b/tests/googletest/feature_creation/feature_generation/test_sqrt_node.cc index 54b808b42f68531cb8bd3e202457cfeccdcbb74f..16d9dd291ed942e75291cca3d5387ae1fb7d6fa2 100644 --- a/tests/googletest/feature_creation/feature_generation/test_sqrt_node.cc +++ b/tests/googletest/feature_creation/feature_generation/test_sqrt_node.cc @@ -30,7 +30,7 @@ namespace protected: void SetUp() override { - node_value_arrs::initialize_values_arr(4, 1, 4, 2); + node_value_arrs::initialize_values_arr({4}, {1}, 4, 2, false); std::vector<double> value_1 = {1.0, 2.0, 3.0, 4.0}; std::vector<double> test_value_1 = {4.0}; diff --git a/tests/googletest/feature_creation/feature_generation/test_sub_node.cc b/tests/googletest/feature_creation/feature_generation/test_sub_node.cc index 4d8e046480c359d53160bf320bdde1304be4fa99..ab5559d790214a0f63be7ee743730d21ba01ef1f 100644 --- a/tests/googletest/feature_creation/feature_generation/test_sub_node.cc +++ b/tests/googletest/feature_creation/feature_generation/test_sub_node.cc @@ -24,7 +24,7 @@ namespace protected: void SetUp() override { - node_value_arrs::initialize_values_arr(4, 1, 4, 2); + node_value_arrs::initialize_values_arr({4}, {1}, 4, 2, false); std::vector<double> value_1 = {1.0, 2.0, 3.0, 4.0}; std::vector<double> test_value_1 = {5.0}; diff --git a/tests/googletest/feature_creation/feature_space/test_feat_space.cc b/tests/googletest/feature_creation/feature_space/test_feat_space.cc index 6f89b7fae353bdc2bc6ed25b0c89466743a41b1c..716bb824da01e050d2dd59bace3d0b11a627b4f8 100644 --- a/tests/googletest/feature_creation/feature_space/test_feat_space.cc +++ b/tests/googletest/feature_creation/feature_space/test_feat_space.cc @@ -29,314 +29,195 @@ namespace node_value_arrs::initialize_d_matrix_arr(); mpi_setup::init_mpi_env(); - _task_sizes = {5, 5}; - node_value_arrs::initialize_values_arr(10, 
0, 3, 2, true, false); + std::vector<int> task_sizes = {5, 5}; + int n_samp = std::accumulate(task_sizes.begin(), task_sizes.end(), 0); - std::vector<double> value_1 = {3.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0}; - std::vector<double> value_2 = {1.10, 2.20, 3.10, 4.20, 5.10, 6.20, 7.10, 8.20, 9.10, 10.20}; - std::vector<double> value_3 = {3.0, -3.0, 5.0, -7.0, 9.0, -2.0, 4.0, -6.0, 8.0, -10.0}; + node_value_arrs::initialize_values_arr(task_sizes, {0, 0}, 3, 2, false); - node_ptr feat_1 = std::make_shared<FeatureNode>(0, "A", value_1, std::vector<double>(), Unit("m")); - node_ptr feat_2 = std::make_shared<FeatureNode>(1, "B", value_2, std::vector<double>(), Unit("m")); - node_ptr feat_3 = std::make_shared<FeatureNode>(2, "C", value_3, std::vector<double>(), Unit("s")); + std::vector<double> value_1(n_samp, 0.0); + std::vector<double> value_2(n_samp, 0.0); + std::vector<double> value_3(n_samp, 0.0); - _phi_0 = {feat_1, feat_2, feat_3}; - _prop = std::vector<double>(10, 0.0); - _prop_class = {0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0}; - _prop_log_reg = std::vector<double>(10, 0.0); - std::transform(value_2.begin(), value_2.begin() + _task_sizes[0], value_3.begin(), _prop.begin(), [](double v1, double v2){return v1 / (v2 * v2);}); - std::transform(value_2.begin() + _task_sizes[0], value_2.end(), value_3.begin() + _task_sizes[0], _prop.begin() + _task_sizes[0], [](double v1, double v2){return -6.5 + 1.25 * v1 / (v2 * v2);}); + _prop = std::vector<double>(n_samp, 0.0); + _prop_class = std::vector<double>(n_samp, 0.0); + _prop_log_reg = std::vector<double>(n_samp, 0.0); + + for(int ii = 0; ii < n_samp; ++ ii) + { + _prop_class[ii] = ii % 2; + value_1[ii] = static_cast<double>(ii + 1); + value_2[ii] = static_cast<double>(ii + 1) + 0.1 + 0.1 * (ii % 2); + value_3[ii] = static_cast<double>(2 * ii + 1) * std::pow(-1, ii); + } + + value_1[0] = 3.0; + value_3[0] = 3.0; + + std::transform(value_2.begin(), value_2.begin() + task_sizes[0], value_3.begin(), 
_prop.begin(), [](double v1, double v2){return v1 / (v2 * v2);}); + std::transform(value_2.begin() + task_sizes[0], value_2.end(), value_3.begin() + task_sizes[0], _prop.begin() + task_sizes[0], [](double v1, double v2){return -6.5 + 1.25 * v1 / (v2 * v2);}); std::transform(value_2.begin(), value_2.end(), value_3.begin(), _prop_log_reg.begin(), [](double v1, double v2){return v1 / (v2 * v2);}); - _allowed_ops = {"sq", "cb", "div", "add"}; - _allowed_param_ops = {}; + FeatureNode feat_1(0, "A", value_1, std::vector<double>(), Unit("m")); + FeatureNode feat_2(1, "B", value_2, std::vector<double>(), Unit("m")); + FeatureNode feat_3(2, "C", value_3, std::vector<double>(), Unit("s")); + + std::vector<FeatureNode> phi_0 = {feat_1, feat_2, feat_3}; + + _inputs.set_phi_0(phi_0); + _inputs.set_task_sizes_train(task_sizes); + _inputs.set_allowed_ops({"sq", "cb", "div", "add"}); + _inputs.set_allowed_param_ops({}); + _inputs.set_cross_cor_max(1.0); + _inputs.set_l_bound(1e-50); + _inputs.set_u_bound(1e50); + _inputs.set_n_rung_store(1); + _inputs.set_max_rung(2); + _inputs.set_n_sis_select(10); + _inputs.set_n_rung_generate(0); + _inputs.set_max_param_depth(0); + _inputs.set_reparam_residual(false); } - std::vector<node_ptr> _phi_0; - std::vector<std::string> _allowed_ops; - std::vector<std::string> _allowed_param_ops; + InputParser _inputs; std::vector<double> _prop; std::vector<double> _prop_log_reg; std::vector<double> _prop_class; - std::vector<int> _task_sizes; }; TEST_F(FeatSpaceTest, RegTest) { -#ifdef PARAMETERIZE - FeatureSpace feat_space( - mpi_setup::comm, - _phi_0, - _allowed_ops, - _allowed_param_ops, - _prop, - _task_sizes, - "regression", - 2, - 10, - 1, - 0, - 1.0, - 1e-50, - 1e50 - ); -#else - FeatureSpace feat_space( - mpi_setup::comm, - _phi_0, - _allowed_ops, - _prop, - _task_sizes, - "regression", - 2, - 10, - 1, - 0, - 1.0, - 1e-50, - 1e50 - ); -#endif + _inputs.set_calc_type("regression"); + _inputs.set_prop_train(_prop); + + FeatureSpace 
feat_space(_inputs); feat_space.sis(_prop); - EXPECT_EQ(feat_space.task_sizes()[0], _task_sizes[0]); + EXPECT_EQ(feat_space.task_sizes_train()[0], _inputs.task_sizes_train()[0]); EXPECT_STREQ(feat_space.feature_space_file().c_str(), "feature_space/selected_features.txt"); EXPECT_EQ(feat_space.l_bound(), 1e-50); EXPECT_EQ(feat_space.u_bound(), 1e50); EXPECT_EQ(feat_space.max_rung(), 2); EXPECT_EQ(feat_space.n_sis_select(), 10); - EXPECT_EQ(feat_space.n_samp(), 10); + EXPECT_EQ(feat_space.n_samp_train(), 10); EXPECT_EQ(feat_space.n_feat(), 154); EXPECT_EQ(feat_space.n_rung_store(), 1); EXPECT_EQ(feat_space.n_rung_generate(), 0); EXPECT_LT(std::abs(feat_space.phi_selected()[0]->value()[0] - _prop[0]), 1e-10); - EXPECT_LT(std::abs(feat_space.phi0()[0]->value()[0] - _phi_0[0]->value()[0]), 1e-10); - EXPECT_LT(std::abs(feat_space.phi()[0]->value()[0] - _phi_0[0]->value()[0]), 1e-10); + EXPECT_LT(std::abs(feat_space.phi0()[0]->value()[0] - _inputs.phi_0()[0].value()[0]), 1e-10); + EXPECT_LT(std::abs(feat_space.phi()[0]->value()[0] - _inputs.phi_0()[0].value()[0]), 1e-10); boost::filesystem::remove_all("feature_space/"); } TEST_F(FeatSpaceTest, ProjectGenTest) { -#ifdef PARAMETERIZE - FeatureSpace feat_space( - mpi_setup::comm, - _phi_0, - _allowed_ops, - _allowed_param_ops, - _prop, - _task_sizes, - "regression", - 2, - 10, - 1, - 1, - 1.0, - 1e-50, - 1e50 - ); -#else - FeatureSpace feat_space( - mpi_setup::comm, - _phi_0, - _allowed_ops, - _prop, - _task_sizes, - "regression", - 2, - 10, - 1, - 1, - 1.0, - 1e-50, - 1e50 - ); -#endif + _inputs.set_n_rung_generate(1); + _inputs.set_calc_type("regression"); + _inputs.set_prop_train(_prop); + + FeatureSpace feat_space(_inputs); + feat_space.sis(_prop); - EXPECT_EQ(feat_space.task_sizes()[0], _task_sizes[0]); + EXPECT_EQ(feat_space.task_sizes_train()[0], _inputs.task_sizes_train()[0]); EXPECT_STREQ(feat_space.feature_space_file().c_str(), "feature_space/selected_features.txt"); EXPECT_EQ(feat_space.l_bound(), 1e-50); 
EXPECT_EQ(feat_space.u_bound(), 1e50); EXPECT_EQ(feat_space.max_rung(), 2); EXPECT_EQ(feat_space.n_sis_select(), 10); - EXPECT_EQ(feat_space.n_samp(), 10); + EXPECT_EQ(feat_space.n_samp_train(), 10); EXPECT_EQ(feat_space.n_feat(), 16); EXPECT_EQ(feat_space.n_rung_store(), 1); EXPECT_EQ(feat_space.n_rung_generate(), 1); EXPECT_LT(std::abs(feat_space.phi_selected()[0]->value()[0] - _prop[0]), 1e-10); - EXPECT_LT(std::abs(feat_space.phi0()[0]->value()[0] - _phi_0[0]->value()[0]), 1e-10); - EXPECT_LT(std::abs(feat_space.phi()[0]->value()[0] - _phi_0[0]->value()[0]), 1e-10); + EXPECT_LT(std::abs(feat_space.phi0()[0]->value()[0] - _inputs.phi_0()[0].value()[0]), 1e-10); + EXPECT_LT(std::abs(feat_space.phi()[0]->value()[0] - _inputs.phi_0()[0].value()[0]), 1e-10); boost::filesystem::remove_all("feature_space/"); } TEST_F(FeatSpaceTest, MaxCorrTest) { -#ifdef PARAMETERIZE - FeatureSpace feat_space( - mpi_setup::comm, - _phi_0, - _allowed_ops, - _allowed_param_ops, - _prop, - _task_sizes, - "regression", - 2, - 10, - 1, - 0, - 0.99, - 1e-50, - 1e50 - ); -#else - FeatureSpace feat_space( - mpi_setup::comm, - _phi_0, - _allowed_ops, - _prop, - _task_sizes, - "regression", - 2, - 10, - 1, - 0, - 0.99, - 1e-50, - 1e50 - ); -#endif + _inputs.set_cross_cor_max(0.95); + _inputs.set_calc_type("regression"); + _inputs.set_prop_train(_prop); + + FeatureSpace feat_space(_inputs); + feat_space.sis(_prop); - EXPECT_EQ(feat_space.task_sizes()[0], _task_sizes[0]); + EXPECT_EQ(feat_space.task_sizes_train()[0], _inputs.task_sizes_train()[0]); EXPECT_STREQ(feat_space.feature_space_file().c_str(), "feature_space/selected_features.txt"); EXPECT_EQ(feat_space.l_bound(), 1e-50); EXPECT_EQ(feat_space.u_bound(), 1e50); EXPECT_EQ(feat_space.max_rung(), 2); EXPECT_EQ(feat_space.n_sis_select(), 10); - EXPECT_EQ(feat_space.n_samp(), 10); + EXPECT_EQ(feat_space.n_samp_train(), 10); EXPECT_EQ(feat_space.n_feat(), 154); EXPECT_EQ(feat_space.n_rung_store(), 1); EXPECT_EQ(feat_space.n_rung_generate(), 0); 
EXPECT_LT(std::abs(feat_space.phi_selected()[0]->value()[0] - _prop[0]), 1e-10); - EXPECT_LT(std::abs(feat_space.phi0()[0]->value()[0] - _phi_0[0]->value()[0]), 1e-10); - EXPECT_LT(std::abs(feat_space.phi()[0]->value()[0] - _phi_0[0]->value()[0]), 1e-10); + EXPECT_LT(std::abs(feat_space.phi0()[0]->value()[0] - _inputs.phi_0()[0].value()[0]), 1e-10); + EXPECT_LT(std::abs(feat_space.phi()[0]->value()[0] - _inputs.phi_0()[0].value()[0]), 1e-10); boost::filesystem::remove_all("feature_space/"); } TEST_F(FeatSpaceTest, LogRegTest) { -#ifdef PARAMETERIZE - FeatureSpace feat_space( - mpi_setup::comm, - _phi_0, - _allowed_ops, - _allowed_param_ops, - _prop_log_reg, - {10}, - "log_regression", - 2, - 10, - 1, - 0, - 1.0, - 1e-50, - 1e50 - ); -#else - FeatureSpace feat_space( - mpi_setup::comm, - _phi_0, - _allowed_ops, - _prop_log_reg, - {10}, - "log_regression", - 2, - 10, - 1, - 0, - 1.0, - 1e-50, - 1e50 - ); -#endif + _inputs.set_calc_type("log_regression"); + _inputs.set_prop_train(_prop_log_reg); + + FeatureSpace feat_space(_inputs); + std::transform(_prop_log_reg.begin(), _prop_log_reg.end(), _prop_log_reg.begin(), [](double pl){return std::log(pl);}); feat_space.sis(_prop_log_reg); - EXPECT_EQ(feat_space.task_sizes()[0], 10); + EXPECT_EQ(feat_space.task_sizes_train()[0], _inputs.task_sizes_train()[0]); EXPECT_STREQ(feat_space.feature_space_file().c_str(), "feature_space/selected_features.txt"); EXPECT_EQ(feat_space.l_bound(), 1e-50); EXPECT_EQ(feat_space.u_bound(), 1e50); EXPECT_EQ(feat_space.max_rung(), 2); EXPECT_EQ(feat_space.n_sis_select(), 10); - EXPECT_EQ(feat_space.n_samp(), 10); + EXPECT_EQ(feat_space.n_samp_train(), 10); EXPECT_EQ(feat_space.n_feat(), 154); EXPECT_EQ(feat_space.n_rung_store(), 1); EXPECT_EQ(feat_space.n_rung_generate(), 0); std::vector<double> log_a(10, 0.0); EXPECT_LT(std::abs(1.0 - util_funcs::log_r2(feat_space.phi_selected()[0]->value_ptr(), _prop_log_reg.data(), log_a.data(), 10)), 1e-8); - 
EXPECT_LT(std::abs(feat_space.phi0()[0]->value()[0] - _phi_0[0]->value()[0]), 1e-8); - EXPECT_LT(std::abs(feat_space.phi()[0]->value()[0] - _phi_0[0]->value()[0]), 1e-8); + EXPECT_LT(std::abs(feat_space.phi0()[0]->value()[0] - _inputs.phi_0()[0].value()[0]), 1e-8); + EXPECT_LT(std::abs(feat_space.phi()[0]->value()[0] - _inputs.phi_0()[0].value()[0]), 1e-8); boost::filesystem::remove_all("feature_space/"); } TEST_F(FeatSpaceTest, ClassTest) { -#ifdef PARAMETERIZE - FeatureSpace feat_space( - mpi_setup::comm, - _phi_0, - _allowed_ops, - _allowed_param_ops, - _prop_class, - {10}, - "classification", - 0, - 1, - 0, - 0, - 1.0, - 1e-50, - 1e50 - ); -#else - FeatureSpace feat_space( - mpi_setup::comm, - _phi_0, - _allowed_ops, - _prop_class, - {10}, - "classification", - 0, - 1, - 0, - 0, - 1.0, - 1e-50, - 1e50 - ); -#endif + _inputs.set_max_rung(0); + _inputs.set_n_sis_select(1); + _inputs.set_n_rung_store(0); + _inputs.set_calc_type("classification"); + _inputs.set_prop_train(_prop_class); + + FeatureSpace feat_space(_inputs); feat_space.sis(_prop_class); - EXPECT_EQ(feat_space.task_sizes()[0], 10); + EXPECT_EQ(feat_space.task_sizes_train()[0], _inputs.task_sizes_train()[0]); EXPECT_STREQ(feat_space.feature_space_file().c_str(), "feature_space/selected_features.txt"); EXPECT_EQ(feat_space.l_bound(), 1e-50); EXPECT_EQ(feat_space.u_bound(), 1e50); EXPECT_EQ(feat_space.max_rung(), 0); EXPECT_EQ(feat_space.n_sis_select(), 1); - EXPECT_EQ(feat_space.n_samp(), 10); + EXPECT_EQ(feat_space.n_samp_train(), 10); EXPECT_EQ(feat_space.n_feat(), 3); EXPECT_EQ(feat_space.n_rung_store(), 0); EXPECT_EQ(feat_space.n_rung_generate(), 0); EXPECT_LT(std::abs(feat_space.phi_selected()[0]->value()[1] + 3.0), 1e-10); - EXPECT_LT(std::abs(feat_space.phi0()[0]->value()[0] - _phi_0[0]->value()[0]), 1e-10); - EXPECT_LT(std::abs(feat_space.phi()[0]->value()[0] - _phi_0[0]->value()[0]), 1e-10); + EXPECT_LT(std::abs(feat_space.phi0()[0]->value()[0] - _inputs.phi_0()[0].value()[0]), 1e-10); + 
EXPECT_LT(std::abs(feat_space.phi()[0]->value()[0] - _inputs.phi_0()[0].value()[0]), 1e-10); boost::filesystem::remove_all("feature_space/"); } diff --git a/tests/googletest/feature_creation/parameterization/test_abs_diff_node.cc b/tests/googletest/feature_creation/parameterization/test_abs_diff_node.cc index bf271c283738a711ce1828d380c6e08caddb84f8..4a39804331815cebd145f1a08c85eaea2a1949e0 100644 --- a/tests/googletest/feature_creation/parameterization/test_abs_diff_node.cc +++ b/tests/googletest/feature_creation/parameterization/test_abs_diff_node.cc @@ -28,27 +28,29 @@ namespace { nlopt_wrapper::MAX_PARAM_DEPTH = 1; - node_value_arrs::initialize_values_arr(90, 10, 2, 2, true, true); - _task_sizes_train = {90}; + _task_sizes_test = {10}; + + node_value_arrs::initialize_values_arr(_task_sizes_train, _task_sizes_test, 2, 2, true); + - std::vector<double> value_1(90, 0.0); - std::vector<double> value_2(90, 0.0); + std::vector<double> value_1(_task_sizes_train[0], 0.0); + std::vector<double> value_2(_task_sizes_train[0], 0.0); - std::vector<double> test_value_1(10, 0.0); - std::vector<double> test_value_2(10, 0.0); + std::vector<double> test_value_1(_task_sizes_test[0], 0.0); + std::vector<double> test_value_2(_task_sizes_test[0], 0.0); std::default_random_engine generator; std::uniform_real_distribution<double> distribution_feats(-10.0, 10.0); std::uniform_real_distribution<double> distribution_params(-2.50, 2.50); - for(int ii = 0; ii < 90; ++ii) + for(int ii = 0; ii < _task_sizes_train[0]; ++ii) { value_1[ii] = distribution_feats(generator); value_2[ii] = distribution_feats(generator); } - for(int ii = 0; ii < 10; ++ii) + for(int ii = 0; ii < _task_sizes_test[0]; ++ii) { test_value_1[ii] = distribution_feats(generator); test_value_2[ii] = distribution_feats(generator); @@ -60,9 +62,9 @@ namespace _phi = {_feat_1, _feat_2}; _a = distribution_params(generator); _alpha = distribution_params(generator); - _prop = std::vector<double>(90, 0.0); + _prop = 
std::vector<double>(_task_sizes_train[0], 0.0); - allowed_op_funcs::abs_diff(90, _phi[0]->value_ptr(), _phi[1]->value_ptr(), _alpha, _a, _prop.data()); + allowed_op_funcs::abs_diff(_task_sizes_train[0], _phi[0]->value_ptr(), _phi[1]->value_ptr(), _alpha, _a, _prop.data()); _optimizer = nlopt_wrapper::get_optimizer("regression",_task_sizes_train, _prop, 1); } @@ -74,6 +76,7 @@ namespace std::vector<node_ptr> _phi; std::vector<double> _prop; std::vector<int> _task_sizes_train; + std::vector<int> _task_sizes_test; double _a; double _alpha; diff --git a/tests/googletest/feature_creation/parameterization/test_abs_node.cc b/tests/googletest/feature_creation/parameterization/test_abs_node.cc index 10f6ed07532c1770c51c1af36fc86d6ff320c02b..3705565571cabc9f9249a039795de9e2bab51426 100644 --- a/tests/googletest/feature_creation/parameterization/test_abs_node.cc +++ b/tests/googletest/feature_creation/parameterization/test_abs_node.cc @@ -28,22 +28,22 @@ namespace { nlopt_wrapper::MAX_PARAM_DEPTH = 1; - node_value_arrs::initialize_values_arr(900, 10, 1, 2, true, true); - _task_sizes_train = {900}; + _task_sizes_test = {10}; - std::vector<double> value_1(900, 0.0); + node_value_arrs::initialize_values_arr(_task_sizes_train, _task_sizes_test, 1, 2, true); - std::vector<double> test_value_1(10, 0.0); + std::vector<double> value_1(_task_sizes_train[0], 0.0); + std::vector<double> test_value_1(_task_sizes_test[0], 0.0); std::default_random_engine generator; std::uniform_real_distribution<double> distribution_feats(-50.0, 50.0); std::uniform_real_distribution<double> distribution_params(-2.50, 2.50); - for(int ii = 0; ii < 900; ++ii) + for(int ii = 0; ii < _task_sizes_train[0]; ++ii) value_1[ii] = distribution_feats(generator); - for(int ii = 0; ii < 10; ++ii) + for(int ii = 0; ii < _task_sizes_test[0]; ++ii) test_value_1[ii] = distribution_feats(generator); _feat_1 = std::make_shared<FeatureNode>(0, "A", value_1, test_value_1, Unit("m")); @@ -52,8 +52,8 @@ namespace _a = 
distribution_params(generator); _alpha = distribution_params(generator); - _prop = std::vector<double>(900, 0.0); - allowed_op_funcs::abs(900, _phi[0]->value_ptr(), _alpha, _a, _prop.data()); + _prop = std::vector<double>(_task_sizes_train[0], 0.0); + allowed_op_funcs::abs(_task_sizes_train[0], _phi[0]->value_ptr(), _alpha, _a, _prop.data()); _optimizer = nlopt_wrapper::get_optimizer("regression",_task_sizes_train, _prop, 1); } @@ -64,6 +64,7 @@ namespace std::vector<node_ptr> _phi; std::vector<double> _prop; std::vector<int> _task_sizes_train; + std::vector<int> _task_sizes_test; double _a; double _alpha; diff --git a/tests/googletest/feature_creation/parameterization/test_add_node.cc b/tests/googletest/feature_creation/parameterization/test_add_node.cc index d94fb5d5638a52b0d1eee2c15e4b8f54aa547501..185d5ca2dfeb740e5fee1775787a5329bb9decc6 100644 --- a/tests/googletest/feature_creation/parameterization/test_add_node.cc +++ b/tests/googletest/feature_creation/parameterization/test_add_node.cc @@ -29,27 +29,28 @@ namespace { nlopt_wrapper::MAX_PARAM_DEPTH = 1; - node_value_arrs::initialize_values_arr(90, 10, 2, 2, true, true); - _task_sizes_train = {90}; + _task_sizes_test = {10}; + node_value_arrs::initialize_values_arr(_task_sizes_train, _task_sizes_test, 2, 2, true); + - std::vector<double> value_1(90, 0.0); - std::vector<double> value_2(90, 0.0); + std::vector<double> value_1(_task_sizes_train[0], 0.0); + std::vector<double> value_2(_task_sizes_train[0], 0.0); - std::vector<double> test_value_1(10, 0.0); - std::vector<double> test_value_2(10, 0.0); + std::vector<double> test_value_1(_task_sizes_test[0], 0.0); + std::vector<double> test_value_2(_task_sizes_test[0], 0.0); std::default_random_engine generator; std::uniform_real_distribution<double> distribution_feats(-50.0, 50.0); std::uniform_real_distribution<double> distribution_params(-2.50, 2.50); - for(int ii = 0; ii < 90; ++ii) + for(int ii = 0; ii < _task_sizes_train[0]; ++ii) { value_1[ii] = 
distribution_feats(generator); value_2[ii] = distribution_feats(generator); } - for(int ii = 0; ii < 10; ++ii) + for(int ii = 0; ii < _task_sizes_test[0]; ++ii) { test_value_1[ii] = distribution_feats(generator); test_value_2[ii] = distribution_feats(generator); @@ -61,8 +62,8 @@ namespace _phi = {_feat_1, _feat_2}; _a = distribution_params(generator); _alpha = distribution_params(generator); - _prop = std::vector<double>(90, 0.0); - allowed_op_funcs::add(90, _phi[0]->value_ptr(), _phi[1]->value_ptr(), _alpha, _a, _prop.data()); + _prop = std::vector<double>(_task_sizes_train[0], 0.0); + allowed_op_funcs::add(_task_sizes_train[0], _phi[0]->value_ptr(), _phi[1]->value_ptr(), _alpha, _a, _prop.data()); _optimizer = nlopt_wrapper::get_optimizer("regression",_task_sizes_train, _prop, 1); } @@ -74,6 +75,7 @@ namespace std::vector<node_ptr> _phi; std::vector<double> _prop; std::vector<int> _task_sizes_train; + std::vector<int> _task_sizes_test; double _a; double _alpha; diff --git a/tests/googletest/feature_creation/parameterization/test_cb_node.cc b/tests/googletest/feature_creation/parameterization/test_cb_node.cc index e975d4b23fb85d0572a1183e5d54b39db6e94cc2..34cb54cdd87e43e423773a2880105d3de91dc680 100644 --- a/tests/googletest/feature_creation/parameterization/test_cb_node.cc +++ b/tests/googletest/feature_creation/parameterization/test_cb_node.cc @@ -29,28 +29,30 @@ namespace { nlopt_wrapper::MAX_PARAM_DEPTH = 1; - node_value_arrs::initialize_values_arr(900, 10, 2, 2, true, true); - _task_sizes_train = {900}; + _task_sizes_test = {10}; + + node_value_arrs::initialize_values_arr(_task_sizes_train, _task_sizes_test, 2, 2, true); + - std::vector<double> value_1(900, 0.0); - std::vector<double> value_2(900, 0.0); + std::vector<double> value_1(_task_sizes_train[0], 0.0); + std::vector<double> value_2(_task_sizes_train[0], 0.0); - std::vector<double> test_value_1(10, 0.0); - std::vector<double> test_value_2(10, 0.0); + std::vector<double> 
test_value_1(_task_sizes_test[0], 0.0); + std::vector<double> test_value_2(_task_sizes_test[0], 0.0); std::default_random_engine generator; std::uniform_real_distribution<double> distribution_feats(-500.0, 500.0); std::uniform_real_distribution<double> distribution_params(1e-10, 1.50); std::normal_distribution<double> distribution_err(0.0, 0.01); - for(int ii = 0; ii < 900; ++ii) + for(int ii = 0; ii < _task_sizes_train[0]; ++ii) { value_1[ii] = distribution_feats(generator); value_2[ii] = std::abs(distribution_feats(generator)) + 1e-10; } - for(int ii = 0; ii < 10; ++ii) + for(int ii = 0; ii < _task_sizes_test[0]; ++ii) { test_value_1[ii] = distribution_feats(generator); test_value_2[ii] = std::abs(distribution_feats(generator)) + 1e-10; @@ -62,8 +64,8 @@ namespace _phi = {_feat_1, _feat_2}; _a = distribution_params(generator); _alpha = distribution_params(generator); - _prop = std::vector<double>(900, 0.0); - allowed_op_funcs::cb(900, _phi[1]->value_ptr(), _alpha, _a, _prop.data()); + _prop = std::vector<double>(_task_sizes_train[0], 0.0); + allowed_op_funcs::cb(_task_sizes_train[0], _phi[1]->value_ptr(), _alpha, _a, _prop.data()); std::transform(_prop.begin(), _prop.end(), _prop.begin(), [&](double p){return p + distribution_err(generator);}); _optimizer = nlopt_wrapper::get_optimizer("regression",_task_sizes_train, _prop, 1); @@ -76,6 +78,7 @@ namespace std::vector<node_ptr> _phi; std::vector<double> _prop; std::vector<int> _task_sizes_train; + std::vector<int> _task_sizes_test; double _a; double _alpha; diff --git a/tests/googletest/feature_creation/parameterization/test_cbrt_node.cc b/tests/googletest/feature_creation/parameterization/test_cbrt_node.cc index 0535e6f4b7ebc37cc86e6b7442d23364021ff1b3..4139dc438141be35735f3831ecf6cb5beaf3ea62 100644 --- a/tests/googletest/feature_creation/parameterization/test_cbrt_node.cc +++ b/tests/googletest/feature_creation/parameterization/test_cbrt_node.cc @@ -29,27 +29,28 @@ namespace { nlopt_wrapper::MAX_PARAM_DEPTH = 
1; - node_value_arrs::initialize_values_arr(900, 10, 2, 2, true, true); - _task_sizes_train = {900}; + _task_sizes_test = {10}; + node_value_arrs::initialize_values_arr(_task_sizes_train, _task_sizes_test, 2, 2, true); + - std::vector<double> value_1(900, 0.0); - std::vector<double> value_2(900, 0.0); + std::vector<double> value_1(_task_sizes_train[0], 0.0); + std::vector<double> value_2(_task_sizes_train[0], 0.0); - std::vector<double> test_value_1(10, 0.0); - std::vector<double> test_value_2(10, 0.0); + std::vector<double> test_value_1(_task_sizes_test[0], 0.0); + std::vector<double> test_value_2(_task_sizes_test[0], 0.0); std::default_random_engine generator; std::uniform_real_distribution<double> distribution_feats(10.0, 5000.0); std::uniform_real_distribution<double> distribution_params(0.5, 1.50); - for(int ii = 0; ii < 900; ++ii) + for(int ii = 0; ii < _task_sizes_train[0]; ++ii) { value_1[ii] = distribution_feats(generator); value_2[ii] = std::abs(distribution_feats(generator)) + 1e-10; } - for(int ii = 0; ii < 10; ++ii) + for(int ii = 0; ii < _task_sizes_test[0]; ++ii) { test_value_1[ii] = distribution_feats(generator); test_value_2[ii] = std::abs(distribution_feats(generator)) + 1e-10; @@ -61,8 +62,8 @@ namespace _phi = {_feat_1, _feat_2}; _a = distribution_params(generator); _alpha = std::pow(distribution_params(generator), 3.0); - _prop = std::vector<double>(900, 0.0); - allowed_op_funcs::cbrt(900, _phi[1]->value_ptr(), _alpha, _a, _prop.data()); + _prop = std::vector<double>(_task_sizes_train[0], 0.0); + allowed_op_funcs::cbrt(_task_sizes_train[0], _phi[1]->value_ptr(), _alpha, _a, _prop.data()); _optimizer = nlopt_wrapper::get_optimizer("regression",_task_sizes_train, _prop, 1); } @@ -74,6 +75,7 @@ namespace std::vector<node_ptr> _phi; std::vector<double> _prop; std::vector<int> _task_sizes_train; + std::vector<int> _task_sizes_test; double _a; double _alpha; diff --git a/tests/googletest/feature_creation/parameterization/test_cos_node.cc 
b/tests/googletest/feature_creation/parameterization/test_cos_node.cc index a6f445561cf622a44e45b7020c877b44f494b5f4..34c554e6a2fe9688de4c377c0a1e14d9df032447 100644 --- a/tests/googletest/feature_creation/parameterization/test_cos_node.cc +++ b/tests/googletest/feature_creation/parameterization/test_cos_node.cc @@ -30,26 +30,27 @@ namespace { nlopt_wrapper::MAX_PARAM_DEPTH = 1; - node_value_arrs::initialize_values_arr(900, 10, 3, 2, true, true); - _task_sizes_train = {900}; + _task_sizes_test = {10}; + node_value_arrs::initialize_values_arr(_task_sizes_train, _task_sizes_test, 3, 2, true); + - std::vector<double> value_1(900, 0.0); - std::vector<double> value_2(900, 0.0); + std::vector<double> value_1(_task_sizes_train[0], 0.0); + std::vector<double> value_2(_task_sizes_train[0], 0.0); - std::vector<double> test_value_1(10, 0.0); - std::vector<double> test_value_2(10, 0.0); + std::vector<double> test_value_1(_task_sizes_test[0], 0.0); + std::vector<double> test_value_2(_task_sizes_test[0], 0.0); std::default_random_engine generator; std::uniform_real_distribution<double> distribution_feats(-6.23, 6.23); - for(int ii = 0; ii < 900; ++ii) + for(int ii = 0; ii < _task_sizes_train[0]; ++ii) { value_1[ii] = distribution_feats(generator); value_2[ii] = distribution_feats(generator); } - for(int ii = 0; ii < 10; ++ii) + for(int ii = 0; ii < _task_sizes_test[0]; ++ii) { test_value_1[ii] = distribution_feats(generator); test_value_2[ii] = distribution_feats(generator); @@ -65,8 +66,8 @@ namespace _a = 0.143; _alpha = 1.05; - _prop = std::vector<double>(900, 0.0); - allowed_op_funcs::cos(900, _phi[0]->value_ptr(), _alpha, _a, _prop.data()); + _prop = std::vector<double>(_task_sizes_train[0], 0.0); + allowed_op_funcs::cos(_task_sizes_train[0], _phi[0]->value_ptr(), _alpha, _a, _prop.data()); _optimizer = nlopt_wrapper::get_optimizer("regression",_task_sizes_train, _prop, 1); } @@ -79,6 +80,7 @@ namespace std::vector<node_ptr> _phi; std::vector<double> _prop; std::vector<int> 
_task_sizes_train; + std::vector<int> _task_sizes_test; double _a; double _alpha; diff --git a/tests/googletest/feature_creation/parameterization/test_div_node.cc b/tests/googletest/feature_creation/parameterization/test_div_node.cc index fa85365646170513432e88cabc69805236d82fc3..f5b50f5dfebbef5e3d63e039f4e715313e0bbf83 100644 --- a/tests/googletest/feature_creation/parameterization/test_div_node.cc +++ b/tests/googletest/feature_creation/parameterization/test_div_node.cc @@ -28,27 +28,29 @@ namespace { nlopt_wrapper::MAX_PARAM_DEPTH = 1; - node_value_arrs::initialize_values_arr(90, 10, 2, 2, true, true); - _task_sizes_train = {90}; + _task_sizes_test = {10}; + + node_value_arrs::initialize_values_arr(_task_sizes_train, _task_sizes_test, 2, 2, true); + - std::vector<double> value_1(90, 0.0); - std::vector<double> value_2(90, 0.0); + std::vector<double> value_1(_task_sizes_train[0], 0.0); + std::vector<double> value_2(_task_sizes_train[0], 0.0); - std::vector<double> test_value_1(10, 0.0); - std::vector<double> test_value_2(10, 0.0); + std::vector<double> test_value_1(_task_sizes_test[0], 0.0); + std::vector<double> test_value_2(_task_sizes_test[0], 0.0); std::default_random_engine generator; std::uniform_real_distribution<double> distribution_feats(-50.0, 50.0); std::uniform_real_distribution<double> distribution_params(1e-10, 2.50); - for(int ii = 0; ii < 90; ++ii) + for(int ii = 0; ii < _task_sizes_train[0]; ++ii) { value_1[ii] = distribution_feats(generator); value_2[ii] = std::abs(distribution_feats(generator)) + 1e-10; } - for(int ii = 0; ii < 10; ++ii) + for(int ii = 0; ii < _task_sizes_test[0]; ++ii) { test_value_1[ii] = distribution_feats(generator); test_value_2[ii] = std::abs(distribution_feats(generator)) + 1e-10; @@ -60,8 +62,8 @@ namespace _phi = {_feat_1, _feat_2}; _a = distribution_params(generator); _alpha = distribution_params(generator); - _prop = std::vector<double>(90, 0.0); - allowed_op_funcs::div(90, _phi[0]->value_ptr(), _phi[1]->value_ptr(), 
_alpha, _a, _prop.data()); + _prop = std::vector<double>(_task_sizes_train[0], 0.0); + allowed_op_funcs::div(_task_sizes_train[0], _phi[0]->value_ptr(), _phi[1]->value_ptr(), _alpha, _a, _prop.data()); _optimizer = nlopt_wrapper::get_optimizer("regression",_task_sizes_train, _prop, 1); } @@ -73,6 +75,7 @@ namespace std::vector<node_ptr> _phi; std::vector<double> _prop; std::vector<int> _task_sizes_train; + std::vector<int> _task_sizes_test; double _a; double _alpha; diff --git a/tests/googletest/feature_creation/parameterization/test_exp_node.cc b/tests/googletest/feature_creation/parameterization/test_exp_node.cc index 0e5d28bde730a7f44a9df3894650ffe59796876f..6e91b250171bd1c85579e31809aaae95baf2b486 100644 --- a/tests/googletest/feature_creation/parameterization/test_exp_node.cc +++ b/tests/googletest/feature_creation/parameterization/test_exp_node.cc @@ -30,27 +30,29 @@ namespace void SetUp() override { nlopt_wrapper::MAX_PARAM_DEPTH = 1; - node_value_arrs::initialize_values_arr(900, 10, 3, 2, true, true); _task_sizes_train = {900}; + _task_sizes_test = {10}; - std::vector<double> value_1(900, 0.0); - std::vector<double> value_2(900, 0.0); + node_value_arrs::initialize_values_arr(_task_sizes_train, _task_sizes_test, 3, 2, true); - std::vector<double> test_value_1(10, 0.0); - std::vector<double> test_value_2(10, 0.0); + std::vector<double> value_1(_task_sizes_train[0], 0.0); + std::vector<double> value_2(_task_sizes_train[0], 0.0); + + std::vector<double> test_value_1(_task_sizes_test[0], 0.0); + std::vector<double> test_value_2(_task_sizes_test[0], 0.0); std::default_random_engine generator; std::uniform_real_distribution<double> distribution_feats(-2.0, 2.0); std::uniform_real_distribution<double> distribution_params(0.75, 1.25); - for(int ii = 0; ii < 900; ++ii) + for(int ii = 0; ii < _task_sizes_train[0]; ++ii) { value_1[ii] = distribution_feats(generator); value_2[ii] = std::abs(distribution_feats(generator)) + 1e-10; } - for(int ii = 0; ii < 10; ++ii) + 
for(int ii = 0; ii < _task_sizes_test[0]; ++ii) { test_value_1[ii] = distribution_feats(generator); test_value_2[ii] = std::abs(distribution_feats(generator)) + 1e-10; @@ -67,8 +69,8 @@ namespace _a = std::log(distribution_params(generator)); _alpha = distribution_params(generator); - _prop = std::vector<double>(900, 0.0); - allowed_op_funcs::exp(900, _phi[0]->value_ptr(), _alpha, _a, _prop.data()); + _prop = std::vector<double>(_task_sizes_train[0], 0.0); + allowed_op_funcs::exp(_task_sizes_train[0], _phi[0]->value_ptr(), _alpha, _a, _prop.data()); _optimizer = nlopt_wrapper::get_optimizer("regression",_task_sizes_train, _prop, 1); } @@ -81,6 +83,7 @@ namespace std::vector<node_ptr> _phi; std::vector<double> _prop; std::vector<int> _task_sizes_train; + std::vector<int> _task_sizes_test; double _a; double _alpha; diff --git a/tests/googletest/feature_creation/parameterization/test_inv_node.cc b/tests/googletest/feature_creation/parameterization/test_inv_node.cc index d05ebaf073268026c3bc6d4e1d156ca63b66116f..e1f8ecca477927a1ec0129ce6b9747a2e974537b 100644 --- a/tests/googletest/feature_creation/parameterization/test_inv_node.cc +++ b/tests/googletest/feature_creation/parameterization/test_inv_node.cc @@ -28,27 +28,29 @@ namespace { nlopt_wrapper::MAX_PARAM_DEPTH = 1; - node_value_arrs::initialize_values_arr(90, 10, 2, 2, true, true); - _task_sizes_train = {90}; + _task_sizes_test = {10}; + + node_value_arrs::initialize_values_arr(_task_sizes_train, _task_sizes_test, 2, 2, true); + - std::vector<double> value_1(90, 0.0); - std::vector<double> value_2(90, 0.0); + std::vector<double> value_1(_task_sizes_train[0], 0.0); + std::vector<double> value_2(_task_sizes_train[0], 0.0); - std::vector<double> test_value_1(10, 0.0); - std::vector<double> test_value_2(10, 0.0); + std::vector<double> test_value_1(_task_sizes_test[0], 0.0); + std::vector<double> test_value_2(_task_sizes_test[0], 0.0); std::default_random_engine generator; std::uniform_real_distribution<double> 
distribution_feats(-50.0, 50.0); std::uniform_real_distribution<double> distribution_params(1e-10, 2.50); - for(int ii = 0; ii < 90; ++ii) + for(int ii = 0; ii < _task_sizes_train[0]; ++ii) { value_1[ii] = distribution_feats(generator); value_2[ii] = std::abs(distribution_feats(generator)) + 1e-10; } - for(int ii = 0; ii < 10; ++ii) + for(int ii = 0; ii < _task_sizes_test[0]; ++ii) { test_value_1[ii] = distribution_feats(generator); test_value_2[ii] = std::abs(distribution_feats(generator)) + 1e-10; @@ -60,8 +62,8 @@ namespace _phi = {_feat_1, _feat_2}; _a = distribution_params(generator); _alpha = distribution_params(generator); - _prop = std::vector<double>(90, 0.0); - allowed_op_funcs::inv(90, _phi[1]->value_ptr(), _alpha, _a, _prop.data()); + _prop = std::vector<double>(_task_sizes_train[0], 0.0); + allowed_op_funcs::inv(_task_sizes_train[0], _phi[1]->value_ptr(), _alpha, _a, _prop.data()); _optimizer = nlopt_wrapper::get_optimizer("regression",_task_sizes_train, _prop, 1); } @@ -73,6 +75,7 @@ namespace std::vector<node_ptr> _phi; std::vector<double> _prop; std::vector<int> _task_sizes_train; + std::vector<int> _task_sizes_test; double _a; double _alpha; diff --git a/tests/googletest/feature_creation/parameterization/test_log_node.cc b/tests/googletest/feature_creation/parameterization/test_log_node.cc index 07216f4d70ff65adb99d602ccb05673b467b58d9..f716721b344558dde98c0fdc2f506cd76f90e262 100644 --- a/tests/googletest/feature_creation/parameterization/test_log_node.cc +++ b/tests/googletest/feature_creation/parameterization/test_log_node.cc @@ -29,30 +29,31 @@ namespace protected: void SetUp() override { - node_value_arrs::initialize_values_arr(900, 10, 3, 2, true, true); - _task_sizes_train = {900}; + _task_sizes_test = {10}; + + node_value_arrs::initialize_values_arr(_task_sizes_train, _task_sizes_test, 3, 2, true); - std::vector<double> value_1(900, 0.0); - std::vector<double> value_2(900, 0.0); - std::vector<double> value_3(900, 0.0); + std::vector<double> 
value_1(_task_sizes_train[0], 0.0); + std::vector<double> value_2(_task_sizes_train[0], 0.0); + std::vector<double> value_3(_task_sizes_train[0], 0.0); - std::vector<double> test_value_1(10, 0.0); - std::vector<double> test_value_2(10, 0.0); - std::vector<double> test_value_3(10, 0.0); + std::vector<double> test_value_1(_task_sizes_test[0], 0.0); + std::vector<double> test_value_2(_task_sizes_test[0], 0.0); + std::vector<double> test_value_3(_task_sizes_test[0], 0.0); std::default_random_engine generator; std::uniform_real_distribution<double> distribution_feats(-10.0, 10.0); std::uniform_real_distribution<double> distribution_params(0.1, 1.50); - for(int ii = 0; ii < 900; ++ii) + for(int ii = 0; ii < _task_sizes_train[0]; ++ii) { value_1[ii] = distribution_feats(generator); value_2[ii] = distribution_feats(generator); value_3[ii] = std::exp(distribution_feats(generator)); } - for(int ii = 0; ii < 10; ++ii) + for(int ii = 0; ii < _task_sizes_test[0]; ++ii) { test_value_1[ii] = distribution_feats(generator); test_value_2[ii] = distribution_feats(generator); @@ -70,8 +71,8 @@ namespace _a = distribution_params(generator); _alpha = distribution_params(generator); - _prop = std::vector<double>(900, 0.0); - allowed_op_funcs::log(900, _phi[2]->value_ptr(), _alpha, _a, _prop.data()); + _prop = std::vector<double>(_task_sizes_train[0], 0.0); + allowed_op_funcs::log(_task_sizes_train[0], _phi[2]->value_ptr(), _alpha, _a, _prop.data()); _optimizer = nlopt_wrapper::get_optimizer("regression",_task_sizes_train, _prop, 1); } @@ -84,6 +85,7 @@ namespace std::vector<node_ptr> _phi; std::vector<double> _prop; std::vector<int> _task_sizes_train; + std::vector<int> _task_sizes_test; double _a; double _alpha; diff --git a/tests/googletest/feature_creation/parameterization/test_mult_node.cc b/tests/googletest/feature_creation/parameterization/test_mult_node.cc index af26be8822b124edd6f39b779c9607269698ce9e..65e8e036c22b179380d1b8992f15f023fdfed6f1 100644 --- 
a/tests/googletest/feature_creation/parameterization/test_mult_node.cc +++ b/tests/googletest/feature_creation/parameterization/test_mult_node.cc @@ -28,27 +28,28 @@ namespace { nlopt_wrapper::MAX_PARAM_DEPTH = 1; - node_value_arrs::initialize_values_arr(900, 10, 2, 2, true, true); - _task_sizes_train = {900}; + _task_sizes_test = {10}; + + node_value_arrs::initialize_values_arr(_task_sizes_train, _task_sizes_test, 2, 2, true); - std::vector<double> value_1(900, 0.0); - std::vector<double> value_2(900, 0.0); + std::vector<double> value_1(_task_sizes_train[0], 0.0); + std::vector<double> value_2(_task_sizes_train[0], 0.0); - std::vector<double> test_value_1(10, 0.0); - std::vector<double> test_value_2(10, 0.0); + std::vector<double> test_value_1(_task_sizes_test[0], 0.0); + std::vector<double> test_value_2(_task_sizes_test[0], 0.0); std::default_random_engine generator; std::uniform_real_distribution<double> distribution_feats(-50.0, 50.0); std::uniform_real_distribution<double> distribution_params(-2.50, 2.50); - for(int ii = 0; ii < 900; ++ii) + for(int ii = 0; ii < _task_sizes_train[0]; ++ii) { value_1[ii] = distribution_feats(generator); value_2[ii] = distribution_feats(generator); } - for(int ii = 0; ii < 10; ++ii) + for(int ii = 0; ii < _task_sizes_test[0]; ++ii) { test_value_1[ii] = distribution_feats(generator); test_value_2[ii] = distribution_feats(generator); @@ -60,8 +61,8 @@ namespace _phi = {_feat_1, _feat_2}; _a = distribution_params(generator); _alpha = distribution_params(generator); - _prop = std::vector<double>(900, 0.0); - allowed_op_funcs::mult(900, _phi[0]->value_ptr(), _phi[1]->value_ptr(), _alpha, _a, _prop.data()); + _prop = std::vector<double>(_task_sizes_train[0], 0.0); + allowed_op_funcs::mult(_task_sizes_train[0], _phi[0]->value_ptr(), _phi[1]->value_ptr(), _alpha, _a, _prop.data()); _optimizer = nlopt_wrapper::get_optimizer("regression",_task_sizes_train, _prop, 1); } @@ -73,6 +74,7 @@ namespace std::vector<node_ptr> _phi; 
std::vector<double> _prop; std::vector<int> _task_sizes_train; + std::vector<int> _task_sizes_test; double _a; double _alpha; diff --git a/tests/googletest/feature_creation/parameterization/test_neg_exp_node.cc b/tests/googletest/feature_creation/parameterization/test_neg_exp_node.cc index da2ead06f7d22af3b1e7dad8c30d373893f68ca8..ff9ad5cc52b6a8ff9d9f480fdeea481fef0d2fdd 100644 --- a/tests/googletest/feature_creation/parameterization/test_neg_exp_node.cc +++ b/tests/googletest/feature_creation/parameterization/test_neg_exp_node.cc @@ -31,27 +31,28 @@ namespace { nlopt_wrapper::MAX_PARAM_DEPTH = 1; - node_value_arrs::initialize_values_arr(900, 10, 3, 2, true, true); - _task_sizes_train = {900}; + _task_sizes_test = {10}; + + node_value_arrs::initialize_values_arr(_task_sizes_train, _task_sizes_test, 3, 2, true); - std::vector<double> value_1(900, 0.0); - std::vector<double> value_2(900, 0.0); + std::vector<double> value_1(_task_sizes_train[0], 0.0); + std::vector<double> value_2(_task_sizes_train[0], 0.0); - std::vector<double> test_value_1(10, 0.0); - std::vector<double> test_value_2(10, 0.0); + std::vector<double> test_value_1(_task_sizes_test[0], 0.0); + std::vector<double> test_value_2(_task_sizes_test[0], 0.0); std::default_random_engine generator; std::uniform_real_distribution<double> distribution_feats(-2.0, 2.0); std::uniform_real_distribution<double> distribution_params(0.75, 1.25); - for(int ii = 0; ii < 900; ++ii) + for(int ii = 0; ii < _task_sizes_train[0]; ++ii) { value_1[ii] = distribution_feats(generator); value_2[ii] = std::abs(distribution_feats(generator)) + 1e-10; } - for(int ii = 0; ii < 10; ++ii) + for(int ii = 0; ii < _task_sizes_test[0]; ++ii) { test_value_1[ii] = distribution_feats(generator); test_value_2[ii] = std::abs(distribution_feats(generator)) + 1e-10; @@ -68,8 +69,8 @@ namespace _a = distribution_params(generator); _alpha = distribution_params(generator); - _prop = std::vector<double>(900, 0.0); - allowed_op_funcs::neg_exp(900, 
_phi[0]->value_ptr(), _alpha, _a, _prop.data()); + _prop = std::vector<double>(_task_sizes_train[0], 0.0); + allowed_op_funcs::neg_exp(_task_sizes_train[0], _phi[0]->value_ptr(), _alpha, _a, _prop.data()); _optimizer = nlopt_wrapper::get_optimizer("regression",_task_sizes_train, _prop, 1); } @@ -82,6 +83,7 @@ namespace std::vector<node_ptr> _phi; std::vector<double> _prop; std::vector<int> _task_sizes_train; + std::vector<int> _task_sizes_test; double _a; double _alpha; diff --git a/tests/googletest/feature_creation/parameterization/test_sin_node.cc b/tests/googletest/feature_creation/parameterization/test_sin_node.cc index 083656ad6ce243aa0065553dcadba5de4c814498..dd19a4e34221adc5fdc36f089b3527d7312fde76 100644 --- a/tests/googletest/feature_creation/parameterization/test_sin_node.cc +++ b/tests/googletest/feature_creation/parameterization/test_sin_node.cc @@ -30,26 +30,27 @@ namespace { nlopt_wrapper::MAX_PARAM_DEPTH = 1; - node_value_arrs::initialize_values_arr(900, 10, 3, 2, true, true); - _task_sizes_train = {900}; + _task_sizes_test = {10}; + + node_value_arrs::initialize_values_arr(_task_sizes_train, _task_sizes_test, 3, 2, true); - std::vector<double> value_1(900, 0.0); - std::vector<double> value_2(900, 0.0); + std::vector<double> value_1(_task_sizes_train[0], 0.0); + std::vector<double> value_2(_task_sizes_train[0], 0.0); - std::vector<double> test_value_1(10, 0.0); - std::vector<double> test_value_2(10, 0.0); + std::vector<double> test_value_1(_task_sizes_test[0], 0.0); + std::vector<double> test_value_2(_task_sizes_test[0], 0.0); std::default_random_engine generator; std::uniform_real_distribution<double> distribution_feats(-6.23, 6.23); - for(int ii = 0; ii < 900; ++ii) + for(int ii = 0; ii < _task_sizes_train[0]; ++ii) { value_1[ii] = distribution_feats(generator); value_2[ii] = distribution_feats(generator); } - for(int ii = 0; ii < 10; ++ii) + for(int ii = 0; ii < _task_sizes_test[0]; ++ii) { test_value_1[ii] = distribution_feats(generator); 
test_value_2[ii] = distribution_feats(generator); @@ -65,8 +66,8 @@ namespace _a = 0.143; _alpha = 1.05; - _prop = std::vector<double>(900, 0.0); - allowed_op_funcs::sin(900, _phi[0]->value_ptr(), _alpha, _a, _prop.data()); + _prop = std::vector<double>(_task_sizes_train[0], 0.0); + allowed_op_funcs::sin(_task_sizes_train[0], _phi[0]->value_ptr(), _alpha, _a, _prop.data()); _optimizer = nlopt_wrapper::get_optimizer("regression",_task_sizes_train, _prop, 1); } @@ -79,6 +80,7 @@ namespace std::vector<node_ptr> _phi; std::vector<double> _prop; std::vector<int> _task_sizes_train; + std::vector<int> _task_sizes_test; double _a; double _alpha; diff --git a/tests/googletest/feature_creation/parameterization/test_six_pow_node.cc b/tests/googletest/feature_creation/parameterization/test_six_pow_node.cc index 9aa141fb291f8b136ad656c027ada1909dc96828..25369ffdc5080cf5d8b2d82cd0f35d93e7a1cc90 100644 --- a/tests/googletest/feature_creation/parameterization/test_six_pow_node.cc +++ b/tests/googletest/feature_creation/parameterization/test_six_pow_node.cc @@ -28,27 +28,28 @@ namespace { nlopt_wrapper::MAX_PARAM_DEPTH = 1; - node_value_arrs::initialize_values_arr(900, 10, 2, 2, true, true); - _task_sizes_train = {900}; + _task_sizes_test = {10}; + + node_value_arrs::initialize_values_arr(_task_sizes_train, _task_sizes_test, 2, 2, true); - std::vector<double> value_1(900, 0.0); - std::vector<double> value_2(900, 0.0); + std::vector<double> value_1(_task_sizes_train[0], 0.0); + std::vector<double> value_2(_task_sizes_train[0], 0.0); - std::vector<double> test_value_1(10, 0.0); - std::vector<double> test_value_2(10, 0.0); + std::vector<double> test_value_1(_task_sizes_test[0], 0.0); + std::vector<double> test_value_2(_task_sizes_test[0], 0.0); std::default_random_engine generator; std::uniform_real_distribution<double> distribution_feats(-50.00, 50.00); std::uniform_real_distribution<double> distribution_params(1e-10, 2.00); - for(int ii = 0; ii < 900; ++ii) + for(int ii = 0; ii < 
_task_sizes_train[0]; ++ii) { value_1[ii] = distribution_feats(generator); value_2[ii] = std::abs(distribution_feats(generator)) + 1e-10; } - for(int ii = 0; ii < 10; ++ii) + for(int ii = 0; ii < _task_sizes_test[0]; ++ii) { test_value_1[ii] = distribution_feats(generator); test_value_2[ii] = std::abs(distribution_feats(generator)) + 1e-10; @@ -60,8 +61,8 @@ namespace _phi = {_feat_1, _feat_2}; _a = distribution_params(generator); _alpha = distribution_params(generator); - _prop = std::vector<double>(900, 0.0); - allowed_op_funcs::sixth_pow(900, _phi[0]->value_ptr(), _alpha, _a, _prop.data()); + _prop = std::vector<double>(_task_sizes_train[0], 0.0); + allowed_op_funcs::sixth_pow(_task_sizes_train[0], _phi[0]->value_ptr(), _alpha, _a, _prop.data()); _optimizer = nlopt_wrapper::get_optimizer("regression",_task_sizes_train, _prop, 1); } @@ -73,6 +74,7 @@ namespace std::vector<node_ptr> _phi; std::vector<double> _prop; std::vector<int> _task_sizes_train; + std::vector<int> _task_sizes_test; double _a; double _alpha; diff --git a/tests/googletest/feature_creation/parameterization/test_sq_node.cc b/tests/googletest/feature_creation/parameterization/test_sq_node.cc index fc8a88af0387061b64cf9112cdb41d467822f7a6..63c3f241d17622f30cee468b7df39825bcb1d17f 100644 --- a/tests/googletest/feature_creation/parameterization/test_sq_node.cc +++ b/tests/googletest/feature_creation/parameterization/test_sq_node.cc @@ -28,27 +28,28 @@ namespace { nlopt_wrapper::MAX_PARAM_DEPTH = 1; - node_value_arrs::initialize_values_arr(90, 10, 2, 2, true, true); - _task_sizes_train = {90}; + _task_sizes_test = {10}; + + node_value_arrs::initialize_values_arr(_task_sizes_train, _task_sizes_test, 2, 2, true); - std::vector<double> value_1(90, 0.0); - std::vector<double> value_2(90, 0.0); + std::vector<double> value_1(_task_sizes_train[0], 0.0); + std::vector<double> value_2(_task_sizes_train[0], 0.0); - std::vector<double> test_value_1(10, 0.0); - std::vector<double> test_value_2(10, 0.0); + 
std::vector<double> test_value_1(_task_sizes_test[0], 0.0); + std::vector<double> test_value_2(_task_sizes_test[0], 0.0); std::default_random_engine generator; std::uniform_real_distribution<double> distribution_feats(-50.0, 50.0); std::uniform_real_distribution<double> distribution_params(1e-10, 2.50); - for(int ii = 0; ii < 90; ++ii) + for(int ii = 0; ii < _task_sizes_train[0]; ++ii) { value_1[ii] = distribution_feats(generator); value_2[ii] = std::abs(distribution_feats(generator)) + 1e-10; } - for(int ii = 0; ii < 10; ++ii) + for(int ii = 0; ii < _task_sizes_test[0]; ++ii) { test_value_1[ii] = distribution_feats(generator); test_value_2[ii] = std::abs(distribution_feats(generator)) + 1e-10; @@ -60,8 +61,8 @@ namespace _phi = {_feat_1, _feat_2}; _a = distribution_params(generator); _alpha = distribution_params(generator); - _prop = std::vector<double>(90, 0.0); - allowed_op_funcs::sq(90, _phi[1]->value_ptr(), _alpha, _a, _prop.data()); + _prop = std::vector<double>(_task_sizes_train[0], 0.0); + allowed_op_funcs::sq(_task_sizes_train[0], _phi[1]->value_ptr(), _alpha, _a, _prop.data()); _optimizer = nlopt_wrapper::get_optimizer("regression",_task_sizes_train, _prop, 1); } @@ -73,6 +74,7 @@ namespace std::vector<node_ptr> _phi; std::vector<double> _prop; std::vector<int> _task_sizes_train; + std::vector<int> _task_sizes_test; double _a; double _alpha; diff --git a/tests/googletest/feature_creation/parameterization/test_sqrt_node.cc b/tests/googletest/feature_creation/parameterization/test_sqrt_node.cc index 14427a905196c6392ab5dc1fcf6913dd13a959e4..6b2dc24f2665650d0e9b7fb6bc9f6faf91154393 100644 --- a/tests/googletest/feature_creation/parameterization/test_sqrt_node.cc +++ b/tests/googletest/feature_creation/parameterization/test_sqrt_node.cc @@ -28,27 +28,28 @@ namespace { nlopt_wrapper::MAX_PARAM_DEPTH = 1; - node_value_arrs::initialize_values_arr(900, 10, 2, 2, true, true); - _task_sizes_train = {900}; + _task_sizes_test = {10}; + + 
node_value_arrs::initialize_values_arr(_task_sizes_train, _task_sizes_test, 2, 2, true); - std::vector<double> value_1(900, 0.0); - std::vector<double> value_2(900, 0.0); + std::vector<double> value_1(_task_sizes_train[0], 0.0); + std::vector<double> value_2(_task_sizes_train[0], 0.0); - std::vector<double> test_value_1(10, 0.0); - std::vector<double> test_value_2(10, 0.0); + std::vector<double> test_value_1(_task_sizes_test[0], 0.0); + std::vector<double> test_value_2(_task_sizes_test[0], 0.0); std::default_random_engine generator; std::uniform_real_distribution<double> distribution_feats(1.0, 500.0); std::uniform_real_distribution<double> distribution_params(0.5, 1.50); - for(int ii = 0; ii < 900; ++ii) + for(int ii = 0; ii < _task_sizes_train[0]; ++ii) { value_1[ii] = distribution_feats(generator); value_2[ii] = std::abs(distribution_feats(generator)) + 1e-10; } - for(int ii = 0; ii < 10; ++ii) + for(int ii = 0; ii < _task_sizes_test[0]; ++ii) { test_value_1[ii] = distribution_feats(generator); test_value_2[ii] = std::abs(distribution_feats(generator)) + 1e-10; @@ -60,8 +61,8 @@ namespace _phi = {_feat_1, _feat_2}; _a = distribution_params(generator); _alpha = std::pow(distribution_params(generator), 2.0); - _prop = std::vector<double>(900, 0.0); - allowed_op_funcs::sqrt(900, _phi[1]->value_ptr(), _alpha, _a, _prop.data()); + _prop = std::vector<double>(_task_sizes_train[0], 0.0); + allowed_op_funcs::sqrt(_task_sizes_train[0], _phi[1]->value_ptr(), _alpha, _a, _prop.data()); _optimizer = nlopt_wrapper::get_optimizer("regression",_task_sizes_train, _prop, 1); } @@ -73,6 +74,7 @@ namespace std::vector<node_ptr> _phi; std::vector<double> _prop; std::vector<int> _task_sizes_train; + std::vector<int> _task_sizes_test; double _a; double _alpha; diff --git a/tests/googletest/feature_creation/parameterization/test_sub_node.cc b/tests/googletest/feature_creation/parameterization/test_sub_node.cc index 
7d97ed99e37520fa61149df86b10c32a08777bb0..b7da5988ab2550bf168995587b33f7ca1b970bbb 100644 --- a/tests/googletest/feature_creation/parameterization/test_sub_node.cc +++ b/tests/googletest/feature_creation/parameterization/test_sub_node.cc @@ -28,27 +28,28 @@ namespace { nlopt_wrapper::MAX_PARAM_DEPTH = 1; - node_value_arrs::initialize_values_arr(90, 10, 2, 2, true, true); - _task_sizes_train = {90}; + _task_sizes_test = {10}; + + node_value_arrs::initialize_values_arr(_task_sizes_train, _task_sizes_test, 2, 2, true); - std::vector<double> value_1(90, 0.0); - std::vector<double> value_2(90, 0.0); + std::vector<double> value_1(_task_sizes_train[0], 0.0); + std::vector<double> value_2(_task_sizes_train[0], 0.0); - std::vector<double> test_value_1(10, 0.0); - std::vector<double> test_value_2(10, 0.0); + std::vector<double> test_value_1(_task_sizes_test[0], 0.0); + std::vector<double> test_value_2(_task_sizes_test[0], 0.0); std::default_random_engine generator; std::uniform_real_distribution<double> distribution_feats(-50.0, 50.0); std::uniform_real_distribution<double> distribution_params(-2.50, 2.50); - for(int ii = 0; ii < 90; ++ii) + for(int ii = 0; ii < _task_sizes_train[0]; ++ii) { value_1[ii] = distribution_feats(generator); value_2[ii] = distribution_feats(generator); } - for(int ii = 0; ii < 10; ++ii) + for(int ii = 0; ii < _task_sizes_test[0]; ++ii) { test_value_1[ii] = distribution_feats(generator); test_value_2[ii] = distribution_feats(generator); @@ -60,8 +61,8 @@ namespace _phi = {_feat_1, _feat_2}; _a = distribution_params(generator); _alpha = distribution_params(generator); - _prop = std::vector<double>(90, 0.0); - allowed_op_funcs::sub(90, _phi[0]->value_ptr(), _phi[1]->value_ptr(), _alpha, _a, _prop.data()); + _prop = std::vector<double>(_task_sizes_train[0], 0.0); + allowed_op_funcs::sub(_task_sizes_train[0], _phi[0]->value_ptr(), _phi[1]->value_ptr(), _alpha, _a, _prop.data()); _optimizer = nlopt_wrapper::get_optimizer("regression",_task_sizes_train, 
_prop, 1); } @@ -73,6 +74,7 @@ namespace std::vector<node_ptr> _phi; std::vector<double> _prop; std::vector<int> _task_sizes_train; + std::vector<int> _task_sizes_test; double _a; double _alpha; diff --git a/tests/googletest/feature_creation/utils/test_utils.cc b/tests/googletest/feature_creation/utils/test_utils.cc index edd112a436281c78409f20a1c8f8a3244fd6ac65..5db1d69eca9d6e615d82d5ce05ff52ce12f9b64e 100644 --- a/tests/googletest/feature_creation/utils/test_utils.cc +++ b/tests/googletest/feature_creation/utils/test_utils.cc @@ -22,7 +22,7 @@ namespace protected: void SetUp() override { - node_value_arrs::initialize_values_arr(4, 1, 3, 2); + node_value_arrs::initialize_values_arr({4}, {1}, 3, 2, false); std::vector<double> value_1 = {-1.0, -2.0, -3.0, -4.0}; std::vector<double> test_value_1 = {50.0}; diff --git a/tests/googletest/feature_creation/value_storage/test_value_storage.cc b/tests/googletest/feature_creation/value_storage/test_value_storage.cc index b80cfffa98997b0f0fa1f4235713a44d8e990005..edfbe6d03a81e6032d40948beb431140f658adf4 100644 --- a/tests/googletest/feature_creation/value_storage/test_value_storage.cc +++ b/tests/googletest/feature_creation/value_storage/test_value_storage.cc @@ -20,7 +20,7 @@ namespace { //test mean calculations TEST(ValueStorage, ValueStorageTest) { - node_value_arrs::initialize_values_arr(5, 2, 1, 2, true, true); + node_value_arrs::initialize_values_arr({5}, {2}, 1, 2, true); EXPECT_EQ(node_value_arrs::N_SAMPLES, 5); EXPECT_EQ(node_value_arrs::N_SAMPLES_TEST, 2); EXPECT_EQ(node_value_arrs::N_RUNGS_STORED, 0); @@ -40,15 +40,7 @@ namespace { EXPECT_EQ(node_value_arrs::N_RUNGS_STORED, 1); EXPECT_EQ(node_value_arrs::N_STORE_FEATURES, 2); EXPECT_EQ(node_value_arrs::VALUES_ARR.size(), 10); - EXPECT_EQ(node_value_arrs::TEST_VALUES_ARR.size(), 2); - - node_value_arrs::resize_values_arr(1, 2); - EXPECT_EQ(node_value_arrs::N_SAMPLES, 5); - EXPECT_EQ(node_value_arrs::N_SAMPLES_TEST, 2); - EXPECT_EQ(node_value_arrs::N_RUNGS_STORED, 
1); - EXPECT_EQ(node_value_arrs::N_STORE_FEATURES, 2); - EXPECT_EQ(node_value_arrs::VALUES_ARR.size(), 10); - EXPECT_EQ(node_value_arrs::TEST_VALUES_ARR.size(), 2); + EXPECT_EQ(node_value_arrs::TEST_VALUES_ARR.size(), 4); node_value_arrs::initialize_d_matrix_arr(); EXPECT_EQ(node_value_arrs::N_SELECTED, 0); diff --git a/tests/googletest/inputs/data.csv b/tests/googletest/inputs/data.csv new file mode 100644 index 0000000000000000000000000000000000000000..ee3d4cd2cc7a2d326f44369b2600601aa663824b --- /dev/null +++ b/tests/googletest/inputs/data.csv @@ -0,0 +1,5 @@ +Sample,task,property (m),A (m) +a,task_1,1.0,1.0 +b,task_1,4.0,2.0 +c,task_2,9.0,3.0 +d,task_1,16.0,4.0 diff --git a/tests/googletest/inputs/input_parser.cc b/tests/googletest/inputs/input_parser.cc new file mode 100644 index 0000000000000000000000000000000000000000..6b9e72663de7ef9d78ca37edf1fd1c0084e67a73 --- /dev/null +++ b/tests/googletest/inputs/input_parser.cc @@ -0,0 +1,260 @@ +// Copyright 2021 Thomas A. R. Purcell +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#include <inputs/InputParser.hpp> +#include "gtest/gtest.h" + +namespace +{ + class InputParserTests : public ::testing::Test + { + protected: + void SetUp() override + { + node_value_arrs::finialize_values_arr(); + _sample_ids_train = {"a", "b", "c"}; + _sample_ids_test = {"d"}; + _task_names = {"task_1", "task_2"}; + _allowed_param_ops = {"log"}; + _allowed_ops = {"sq", "cb"}; + _prop_train = {1.0, 4.0, 9.0}; + _prop_test = {16.0}; + _leave_out_inds = {3}; + _task_sizes_train = {2, 1}; + _task_sizes_test = {1, 0}; + _phi_0 = {FeatureNode(0, "feat_1", {1.0, 2.0, 3.0}, {4.0}, Unit("m"))}; + _prop_unit = Unit("m"); + _filename = "googletest/inputs/sisso.json"; + _data_file = "googletest/inputs/data.csv"; + _prop_key = "property"; + _prop_label = "property"; + _task_key = "task"; + _calc_type = "regression"; + _cross_cor_max = 1.0; + _l_bound = 1e-5; + _u_bound = 1e8; + _n_dim = 2; + _max_rung = 1; + _n_rung_store = 1; + _n_rung_generate = 0; + _n_sis_select = 1; + _n_residual = 1; + _n_models_store = 1; + _max_param_depth = 1; + _nlopt_seed = 10; + _fix_intercept = false; + _global_param_opt = true; + _reparam_residual = true; + } + + std::vector<std::string> _sample_ids_train; //!< Vector storing all sample ids for the training samples + std::vector<std::string> _sample_ids_test; //!< Vector storing all sample ids for the test samples + std::vector<std::string> _task_names; //!< Vector storing the ID of the task names + + std::vector<std::string> _allowed_param_ops; //!< Vector containing all allowed operators strings for operators with free parameters + std::vector<std::string> _allowed_ops; //!< Vector containing all allowed operators strings + std::vector<double> _prop_train; //!< The value of the property to evaluate the loss function against for the training set + std::vector<double> _prop_test; //!< The value of the property to evaluate the loss function against for the test set + + std::vector<int> _leave_out_inds; //!< List of indexes from the initial data 
file in the test set + std::vector<int> _task_sizes_train; //!< Number of training samples per task + std::vector<int> _task_sizes_test; //!< Number of testing samples per task + + std::vector<FeatureNode> _phi_0; //!< A vector of FeatureNodes for the primary feature space + + Unit _prop_unit; //!< The Unit of the property + + std::string _filename; //!< Name of the input file + std::string _data_file; //!< Name of the data file + std::string _prop_key; //!< Key used to find the property column in the data file + std::string _prop_label; //!< The label of the property + std::string _task_key; //!< Key used to find the task column in the data file + std::string _calc_type; //!< The type of LossFunction to use when projecting the features onto a property + + std::shared_ptr<MPI_Interface> _mpi_comm; //!< The MPI communicator for the calculation + + double _cross_cor_max; //!< Maximum cross-correlation used for selecting features + double _l_bound; //!< The lower bound for the maximum absolute value of the features + double _u_bound; //!< The upper bound for the maximum absolute value of the features + + int _n_dim; //!< The maximum number of features allowed in the linear model + int _max_rung; //!< Maximum rung for the feature creation + int _n_rung_store; //!< The number of rungs to calculate and store the value of the features for all samples + int _n_rung_generate; //!< Either 0 or 1, and is the number of rungs to generate on the fly during SIS + int _n_sis_select; //!< Number of features to select during each SIS iteration + int _n_samp; //!< Number of samples in the data set + int _n_samp_train; //!< Number of samples in the training set + int _n_samp_test; //!< Number of samples in the test set + int _n_residual; //!< Number of residuals to pass to the next sis model + int _n_models_store; //!< The number of models to output to files + int _max_param_depth; //!< The maximum depth in the binary expression tree to set non-linear optimization + int _nlopt_seed; 
//!< The seed used for the nlOpt library + + bool _fix_intercept; //!< If true the bias term is fixed at 0 + bool _global_param_opt; //!< True if global optimization is requested for non-linear optimization of parameters (Can break reproducibility) + bool _reparam_residual; //!< If True then reparameterize features using the residuals of each model + + }; + + TEST_F(InputParserTests, DefaultConsructor) + { + InputParser inputs; + inputs.set_task_sizes_train(_task_sizes_train); + EXPECT_EQ(inputs.task_sizes_train()[0], _task_sizes_train[0]); + + inputs.set_task_sizes_test(_task_sizes_test); + EXPECT_EQ(inputs.task_sizes_test()[0], _task_sizes_test[0]); + + inputs.set_sample_ids_train(_sample_ids_train); + EXPECT_EQ(inputs.sample_ids_train()[0], _sample_ids_train[0]); + + inputs.set_sample_ids_test(_sample_ids_test); + EXPECT_EQ(inputs.sample_ids_test()[0], _sample_ids_test[0]); + + inputs.set_task_names(_task_names); + EXPECT_EQ(inputs.task_names()[0], _task_names[0]); + + inputs.set_allowed_param_ops(_allowed_param_ops); + EXPECT_EQ(inputs.allowed_param_ops()[0], _allowed_param_ops[0]); + + inputs.set_allowed_ops(_allowed_ops); + EXPECT_EQ(inputs.allowed_ops()[0], _allowed_ops[0]); + + inputs.set_prop_train(_prop_train); + EXPECT_EQ(inputs.prop_train()[0], _prop_train[0]); + + inputs.set_prop_test(_prop_test); + EXPECT_EQ(inputs.prop_test()[0], _prop_test[0]); + + inputs.set_leave_out_inds(_leave_out_inds); + EXPECT_EQ(inputs.leave_out_inds()[0], _leave_out_inds[0]); + + EXPECT_EQ(inputs.n_samp(), 4); + EXPECT_EQ(inputs.n_samp_test(), 1); + EXPECT_EQ(inputs.n_samp_train(), 3); + + inputs.set_phi_0(_phi_0); + EXPECT_EQ(inputs.phi_0()[0].feat_ind(), _phi_0[0].feat_ind()); + EXPECT_EQ(inputs.phi_0_ptrs()[0]->feat_ind(), _phi_0[0].feat_ind()); + + inputs.set_prop_unit(_prop_unit); + EXPECT_EQ(inputs.prop_unit(), _prop_unit); + + inputs.set_filename(_filename); + EXPECT_EQ(inputs.filename(), _filename); + + inputs.set_data_file(_data_file); + 
EXPECT_EQ(inputs.data_file(), _data_file); + + inputs.set_prop_key(_prop_key); + EXPECT_EQ(inputs.prop_key(), _prop_key); + + inputs.set_prop_label(_prop_label); + EXPECT_EQ(inputs.prop_label(), _prop_label); + + inputs.set_task_key(_task_key); + EXPECT_EQ(inputs.task_key(), _task_key); + + inputs.set_calc_type(_calc_type); + EXPECT_EQ(inputs.calc_type(), _calc_type); + + inputs.set_cross_cor_max(_cross_cor_max); + EXPECT_EQ(inputs.cross_cor_max(), _cross_cor_max); + + inputs.set_l_bound(_l_bound); + EXPECT_EQ(inputs.l_bound(), _l_bound); + + inputs.set_u_bound(_u_bound); + EXPECT_EQ(inputs.u_bound(), _u_bound); + + inputs.set_n_dim(_n_dim); + EXPECT_EQ(inputs.n_dim(), _n_dim); + + inputs.set_max_rung(_max_rung); + EXPECT_EQ(inputs.max_rung(), _max_rung); + + inputs.set_n_rung_store(_n_rung_store); + EXPECT_EQ(inputs.n_rung_store(), _n_rung_store); + + inputs.set_n_rung_generate(_n_rung_generate); + EXPECT_EQ(inputs.n_rung_generate(), _n_rung_generate); + + inputs.set_n_sis_select(_n_sis_select); + EXPECT_EQ(inputs.n_sis_select(), _n_sis_select); + + inputs.set_n_residual(_n_residual); + EXPECT_EQ(inputs.n_residual(), _n_residual); + + inputs.set_n_models_store(_n_models_store); + EXPECT_EQ(inputs.n_models_store(), _n_models_store); + + inputs.set_max_param_depth(_max_param_depth); + EXPECT_EQ(inputs.max_param_depth(), _max_param_depth); + + inputs.set_nlopt_seed(_nlopt_seed); + EXPECT_EQ(inputs.nlopt_seed(), _nlopt_seed); + + inputs.set_fix_intercept(_fix_intercept); + EXPECT_EQ(inputs.fix_intercept(), _fix_intercept); + + inputs.set_global_param_opt(_global_param_opt); + EXPECT_EQ(inputs.global_param_opt(), _global_param_opt); + + inputs.set_reparam_residual(_reparam_residual); + EXPECT_EQ(inputs.reparam_residual(), _reparam_residual); + } + + TEST_F(InputParserTests, FileConsructor) + { + boost::property_tree::ptree propTree; + boost::property_tree::json_parser::read_json(_filename, propTree); + InputParser inputs(propTree, _filename, mpi_setup::comm); + 
EXPECT_EQ(inputs.sample_ids_train()[0], _sample_ids_train[0]); + EXPECT_EQ(inputs.sample_ids_test()[0], _sample_ids_test[0]); + EXPECT_EQ(inputs.task_names()[0], _task_names[0]); + EXPECT_EQ(inputs.allowed_param_ops()[0], _allowed_param_ops[0]); + EXPECT_EQ(inputs.allowed_ops()[0], _allowed_ops[0]); + EXPECT_EQ(inputs.prop_train()[0], _prop_train[0]); + EXPECT_EQ(inputs.prop_test()[0], _prop_test[0]); + EXPECT_EQ(inputs.leave_out_inds()[0], _leave_out_inds[0]); + EXPECT_EQ(inputs.task_sizes_train()[0], _task_sizes_train[0]); + EXPECT_EQ(inputs.task_sizes_test()[0], _task_sizes_test[0]); + EXPECT_EQ(inputs.n_samp(), 4); + EXPECT_EQ(inputs.n_samp_test(), 1); + EXPECT_EQ(inputs.n_samp_train(), 3); + EXPECT_EQ(inputs.phi_0()[0].feat_ind(), _phi_0[0].feat_ind()); + EXPECT_EQ(inputs.phi_0_ptrs()[0]->feat_ind(), _phi_0[0].feat_ind()); + EXPECT_EQ(inputs.prop_unit(), _prop_unit); + EXPECT_EQ(inputs.filename(), _filename); + EXPECT_EQ(inputs.data_file(), _data_file); + EXPECT_EQ(inputs.prop_key(), _prop_key); + EXPECT_EQ(inputs.prop_label(), _prop_label); + EXPECT_EQ(inputs.task_key(), _task_key); + EXPECT_EQ(inputs.calc_type(), _calc_type); + EXPECT_EQ(inputs.cross_cor_max(), _cross_cor_max); + EXPECT_EQ(inputs.l_bound(), _l_bound); + EXPECT_EQ(inputs.u_bound(), _u_bound); + EXPECT_EQ(inputs.n_dim(), _n_dim); + EXPECT_EQ(inputs.max_rung(), _max_rung); + EXPECT_EQ(inputs.n_rung_store(), _n_rung_store); + EXPECT_EQ(inputs.n_rung_generate(), _n_rung_generate); + EXPECT_EQ(inputs.n_sis_select(), _n_sis_select); + EXPECT_EQ(inputs.n_residual(), _n_residual); + EXPECT_EQ(inputs.n_models_store(), _n_models_store); + EXPECT_EQ(inputs.max_param_depth(), _max_param_depth); + EXPECT_EQ(inputs.nlopt_seed(), _nlopt_seed); + EXPECT_EQ(inputs.fix_intercept(), _fix_intercept); + EXPECT_EQ(inputs.global_param_opt(), _global_param_opt); + EXPECT_EQ(inputs.reparam_residual(), _reparam_residual); + } +} diff --git a/tests/googletest/inputs/sisso.json b/tests/googletest/inputs/sisso.json new 
file mode 100644 index 0000000000000000000000000000000000000000..f41261dc992d0c1d090c254ae5530fee190504b2 --- /dev/null +++ b/tests/googletest/inputs/sisso.json @@ -0,0 +1,21 @@ +{ + "desc_dim": 2, + "n_sis_select": 1, + "max_rung": 1, + "n_residual": 1, + "n_models_store": 1, + "n_rung_store": 1, + "data_file": "googletest/inputs/data.csv", + "property_key": "property", + "task_key": "task", + "leave_out_inds": [3], + "opset": ["sq", "cb"], + "param_opset": ["log"], + "fix_intercept": false, + "min_abs_feat_val": 1e-5, + "max_abs_feat_val": 1e8, + "max_param_depth": 1, + "nlopt_seed": 10, + "global_param_opt": true, + "reparam_residual": true +} diff --git a/tests/googletest/utils/test_compare_features.cc b/tests/googletest/utils/test_compare_features.cc index c7fad834c2487682320e96b8c36e49811a284dfb..9826c79cbf010a02b24d1aed2ee1f57d68032d51 100644 --- a/tests/googletest/utils/test_compare_features.cc +++ b/tests/googletest/utils/test_compare_features.cc @@ -26,7 +26,7 @@ namespace { std::vector<double> scores = {0.9897782665572893}; std::vector<node_ptr> selected(1); - node_value_arrs::initialize_values_arr(4, 0, 1, 0, true, false); + node_value_arrs::initialize_values_arr({4}, {0}, 1, 0, false); selected[0] = std::make_shared<FeatureNode>(0, "A", val_3, std::vector<double>(), Unit()); node_value_arrs::initialize_d_matrix_arr(); diff --git a/tests/googletest/utils/test_project.cc b/tests/googletest/utils/test_project.cc index 93fd5a141ba53f27973f7c37cb53dec3fc64239b..0f4b70dc1c0eb8e242cfc670b222260d1589b78a 100644 --- a/tests/googletest/utils/test_project.cc +++ b/tests/googletest/utils/test_project.cc @@ -20,7 +20,7 @@ namespace { //test mean calculations TEST(Project, ProjectTest) { - node_value_arrs::initialize_values_arr(4, 0, 1, 0, true, false); + node_value_arrs::initialize_values_arr({4}, {0}, 1, 0, false); std::vector<double> prop = {1.0, 3.0, 5.0, 6.0}; std::vector<double> prop_class = {0.0, 0.0, 0.0, 1.0}; std::vector<double> val = {2.0, 2.0, 3.0, 4.0}; 
diff --git a/tests/pytest/sisso.json b/tests/pytest/sisso.json deleted file mode 100644 index 0790f966f3c7601e21b3b0db7be8d98dbd76cacc..0000000000000000000000000000000000000000 --- a/tests/pytest/sisso.json +++ /dev/null @@ -1,16 +0,0 @@ -{ - "desc_dim": 2, - "n_sis_select": 1, - "max_rung": 2, - "n_rung_store":0, - "n_residual": 1, - "data_file": "data_param.csv", - "property_key": "Prop", - "task_key": "Task", - "leave_out_frac": 0.05, - "n_models_store": 1, - "leave_out_inds": [ 1, 3, 52, 53, 78 ], - "opset": ["add", "sub", "abs_diff", "mult", "div", "inv", "abs", "exp", "log", "sin", "cos", "sq", "cb", "six_pow", "sqrt", "cbrt", "neg_exp"], - "param_opset": ["log", "exp", "sq"], - "fix_intercept": false -} diff --git a/tests/pytest/test_classification/test_classification.py b/tests/pytest/test_classification/test_classification.py index 1180e3a59df70973fcd1380134a8a93ad1198fa0..269603494f597107bdf589c0529d39729b288172 100644 --- a/tests/pytest/test_classification/test_classification.py +++ b/tests/pytest/test_classification/test_classification.py @@ -14,24 +14,29 @@ import shutil import numpy as np from sissopp import ( + Inputs, FeatureNode, + FeatureSpace, Unit, initialize_values_arr, SISSOClassifier, ) -from sissopp.py_interface import get_fs import matplotlib.pyplot as plt def test_sisso_classifier(): - initialize_values_arr(80, 20, 10, 2) + task_sizes_train = [80] + task_sizes_test = [20] - task_keys = ["task"] - sample_ids_train = [str(ii) for ii in range(20, 100)] - sample_ids_test = [str(ii) for ii in range(20)] + initialize_values_arr(task_sizes_train, task_sizes_test, 10, 2) - train_data = np.random.random((10, 80)) * 2.0 - 1.0 - test_data = np.random.random((10, 20)) * 2.0 - 1.0 + inputs = Inputs() + inputs.task_keys = ["task"] + inputs.sample_ids_train = [str(ii) for ii in range(20, 100)] + inputs.sample_ids_test = [str(ii) for ii in range(20)] + + train_data = np.random.random((10, task_sizes_train[0])) * 2.0 - 1.0 + test_data = 
np.random.random((10, task_sizes_test[0])) * 2.0 - 1.0 train_data[0][:20] = np.random.random(20) * -1.0 - 1.0 train_data[0][20:40] = np.random.random(20) + 1.0 @@ -51,8 +56,8 @@ def test_sisso_classifier(): test_data[1][5:15] = np.random.random(10) + 1.0 test_data[1][15:] = np.random.random(5) * -1.0 - 1.0 - prop = (np.sign(train_data[1] * train_data[0]) + 1) // 2 - prop_test = (np.sign(test_data[1] * test_data[0]) + 1) // 2 + inputs.prop_train = (np.sign(train_data[1] * train_data[0]) + 1) // 2 + inputs.prop_test = (np.sign(test_data[1] * test_data[0]) + 1) // 2 train_data[0][0] = 0.01 train_data[0][20] = -0.01 @@ -84,7 +89,7 @@ def test_sisso_classifier(): test_data[3][10] = -10.0 test_data[3][15] = 10.0 - phi_0 = [ + inputs.phi_0 = [ FeatureNode( ff, f"feat_{ff}", @@ -95,25 +100,22 @@ def test_sisso_classifier(): for ff in range(10) ] - op_set = ["add", "sub", "mult", "sq", "cb", "sqrt", "cbrt"] - - feat_space = get_fs(phi_0, prop, [80], op_set, [], "classification", 1, 10) - sisso = SISSOClassifier( - feat_space, - "class", - Unit("m"), - prop, - prop_test, - [80], - [20], - list(range(20)), - 2, - 1, - 1, - sample_ids_train, - sample_ids_test, - task_keys, - ) + inputs.allowed_ops = ["add", "sub", "mult", "sq", "cb", "sqrt", "cbrt"] + inputs.calc_type = "classification" + inputs.max_rung = 1 + inputs.n_sis_select = 10 + inputs.n_dim = 2 + inputs.n_residual = 1 + inputs.n_models_store = 1 + inputs.task_names = ["all"] + inputs.task_sizes_train = task_sizes_train + inputs.task_sizes_test = task_sizes_test + inputs.leave_out_inds = list(range(task_sizes_test[0])) + inputs.prop_label = "Class" + + feat_space = FeatureSpace(inputs) + + sisso = SISSOClassifier(inputs, feat_space) sisso.fit() shutil.rmtree("models/") diff --git a/tests/pytest/test_descriptor_identifier/test_log_regressor.py b/tests/pytest/test_descriptor_identifier/test_log_regressor.py index 163fa1c66208fe9e93fc56c6b95984c1d2ca899f..bb56c606ba77f3f97bf53b180fefe9b4eb927735 100644 --- 
a/tests/pytest/test_descriptor_identifier/test_log_regressor.py +++ b/tests/pytest/test_descriptor_identifier/test_log_regressor.py @@ -15,58 +15,67 @@ import shutil import numpy as np from sissopp import ( FeatureNode, + FeatureSpace, + Inputs, + SISSOLogRegressor, Unit, initialize_values_arr, - SISSOLogRegressor, ) -from sissopp.py_interface import get_fs def test_sisso_log_regressor(): - initialize_values_arr(90, 10, 10, 2) - phi_0 = [ + task_sizes_train = [90] + task_sizes_test = [10] + + initialize_values_arr(task_sizes_train, task_sizes_test, 10, 2) + + inputs = Inputs() + inputs.phi_0 = [ FeatureNode( ff, f"feat_{ff}", - np.random.random(90) * 1e2, - np.random.random(10) * 1e2, + np.random.random(task_sizes_train[0]) * 1e2, + np.random.random(task_sizes_test[0]) * 1e2, Unit(), ) for ff in range(10) ] - task_keys = ["task"] - sample_ids_train = [str(ii) for ii in range(10, 100)] - sample_ids_test = [str(ii) for ii in range(10)] + inputs.task_names = ["task"] + inputs.sample_ids_train = [ + str(ii) + for ii in range(task_sizes_test[0], task_sizes_test[0] + task_sizes_train[0]) + ] + inputs.sample_ids_test = [str(ii) for ii in range(task_sizes_test[0])] a0 = 0.95 a1 = 1.01 c0 = np.random.random() * 100.0 - prop = c0 * np.power(phi_0[0].value, a0) * np.power(phi_0[2].value, a1) - prop_test = ( - c0 * np.power(phi_0[0].test_value, a0) * np.power(phi_0[2].test_value, a1) + inputs.prop_train = ( + c0 * np.power(inputs.phi_0[0].value, a0) * np.power(inputs.phi_0[2].value, a1) + ) + inputs.prop_test = ( + c0 + * np.power(inputs.phi_0[0].test_value, a0) + * np.power(inputs.phi_0[2].test_value, a1) ) - op_set = ["add", "sub", "mult", "sq", "cb", "sqrt", "cbrt"] + inputs.allowed_ops = ["add", "sub", "mult", "sq", "cb", "sqrt", "cbrt"] + inputs.calc_type = "log_regression" + inputs.max_rung = 0 + inputs.n_sis_select = 5 + inputs.n_dim = 2 + inputs.n_residual = 1 + inputs.n_models_store = 1 + inputs.task_sizes_train = task_sizes_train + inputs.task_sizes_test = 
task_sizes_test + inputs.leave_out_inds = list(range(task_sizes_test[0])) + inputs.fix_intercept = False + inputs.prop_label = "prop" + inputs.prop_unit = Unit("m") - feat_space = get_fs(phi_0, prop, [90], op_set, [], "log_regression", 0, 5) - sisso = SISSOLogRegressor( - feat_space, - "prop", - Unit("m"), - prop, - prop_test, - [90], - [10], - list(range(10)), - 2, - 1, - 1, - sample_ids_train, - sample_ids_test, - task_keys, - False, - ) + feat_space = FeatureSpace(inputs) + sisso = SISSOLogRegressor(inputs, feat_space) sisso.fit() shutil.rmtree("models/") shutil.rmtree("feature_space/") diff --git a/tests/pytest/test_descriptor_identifier/test_regressor.py b/tests/pytest/test_descriptor_identifier/test_regressor.py index 5b97ab992106df988775b9f3a22fdf3882cc4db5..062f0a21bce702b2283028ad40ded73b214b73cf 100644 --- a/tests/pytest/test_descriptor_identifier/test_regressor.py +++ b/tests/pytest/test_descriptor_identifier/test_regressor.py @@ -15,63 +15,68 @@ import shutil import numpy as np from sissopp import ( FeatureNode, + FeatureSpace, + Inputs, + SISSORegressor, Unit, initialize_values_arr, - SISSORegressor, ) -from sissopp.py_interface import get_fs def test_sisso_regressor(): - initialize_values_arr(95, 5, 10, 2) - phi_0 = [ + task_sizes_train = [95] + task_sizes_test = [5] + initialize_values_arr(task_sizes_train, task_sizes_test, 10, 2) + inputs = Inputs() + inputs.phi_0 = [ FeatureNode( ff, f"feat_{ff}", - np.random.random(95) * 1e2 - 50, - np.random.random(5) * 1e2 - 50, + np.random.random(task_sizes_train[0]) * 1e2 - 50, + np.random.random(task_sizes_test[0]) * 1e2 - 50, Unit(), ) for ff in range(10) ] - task_keys = ["task"] - sample_ids_train = [str(ii) for ii in range(5, 100)] - sample_ids_test = [str(ii) for ii in range(5)] + inputs.task_names = ["task"] + inputs.sample_ids_train = [ + str(ii) + for ii in range(task_sizes_test[0], task_sizes_test[0] + task_sizes_train[0]) + ] + inputs.sample_ids_test = [str(ii) for ii in range(task_sizes_test[0])] a0 
= np.random.random() * 5.0 - 2.5 a1 = np.random.random() * 5.0 - 2.5 c0 = np.random.random() * 100.0 - 50 - prop = ( - c0 + a0 * np.power(phi_0[0].value + phi_0[1].value, 2.0) - a1 * phi_0[4].value + inputs.prop_train = ( + c0 + + a0 * np.power(inputs.phi_0[0].value + inputs.phi_0[1].value, 2.0) + - a1 * inputs.phi_0[4].value ) - prop_test = ( + inputs.prop_test = ( c0 - + a0 * np.power(phi_0[0].test_value + phi_0[1].test_value, 2.0) - - a1 * phi_0[4].test_value + + a0 * np.power(inputs.phi_0[0].test_value + inputs.phi_0[1].test_value, 2.0) + - a1 * inputs.phi_0[4].test_value ) - op_set = ["add", "sub", "mult", "sq", "cb", "sqrt", "cbrt"] + inputs.allowed_ops = ["add", "sub", "mult", "sq", "cb", "sqrt", "cbrt"] + inputs.calc_type = "regression" + inputs.max_rung = 2 + inputs.n_sis_select = 10 + inputs.n_dim = 2 + inputs.n_residual = 1 + inputs.n_models_store = 1 + inputs.task_sizes_train = task_sizes_train + inputs.task_sizes_test = task_sizes_test + inputs.leave_out_inds = list(range(task_sizes_test[0])) + inputs.fix_intercept = False + inputs.prop_label = "prop" + inputs.prop_unit = Unit("m") - feat_space = get_fs(phi_0, prop, [95], op_set, [], "regression", 2, 10) + feat_space = FeatureSpace(inputs) - sisso = SISSORegressor( - feat_space, - "prop", - Unit("m"), - prop, - prop_test, - [95], - [5], - list(range(5)), - 2, - 1, - 1, - sample_ids_train, - sample_ids_test, - task_keys, - False, - ) + sisso = SISSORegressor(inputs, feat_space) sisso.fit() shutil.rmtree("models/") diff --git a/tests/pytest/test_feature_creation/test_feat_generation/test_abs_diff_node.py b/tests/pytest/test_feature_creation/test_feat_generation/test_abs_diff_node.py index 50694fcac1b2b135d63fdaa9355037ff1957d169..0fd9642cc63a8401dc37895c0d3801916774620e 100644 --- a/tests/pytest/test_feature_creation/test_feat_generation/test_abs_diff_node.py +++ b/tests/pytest/test_feature_creation/test_feat_generation/test_abs_diff_node.py @@ -28,16 +28,19 @@ class InvalidFeatureMade(Exception): def 
test_abs_diff_node(): - initialize_values_arr(900, 10, 4, 2) + task_sizes_train = [900] + task_sizes_test = [10] - data_1 = np.random.random(900) * 1e10 + 1e-10 - test_data_1 = np.random.random(10) * 1e10 + 1e-10 + initialize_values_arr(task_sizes_train, task_sizes_test, 4, 2) - data_2 = np.random.random(900) * 2e4 - 1e4 - test_data_2 = np.random.random(10) * 2e4 - 1e4 + data_1 = np.random.random(task_sizes_train[0]) * 1e10 + 1e-10 + test_data_1 = np.random.random(task_sizes_test[0]) * 1e10 + 1e-10 - data_3 = np.random.random(900) * 1e4 + 1e-10 - test_data_3 = np.random.random(10) * 1e4 + 1e-10 + data_2 = np.random.random(task_sizes_train[0]) * 2e4 - 1e4 + test_data_2 = np.random.random(task_sizes_test[0]) * 2e4 - 1e4 + + data_3 = np.random.random(task_sizes_train[0]) * 1e4 + 1e-10 + test_data_3 = np.random.random(task_sizes_test[0]) * 1e4 + 1e-10 feat_1 = FeatureNode(0, "t_a", data_1, test_data_1, Unit("s")) feat_2 = FeatureNode(1, "x_a", data_2, test_data_2, Unit("m")) diff --git a/tests/pytest/test_feature_creation/test_feat_generation/test_abs_node.py b/tests/pytest/test_feature_creation/test_feat_generation/test_abs_node.py index 58f99375a776bf89e3c752344b74d1b696488bc8..925938a4a8b26746235bab3b1cbe34e182d29ebc 100644 --- a/tests/pytest/test_feature_creation/test_feat_generation/test_abs_node.py +++ b/tests/pytest/test_feature_creation/test_feat_generation/test_abs_node.py @@ -21,15 +21,18 @@ class InvalidFeatureMade(Exception): def test_abs_node(): - initialize_values_arr(90, 10, 3, 2) - data_1 = np.random.random(90) * 1e4 + 1e-10 - test_data_1 = np.random.random(10) * 1e4 + 1e-10 + task_sizes_train = [90] + task_sizes_test = [10] - data_2 = np.random.choice([1.0, -1.0], 90) - test_data_2 = np.random.choice([1.0, -1.0], 10) + initialize_values_arr(task_sizes_train, task_sizes_test, 3, 2) + data_1 = np.random.random(task_sizes_train[0]) * 1e4 + 1e-10 + test_data_1 = np.random.random(task_sizes_test[0]) * 1e4 + 1e-10 - data_3 = np.random.random(90) * 2e4 - 1e4 
- test_data_3 = np.random.random(10) * 2e4 - 1e4 + data_2 = np.random.choice([1.0, -1.0], task_sizes_train[0]) + test_data_2 = np.random.choice([1.0, -1.0], task_sizes_test[0]) + + data_3 = np.random.random(task_sizes_train[0]) * 2e4 - 1e4 + test_data_3 = np.random.random(task_sizes_test[0]) * 2e4 - 1e4 feat_1 = FeatureNode(0, "t_a", data_1, test_data_1, Unit()) feat_2 = FeatureNode(1, "x_a", data_2, test_data_2, Unit()) diff --git a/tests/pytest/test_feature_creation/test_feat_generation/test_add_node.py b/tests/pytest/test_feature_creation/test_feat_generation/test_add_node.py index 426ca94926230b1e461e34aa130fcd2802b4b2ac..de115c458f77f3c912d6e363572eb4189cc38a99 100644 --- a/tests/pytest/test_feature_creation/test_feat_generation/test_add_node.py +++ b/tests/pytest/test_feature_creation/test_feat_generation/test_add_node.py @@ -21,16 +21,19 @@ class InvalidFeatureMade(Exception): def test_add_node(): - initialize_values_arr(90, 10, 4, 2) + task_sizes_train = [90] + task_sizes_test = [10] - data_1 = np.random.random(90) * 1e4 + 1e-10 - test_data_1 = np.random.random(10) * 1e4 + 1e-10 + initialize_values_arr(task_sizes_train, task_sizes_test, 4, 2) - data_2 = np.random.random(90) * 2e4 - 1e4 - test_data_2 = np.random.random(10) * 2e4 - 1e4 + data_1 = np.random.random(task_sizes_train[0]) * 1e4 + 1e-10 + test_data_1 = np.random.random(task_sizes_test[0]) * 1e4 + 1e-10 - data_3 = np.random.random(90) * 1e10 + 1e-10 - test_data_3 = np.random.random(10) * 1e10 + 1e-10 + data_2 = np.random.random(task_sizes_train[0]) * 2e4 - 1e4 + test_data_2 = np.random.random(task_sizes_test[0]) * 2e4 - 1e4 + + data_3 = np.random.random(task_sizes_train[0]) * 1e10 + 1e-10 + test_data_3 = np.random.random(task_sizes_test[0]) * 1e10 + 1e-10 feat_1 = FeatureNode(0, "t_a", data_1, test_data_1, Unit("s")) feat_2 = FeatureNode(1, "x_a", data_2, test_data_2, Unit("m")) diff --git a/tests/pytest/test_feature_creation/test_feat_generation/test_cb_node.py 
b/tests/pytest/test_feature_creation/test_feat_generation/test_cb_node.py index 27e8f6a641fc645ed3321fca140fa605b79c0034..81863cf910069cf43b281f303bb34b348a589809 100644 --- a/tests/pytest/test_feature_creation/test_feat_generation/test_cb_node.py +++ b/tests/pytest/test_feature_creation/test_feat_generation/test_cb_node.py @@ -30,12 +30,15 @@ class InvalidFeatureMade(Exception): def test_cube_node(): - initialize_values_arr(90, 10, 2, 2) - data_1 = np.random.random(90) * 1e4 + 1e-10 - test_data_1 = np.random.random(10) * 1e4 + 1e-10 + task_sizes_train = [90] + task_sizes_test = [10] - data_2 = np.random.random(90) * 2 - 1 - test_data_2 = np.random.random(10) * 2 - 1 + initialize_values_arr(task_sizes_train, task_sizes_test, 2, 2) + data_1 = np.random.random(task_sizes_train[0]) * 1e4 + 1e-10 + test_data_1 = np.random.random(task_sizes_test[0]) * 1e4 + 1e-10 + + data_2 = np.random.random(task_sizes_train[0]) * 2 - 1 + test_data_2 = np.random.random(task_sizes_test[0]) * 2 - 1 feat_1 = FeatureNode(0, "t_a", data_1, test_data_1, Unit()) feat_2 = FeatureNode(1, "x_a", data_2, test_data_2, Unit()) diff --git a/tests/pytest/test_feature_creation/test_feat_generation/test_cbrt_node.py b/tests/pytest/test_feature_creation/test_feat_generation/test_cbrt_node.py index 1a3551ca0f73219176cf8bec8627ccd359e4b33b..0c0b2e2ff39eb5d18c7d02334b20481fddf13f86 100644 --- a/tests/pytest/test_feature_creation/test_feat_generation/test_cbrt_node.py +++ b/tests/pytest/test_feature_creation/test_feat_generation/test_cbrt_node.py @@ -31,13 +31,16 @@ class InvalidFeatureMade(Exception): def test_cbrt_node(): - initialize_values_arr(90, 10, 2, 2) + task_sizes_train = [90] + task_sizes_test = [10] - data_1 = np.random.random(90) * 1e4 + 1e-10 - test_data_1 = np.random.random(10) * 1e4 + 1e-10 + initialize_values_arr(task_sizes_train, task_sizes_test, 2, 2) - data_2 = np.random.random(90) * 2 - 1 - test_data_2 = np.random.random(10) * 2 - 1 + data_1 = np.random.random(task_sizes_train[0]) * 1e4 
+ 1e-10 + test_data_1 = np.random.random(task_sizes_test[0]) * 1e4 + 1e-10 + + data_2 = np.random.random(task_sizes_train[0]) * 2 - 1 + test_data_2 = np.random.random(task_sizes_test[0]) * 2 - 1 feat_1 = FeatureNode(0, "t_a", data_1, test_data_1, Unit()) feat_2 = FeatureNode(1, "x_a", data_2, test_data_2, Unit()) diff --git a/tests/pytest/test_feature_creation/test_feat_generation/test_cos_node.py b/tests/pytest/test_feature_creation/test_feat_generation/test_cos_node.py index c0d3faa2fec6314232159dda001ac9ba444f311f..da510ead267f0ed2c6681c49c188609ca10d8209 100644 --- a/tests/pytest/test_feature_creation/test_feat_generation/test_cos_node.py +++ b/tests/pytest/test_feature_creation/test_feat_generation/test_cos_node.py @@ -21,13 +21,15 @@ class InvalidFeatureMade(Exception): def test_cos_node(): - initialize_values_arr(90, 10, 3, 2) + task_sizes_train = [90] + task_sizes_test = [10] + initialize_values_arr(task_sizes_train, task_sizes_test, 3, 2) - data_1 = np.random.randint(0, 10000, 90) * 2.0 * np.pi - test_data_1 = np.random.randint(0, 10000, 10) * 2.0 * np.pi + data_1 = np.random.randint(0, 10000, task_sizes_train[0]) * 2.0 * np.pi + test_data_1 = np.random.randint(0, 10000, task_sizes_test[0]) * 2.0 * np.pi - data_2 = np.random.random(90) * 2e4 - 1e4 - test_data_2 = np.random.random(10) * 2e4 - 1e4 + data_2 = np.random.random(task_sizes_train[0]) * 2e4 - 1e4 + test_data_2 = np.random.random(task_sizes_test[0]) * 2e4 - 1e4 feat_1 = FeatureNode(0, "t_a", data_1, test_data_1, Unit()) feat_2 = FeatureNode(1, "x_a", data_2, test_data_2, Unit()) diff --git a/tests/pytest/test_feature_creation/test_feat_generation/test_div_node.py b/tests/pytest/test_feature_creation/test_feat_generation/test_div_node.py index 216a7a9cfa1910d345a31b7fb0511f182b133c8f..07f10a89cccc713fc1c958c31122a186e98142a8 100644 --- a/tests/pytest/test_feature_creation/test_feat_generation/test_div_node.py +++ b/tests/pytest/test_feature_creation/test_feat_generation/test_div_node.py @@ -28,16 
+28,18 @@ class InvalidFeatureMade(Exception): def test_div_node(): - initialize_values_arr(90, 10, 5, 2) + task_sizes_train = [90] + task_sizes_test = [10] + initialize_values_arr(task_sizes_train, task_sizes_test, 5, 2) - data_1 = np.random.random(90) * 1e4 + 1e-10 - test_data_1 = np.random.random(10) * 1e4 + 1e-10 + data_1 = np.random.random(task_sizes_train[0]) * 1e4 + 1e-10 + test_data_1 = np.random.random(task_sizes_test[0]) * 1e4 + 1e-10 - data_2 = np.random.random(90) * 2e4 - 1e4 - test_data_2 = np.random.random(10) * 2e4 - 1e4 + data_2 = np.random.random(task_sizes_train[0]) * 2e4 - 1e4 + test_data_2 = np.random.random(task_sizes_test[0]) * 2e4 - 1e4 - data_3 = np.random.random(90) * 1e10 + 1e-10 - test_data_3 = np.random.random(10) * 1e10 + 1e-10 + data_3 = np.random.random(task_sizes_train[0]) * 1e10 + 1e-10 + test_data_3 = np.random.random(task_sizes_test[0]) * 1e10 + 1e-10 feat_1 = FeatureNode(0, "t_a", data_1, test_data_1, Unit("s")) feat_2 = FeatureNode(1, "x_a", data_2, test_data_2, Unit("m")) diff --git a/tests/pytest/test_feature_creation/test_feat_generation/test_exp_node.py b/tests/pytest/test_feature_creation/test_feat_generation/test_exp_node.py index b1e9c4cb75bc8407bcd39ad344e65006491dfec9..670687bffdc5c7d07b6dcceb2a3c565a6459beb6 100644 --- a/tests/pytest/test_feature_creation/test_feat_generation/test_exp_node.py +++ b/tests/pytest/test_feature_creation/test_feat_generation/test_exp_node.py @@ -30,16 +30,18 @@ class InvalidFeatureMade(Exception): def test_exp_node(): - initialize_values_arr(90, 10, 3, 2) + task_sizes_train = [90] + task_sizes_test = [10] + initialize_values_arr(task_sizes_train, task_sizes_test, 3, 2) - data_1 = np.random.random(90) + 1e-10 - test_data_1 = np.random.random(10) + 1e-10 + data_1 = np.random.random(task_sizes_train[0]) + 1e-10 + test_data_1 = np.random.random(task_sizes_test[0]) + 1e-10 - data_2 = np.random.random(90) * 2 - 1 - test_data_2 = np.random.random(10) * 2 - 1 + data_2 = 
np.random.random(task_sizes_train[0]) * 2 - 1 + test_data_2 = np.random.random(task_sizes_test[0]) * 2 - 1 - data_3 = np.random.random(90) * 10.0 + 1e-10 - test_data_3 = np.random.random(10) * 10.0 + 1e-10 + data_3 = np.random.random(task_sizes_train[0]) * 10.0 + 1e-10 + test_data_3 = np.random.random(task_sizes_test[0]) * 10.0 + 1e-10 feat_1 = FeatureNode(0, "t_a", data_1, test_data_1, Unit()) feat_2 = FeatureNode(1, "x_a", data_2, test_data_2, Unit()) diff --git a/tests/pytest/test_feature_creation/test_feat_generation/test_inv_node.py b/tests/pytest/test_feature_creation/test_feat_generation/test_inv_node.py index 219dbfa8d8a260aca46069397b53435f6ecefe84..01ae9dc5d3044c6d09ed5b2b3373b51b5b6fc39a 100644 --- a/tests/pytest/test_feature_creation/test_feat_generation/test_inv_node.py +++ b/tests/pytest/test_feature_creation/test_feat_generation/test_inv_node.py @@ -29,16 +29,18 @@ class InvalidFeatureMade(Exception): def test_inv_node(): - initialize_values_arr(90, 10, 4, 2) + task_sizes_train = [90] + task_sizes_test = [10] + initialize_values_arr(task_sizes_train, task_sizes_test, 4, 2) - data_1 = np.random.random(90) * 1e4 + 1e-10 - test_data_1 = np.random.random(10) * 1e4 + 1e-10 + data_1 = np.random.random(task_sizes_train[0]) * 1e4 + 1e-10 + test_data_1 = np.random.random(task_sizes_test[0]) * 1e4 + 1e-10 - data_2 = np.random.random(90) * 2 - 1 - test_data_2 = np.random.random(10) * 2 - 1 + data_2 = np.random.random(task_sizes_train[0]) * 2 - 1 + test_data_2 = np.random.random(task_sizes_test[0]) * 2 - 1 - data_3 = np.random.random(90) * 1e10 + 1e-10 - test_data_3 = np.random.random(10) * 1e10 + 1e-10 + data_3 = np.random.random(task_sizes_train[0]) * 1e10 + 1e-10 + test_data_3 = np.random.random(task_sizes_test[0]) * 1e10 + 1e-10 feat_1 = FeatureNode(0, "t_a", data_1, test_data_1, Unit()) feat_2 = FeatureNode(1, "x_a", data_2, test_data_2, Unit()) diff --git a/tests/pytest/test_feature_creation/test_feat_generation/test_log_node.py 
b/tests/pytest/test_feature_creation/test_feat_generation/test_log_node.py index f4643ff0356cb4fe11a383607bdba8e0722d7314..f995a41116de109832c6eb03092142de47d74dac 100644 --- a/tests/pytest/test_feature_creation/test_feat_generation/test_log_node.py +++ b/tests/pytest/test_feature_creation/test_feat_generation/test_log_node.py @@ -36,16 +36,18 @@ class InvalidFeatureMade(Exception): def test_log_node(): - initialize_values_arr(90, 10, 3, 2) + task_sizes_train = [90] + task_sizes_test = [10] + initialize_values_arr(task_sizes_train, task_sizes_test, 3, 2) - data_1 = np.random.random(90) * 1e4 + 1e-10 - test_data_1 = np.random.random(10) * 1e4 + 1e-10 + data_1 = np.random.random(task_sizes_train[0]) * 1e4 + 1e-10 + test_data_1 = np.random.random(task_sizes_test[0]) * 1e4 + 1e-10 - data_2 = np.random.random(90) * 2 - 1 - test_data_2 = np.random.random(10) * 2 - 1 + data_2 = np.random.random(task_sizes_train[0]) * 2 - 1 + test_data_2 = np.random.random(task_sizes_test[0]) * 2 - 1 - data_3 = np.random.random(90) * 1.0 + 1e-10 - test_data_3 = np.random.random(10) * 1.0 + 1e-10 + data_3 = np.random.random(task_sizes_train[0]) * 1.0 + 1e-10 + test_data_3 = np.random.random(task_sizes_test[0]) * 1.0 + 1e-10 feat_1 = FeatureNode(0, "t_a", data_1, test_data_1, Unit()) feat_2 = FeatureNode(1, "x_a", data_2, test_data_2, Unit()) diff --git a/tests/pytest/test_feature_creation/test_feat_generation/test_mult_node.py b/tests/pytest/test_feature_creation/test_feat_generation/test_mult_node.py index ed02b56753461969764b6b5c11e3869f62040ff2..8c7a9b2937cdf1ac4312b1d0fc6da32e33e5af81 100644 --- a/tests/pytest/test_feature_creation/test_feat_generation/test_mult_node.py +++ b/tests/pytest/test_feature_creation/test_feat_generation/test_mult_node.py @@ -28,16 +28,18 @@ class InvalidFeatureMade(Exception): def test_mult_node(): - initialize_values_arr(90, 10, 4, 2) + task_sizes_train = [90] + task_sizes_test = [10] + initialize_values_arr(task_sizes_train, task_sizes_test, 4, 2) - data_1 
= np.random.random(90) * 1e4 + 1e-10 - test_data_1 = np.random.random(10) * 1e4 + 1e-10 + data_1 = np.random.random(task_sizes_train[0]) * 1e4 + 1e-10 + test_data_1 = np.random.random(task_sizes_test[0]) * 1e4 + 1e-10 - data_2 = np.random.random(90) * 2e4 - 1e4 - test_data_2 = np.random.random(10) * 2e4 - 1e4 + data_2 = np.random.random(task_sizes_train[0]) * 2e4 - 1e4 + test_data_2 = np.random.random(task_sizes_test[0]) * 2e4 - 1e4 - data_3 = np.abs(np.random.random(90) * 1e10) + 0.1 - test_data_3 = np.abs(np.random.random(10) * 1e10) + 0.1 + data_3 = np.abs(np.random.random(task_sizes_train[0]) * 1e10) + 0.1 + test_data_3 = np.abs(np.random.random(task_sizes_test[0]) * 1e10) + 0.1 feat_1 = FeatureNode(0, "t_a", data_1, test_data_1, Unit("s")) feat_2 = FeatureNode(1, "x_a", data_2, test_data_2, Unit("m")) diff --git a/tests/pytest/test_feature_creation/test_feat_generation/test_neg_exp_node.py b/tests/pytest/test_feature_creation/test_feat_generation/test_neg_exp_node.py index f4d05d34408cba84c17a2730db59c50f590f28a4..2af614882498399f85612db44a9b0d05e8d8b2fd 100644 --- a/tests/pytest/test_feature_creation/test_feat_generation/test_neg_exp_node.py +++ b/tests/pytest/test_feature_creation/test_feat_generation/test_neg_exp_node.py @@ -30,16 +30,18 @@ class InvalidFeatureMade(Exception): def test_neg_exp_node(): - initialize_values_arr(90, 10, 3, 2) + task_sizes_train = [90] + task_sizes_test = [10] + initialize_values_arr(task_sizes_train, task_sizes_test, 3, 2) - data_1 = np.random.random(90) + 1e-10 - test_data_1 = np.random.random(10) + 1e-10 + data_1 = np.random.random(task_sizes_train[0]) + 1e-10 + test_data_1 = np.random.random(task_sizes_test[0]) + 1e-10 - data_2 = np.random.random(90) * 2 - 1 - test_data_2 = np.random.random(10) * 2 - 1 + data_2 = np.random.random(task_sizes_train[0]) * 2 - 1 + test_data_2 = np.random.random(task_sizes_test[0]) * 2 - 1 - data_3 = np.random.random(90) * 10.0 + 1e-10 - test_data_3 = np.random.random(10) * 10.0 + 1e-10 + data_3 
= np.random.random(task_sizes_train[0]) * 10.0 + 1e-10 + test_data_3 = np.random.random(task_sizes_test[0]) * 10.0 + 1e-10 feat_1 = FeatureNode(0, "t_a", data_1, test_data_1, Unit()) feat_2 = FeatureNode(1, "x_a", data_2, test_data_2, Unit()) diff --git a/tests/pytest/test_feature_creation/test_feat_generation/test_sin_node.py b/tests/pytest/test_feature_creation/test_feat_generation/test_sin_node.py index 1eddab2bb8b4e44c082992218018589e18478a14..9cef2b48ffcbae0f63e8812deba9b45c35c7ff4d 100644 --- a/tests/pytest/test_feature_creation/test_feat_generation/test_sin_node.py +++ b/tests/pytest/test_feature_creation/test_feat_generation/test_sin_node.py @@ -21,13 +21,19 @@ class InvalidFeatureMade(Exception): def test_sin_node(): - initialize_values_arr(90, 10, 3, 2) - - data_1 = np.random.randint(0, 10000, 90) * (2.0) * np.pi + np.pi / 2.0 - test_data_1 = np.random.randint(0, 10000, 10) * (2.0) * np.pi + np.pi / 2.0 - - data_2 = np.random.random(90) * 2e4 - 1e4 - test_data_2 = np.random.random(10) * 2e4 - 1e4 + task_sizes_train = [90] + task_sizes_test = [10] + initialize_values_arr(task_sizes_train, task_sizes_test, 3, 2) + + data_1 = ( + np.random.randint(0, 10000, task_sizes_train[0]) * (2.0) * np.pi + np.pi / 2.0 + ) + test_data_1 = ( + np.random.randint(0, 10000, task_sizes_test[0]) * (2.0) * np.pi + np.pi / 2.0 + ) + + data_2 = np.random.random(task_sizes_train[0]) * 2e4 - 1e4 + test_data_2 = np.random.random(task_sizes_test[0]) * 2e4 - 1e4 feat_1 = FeatureNode(0, "t_a", data_1, test_data_1, Unit()) feat_2 = FeatureNode(1, "x_a", data_2, test_data_2, Unit()) diff --git a/tests/pytest/test_feature_creation/test_feat_generation/test_six_pow_node.py b/tests/pytest/test_feature_creation/test_feat_generation/test_six_pow_node.py index 0149f26ceadf16449de6bdcf732505e3abe42579..203022a2f73df921a9ba83ece0663c0a402aac1d 100644 --- a/tests/pytest/test_feature_creation/test_feat_generation/test_six_pow_node.py +++ 
b/tests/pytest/test_feature_creation/test_feat_generation/test_six_pow_node.py @@ -31,13 +31,15 @@ class InvalidFeatureMade(Exception): def test_six_pow_node(): - initialize_values_arr(90, 10, 2, 2) + task_sizes_train = [90] + task_sizes_test = [10] + initialize_values_arr(task_sizes_train, task_sizes_test, 2, 2) - data_1 = np.random.random(90) * 1e1 + 1e-10 - test_data_1 = np.random.random(10) * 1e1 + 1e-10 + data_1 = np.random.random(task_sizes_train[0]) * 1e1 + 1e-10 + test_data_1 = np.random.random(task_sizes_test[0]) * 1e1 + 1e-10 - data_2 = np.random.choice([1.0, -1.0], 90) - test_data_2 = np.random.choice([1.0, -1.0], 10) + data_2 = np.random.choice([1.0, -1.0], task_sizes_train[0]) + test_data_2 = np.random.choice([1.0, -1.0], task_sizes_test[0]) feat_1 = FeatureNode(0, "t_a", data_1, test_data_1, Unit()) feat_2 = FeatureNode(1, "x_a", data_2, test_data_2, Unit()) diff --git a/tests/pytest/test_feature_creation/test_feat_generation/test_sq_node.py b/tests/pytest/test_feature_creation/test_feat_generation/test_sq_node.py index 1b79f2eb6e0269200e85cdac78b8b12011a3d6ac..3c1fcf245505e46fd82e7d62dc2cd09ad484228f 100644 --- a/tests/pytest/test_feature_creation/test_feat_generation/test_sq_node.py +++ b/tests/pytest/test_feature_creation/test_feat_generation/test_sq_node.py @@ -28,13 +28,15 @@ class InvalidFeatureMade(Exception): def test_square_node(): - initialize_values_arr(90, 10, 2, 2) + task_sizes_train = [90] + task_sizes_test = [10] + initialize_values_arr(task_sizes_train, task_sizes_test, 2, 2) - data_1 = np.random.random(90) * 1e4 + 1e-10 - test_data_1 = np.random.random(10) * 1e4 + 1e-10 + data_1 = np.random.random(task_sizes_train[0]) * 1e4 + 1e-10 + test_data_1 = np.random.random(task_sizes_test[0]) * 1e4 + 1e-10 - data_2 = np.random.choice([1.0, -1.0], 90) - test_data_2 = np.random.choice([1.0, -1.0], 10) + data_2 = np.random.choice([1.0, -1.0], task_sizes_train[0]) + test_data_2 = np.random.choice([1.0, -1.0], task_sizes_test[0]) feat_1 = 
FeatureNode(0, "t_a", data_1, test_data_1, Unit()) feat_2 = FeatureNode(1, "x_a", data_2, test_data_2, Unit()) diff --git a/tests/pytest/test_feature_creation/test_feat_generation/test_sqrt_node.py b/tests/pytest/test_feature_creation/test_feat_generation/test_sqrt_node.py index fdd558a836b65d46adc5237e8411cfc86eab53a4..0b225cd01d80f752f08a7ec5c4c66cf5792ae5a0 100644 --- a/tests/pytest/test_feature_creation/test_feat_generation/test_sqrt_node.py +++ b/tests/pytest/test_feature_creation/test_feat_generation/test_sqrt_node.py @@ -31,13 +31,15 @@ class InvalidFeatureMade(Exception): def test_sqrt_node(): - initialize_values_arr(90, 10, 2, 2) + task_sizes_train = [90] + task_sizes_test = [10] + initialize_values_arr(task_sizes_train, task_sizes_test, 2, 2) - data_1 = np.random.random(90) * 1e4 + 1e-10 - test_data_1 = np.random.random(10) * 1e4 + 1e-10 + data_1 = np.random.random(task_sizes_train[0]) * 1e4 + 1e-10 + test_data_1 = np.random.random(task_sizes_test[0]) * 1e4 + 1e-10 - data_2 = np.random.random(90) * 2 - 1 - test_data_2 = np.random.random(10) * 2 - 1 + data_2 = np.random.random(task_sizes_train[0]) * 2 - 1 + test_data_2 = np.random.random(task_sizes_test[0]) * 2 - 1 feat_1 = FeatureNode(0, "t_a", data_1, test_data_1, Unit()) feat_2 = FeatureNode(1, "x_a", data_2, test_data_2, Unit()) diff --git a/tests/pytest/test_feature_creation/test_feat_generation/test_sub_node.py b/tests/pytest/test_feature_creation/test_feat_generation/test_sub_node.py index 3fd14fdfd319d7275942cae6368f1ea41d17becb..46ed37ff561f85d52f950570d74b65b6b0f62454 100644 --- a/tests/pytest/test_feature_creation/test_feat_generation/test_sub_node.py +++ b/tests/pytest/test_feature_creation/test_feat_generation/test_sub_node.py @@ -21,16 +21,18 @@ class InvalidFeatureMade(Exception): def test_sub_node(): - initialize_values_arr(90, 10, 4, 2) + task_sizes_train = [90] + task_sizes_test = [10] + initialize_values_arr(task_sizes_train, task_sizes_test, 4, 2) - data_1 = np.random.random(90) * 1e4 + 
1e-10 - test_data_1 = np.random.random(10) * 1e4 + 1e-10 + data_1 = np.random.random(task_sizes_train[0]) * 1e4 + 1e-10 + test_data_1 = np.random.random(task_sizes_test[0]) * 1e4 + 1e-10 - data_2 = np.random.random(90) * 2e4 - 1e4 - test_data_2 = np.random.random(10) * 2e4 - 1e4 + data_2 = np.random.random(task_sizes_train[0]) * 2e4 - 1e4 + test_data_2 = np.random.random(task_sizes_test[0]) * 2e4 - 1e4 - data_3 = np.random.random(90) * 1e10 + 1e-10 - test_data_3 = np.random.random(10) * 1e10 + 1e-10 + data_3 = np.random.random(task_sizes_train[0]) * 1e10 + 1e-10 + test_data_3 = np.random.random(task_sizes_test[0]) * 1e10 + 1e-10 feat_1 = FeatureNode(0, "t_a", data_1, test_data_1, Unit("s")) feat_2 = FeatureNode(1, "x_a", data_2, test_data_2, Unit("m")) diff --git a/tests/pytest/test_feature_creation/test_feature_space/test_feature_space.py b/tests/pytest/test_feature_creation/test_feature_space/test_feature_space.py index 8c77ea00c8cda6afe9b464e49408abf3da10a234..a056c0bc1df3aef30dd74490f5b6101063009203 100644 --- a/tests/pytest/test_feature_creation/test_feature_space/test_feature_space.py +++ b/tests/pytest/test_feature_creation/test_feature_space/test_feature_space.py @@ -16,31 +16,42 @@ import numpy as np from sissopp import ( FeatureNode, + FeatureSpace, + Inputs, Unit, initialize_values_arr, ) -from sissopp.py_interface import get_fs def test_feature_space(): - initialize_values_arr(90, 10, 10, 2) - phi_0 = [ + task_sizes_train = [90] + task_sizes_test = [10] + + initialize_values_arr(task_sizes_train, task_sizes_test, 10, 2) + + inputs = Inputs() + inputs.phi_0 = [ FeatureNode( ff, f"feat_{ff}", - np.random.random(90) * 1e2 - 50, - np.random.random(10) * 1e2 - 50, + np.random.random(task_sizes_train[0]) * 1e2 - 50, + np.random.random(task_sizes_test[0]) * 1e2 - 50, Unit(), ) for ff in range(10) ] - prop = np.power(phi_0[0].value + phi_0[1].value, 2.0) + inputs.prop_train = np.power(inputs.phi_0[0].value + inputs.phi_0[1].value, 2.0) - op_set = ["add", "sub", 
"mult", "sq", "cb", "sqrt", "cbrt"] + inputs.allowed_ops = ["add", "sub", "mult", "sq", "cb", "sqrt", "cbrt"] + inputs.task_sizes_train = task_sizes_train + inputs.task_sizes_test = task_sizes_test + inputs.calc_type = "regression" + inputs.max_rung = 2 + inputs.n_sis_select = 10 - feat_space = get_fs(phi_0, prop, [90], op_set, [], "regression", 2, 10) - feat_space.sis(prop) + feat_space = FeatureSpace(inputs) + feat_space.sis(inputs.prop_train) shutil.rmtree("feature_space/") diff --git a/tests/pytest/test_feature_creation/test_feature_space/test_gen_feature_space_from_file.py b/tests/pytest/test_feature_creation/test_feature_space/test_gen_feature_space_from_file.py index 865cc68cd6876c4665394c388d5726c819c599f2..6bf76aa8b963659132ec181bd0528352107fa0ce 100644 --- a/tests/pytest/test_feature_creation/test_feature_space/test_gen_feature_space_from_file.py +++ b/tests/pytest/test_feature_creation/test_feature_space/test_gen_feature_space_from_file.py @@ -27,13 +27,16 @@ parent = pathlib.Path(__file__).parent.absolute() def test_gen_feature_space_from_file(): - initialize_values_arr(90, 10, 10, 1) + task_sizes_train = [90] + task_sizes_test = [10] + + initialize_values_arr(task_sizes_train, task_sizes_test, 10, 1) phi_0 = [ FeatureNode( ff, f"feat_{ff}", - np.random.random(90) * 1e2 - 50, - np.random.random(10) * 1e2 - 50, + np.random.random(task_sizes_train[0]) * 1e2 - 50, + np.random.random(task_sizes_test[0]) * 1e2 - 50, Unit(), ) for ff in range(10) @@ -42,7 +45,7 @@ def test_gen_feature_space_from_file(): prop = np.power(phi_0[0].value + phi_0[1].value, 2.0) feat_space = FeatureSpace( - f"{parent}/phi.txt", phi_0, prop, [90], "regression", 1, 1.0 + f"{parent}/phi.txt", phi_0, prop, task_sizes_train, "regression", 1, 1.0 ) feat_space.sis(prop) assert feat_space.phi_selected[0].postfix_expr == "1|0|add|sq" diff --git a/tests/pytest/test_feature_creation/test_parameterize/test_lorentizan.py b/tests/pytest/test_feature_creation/test_parameterize/test_lorentizan.py 
index d165e0b452dead7c2a53c44cb818f79dae50f97e..14eaa0b06266e21483227c1ac5b0605975c76688 100644 --- a/tests/pytest/test_feature_creation/test_parameterize/test_lorentizan.py +++ b/tests/pytest/test_feature_creation/test_parameterize/test_lorentizan.py @@ -35,15 +35,18 @@ def check_feat_parmeters(feat, prop): def test_lorentzian(): - initialize_values_arr(900, 10, 1, 2) + task_sizes_train = [900] + task_sizes_test = [10] + + initialize_values_arr(task_sizes_train, task_sizes_test, 1, 2) initialize_param_storage() - data_1 = np.linspace(-20.023658, 20.23658, 900) - test_data_1 = np.linspace(-19.98549, 19.08, 10) + data_1 = np.linspace(-20.023658, 20.23658, task_sizes_train[0]) + test_data_1 = np.linspace(-19.98549, 19.08, task_sizes_test[0]) feat_1 = FeatureNode(0, "t_a", data_1, test_data_1, Unit()) prop = 21.4 / ((data_1 - 0.25) ** 2.0 + 7.1) - 1.478 - optimizer = get_reg_optimizer([900], prop, 2, 2, 0.5, False) + optimizer = get_reg_optimizer([task_sizes_train[0]], prop, 2, 2, 0.5, False) feat_node = InvParamNode(SqNode(feat_1, 2, 1e-50, 1e50), 3, 1e-50, 1e50) feat_node.get_parameters(optimizer) diff --git a/tests/pytest/test_feature_creation/test_parameterize/test_param_abs.py b/tests/pytest/test_feature_creation/test_parameterize/test_param_abs.py index 4a265d0b84f018f8514874048df9ed73c0d51607..c7ee38f203cac3d797e28f7b3fc0568b09428951 100644 --- a/tests/pytest/test_feature_creation/test_parameterize/test_param_abs.py +++ b/tests/pytest/test_feature_creation/test_parameterize/test_param_abs.py @@ -34,15 +34,18 @@ def check_feat_parmeters(feat, prop): def test_param_abs_node(): - initialize_values_arr(900, 10, 1, 1) + task_sizes_train = [900] + task_sizes_test = [10] + + initialize_values_arr(task_sizes_train, task_sizes_test, 1, 1) initialize_param_storage() - data_1 = np.linspace(-20, 20, 900) - test_data_1 = np.linspace(-19.99, 19.99, 10) + data_1 = np.linspace(-20, 20, task_sizes_train[0]) + test_data_1 = np.linspace(-19.99, 19.99, task_sizes_test[0]) feat_1 = 
FeatureNode(0, "t_a", data_1, test_data_1, Unit()) prop = -2.3 * np.abs(1.55 * data_1 + 0.8751) - 1.2 - optimizer = get_reg_optimizer([900], prop, 1, 1, 0.5, False) + optimizer = get_reg_optimizer([task_sizes_train[0]], prop, 1, 1, 0.5, False) feat_node = AbsParamNode(feat_1, 2, 1e-50, 1e50) feat_node.get_parameters(optimizer) diff --git a/tests/pytest/test_feature_creation/test_parameterize/test_param_abs_diff.py b/tests/pytest/test_feature_creation/test_parameterize/test_param_abs_diff.py index 2745cff34a35d76f9a1fbc5dbef7ae2867f80a48..0e27d3541acdb53bb5ce0bab12b5df525167d968 100644 --- a/tests/pytest/test_feature_creation/test_parameterize/test_param_abs_diff.py +++ b/tests/pytest/test_feature_creation/test_parameterize/test_param_abs_diff.py @@ -34,18 +34,21 @@ def check_feat_parmeters(feat, prop): def test_param_abs_diff_node(): - initialize_values_arr(900, 100, 2, 1) + task_sizes_train = [900] + task_sizes_test = [10] - data_1 = np.linspace(-20, 20, 900) - test_data_1 = np.linspace(-19.99, 19.99, 100) + initialize_values_arr(task_sizes_train, task_sizes_test, 2, 1) + + data_1 = np.linspace(-20, 20, task_sizes_train[0]) + test_data_1 = np.linspace(-19.99, 19.99, task_sizes_test[0]) feat_1 = FeatureNode(0, "t_a", data_1, test_data_1, Unit()) - data_2 = np.linspace(-14.256, 18.6523, 900) - test_data_2 = np.linspace(-16.256, 17.6523, 100) + data_2 = np.linspace(-14.256, 18.6523, task_sizes_train[0]) + test_data_2 = np.linspace(-16.256, 17.6523, task_sizes_test[0]) feat_2 = FeatureNode(1, "x_a", data_2, test_data_2, Unit()) prop = -2.3 * np.abs(data_1 - (1.5 * data_2 + 0.8751)) - 1.2 - optimizer = get_reg_optimizer([900], prop, 1, 1, 0.5, False) + optimizer = get_reg_optimizer([task_sizes_train[0]], prop, 1, 1, 0.5, False) feat_node = AbsDiffParamNode(feat_1, feat_2, 2, 1e-50, 1e50) feat_node.get_parameters(optimizer) diff --git a/tests/pytest/test_feature_creation/test_parameterize/test_param_add.py 
b/tests/pytest/test_feature_creation/test_parameterize/test_param_add.py index bc44eb695a82187d106b13a2a5373dd0d77f88e3..6d7d35ced976f031e53d077ba3d59e22f025824e 100644 --- a/tests/pytest/test_feature_creation/test_parameterize/test_param_add.py +++ b/tests/pytest/test_feature_creation/test_parameterize/test_param_add.py @@ -33,19 +33,22 @@ def check_feat_parmeters(feat, prop): def test_param_add_node(): - initialize_values_arr(90, 10, 2, 1) + task_sizes_train = [90] + task_sizes_test = [10] + + initialize_values_arr(task_sizes_train, task_sizes_test, 2, 1) initialize_param_storage() - data_1 = np.linspace(-20, 20, 90) - test_data_1 = np.linspace(-19.99, 19.99, 10) + data_1 = np.linspace(-20, 20, task_sizes_train[0]) + test_data_1 = np.linspace(-19.99, 19.99, task_sizes_test[0]) feat_1 = FeatureNode(0, "t_a", data_1, test_data_1, Unit()) - data_2 = np.linspace(-14.256, 18.6523, 90) - test_data_2 = np.linspace(-16.256, 17.6523, 10) + data_2 = np.linspace(-14.256, 18.6523, task_sizes_train[0]) + test_data_2 = np.linspace(-16.256, 17.6523, task_sizes_test[0]) feat_2 = FeatureNode(1, "x_a", data_2, test_data_2, Unit()) prop = -2.3 * (data_1 + 1.5 * data_2) - 1.2 - optimizer = get_reg_optimizer([90], prop, 1, 1, 0.5, False) + optimizer = get_reg_optimizer([task_sizes_train[0]], prop, 1, 1, 0.5, False) feat_node = AddParamNode(feat_1, feat_2, 2, 1e-50, 1e50) feat_node.get_parameters(optimizer) diff --git a/tests/pytest/test_feature_creation/test_parameterize/test_param_cb.py b/tests/pytest/test_feature_creation/test_parameterize/test_param_cb.py index 16ba2772a59fe4130e2b48edc408ddb2e606366b..7bd015a0213388f23d347e426205a96d254eaf1c 100644 --- a/tests/pytest/test_feature_creation/test_parameterize/test_param_cb.py +++ b/tests/pytest/test_feature_creation/test_parameterize/test_param_cb.py @@ -34,15 +34,18 @@ def check_feat_parmeters(feat, prop): def test_param_cb_node(): - initialize_values_arr(90, 10, 1, 1) + task_sizes_train = [90] + task_sizes_test = [10] + + 
initialize_values_arr(task_sizes_train, task_sizes_test, 1, 1) initialize_param_storage() - data_1 = np.linspace(-15, 15, 90) - test_data_1 = np.linspace(-19.99, 19.99, 10) + data_1 = np.linspace(-15, 15, task_sizes_train[0]) + test_data_1 = np.linspace(-19.99, 19.99, task_sizes_test[0]) feat_1 = FeatureNode(0, "t_a", data_1, test_data_1, Unit()) prop = 1.55 * np.power(data_1 + 0.8751, 3.0) - 1.2 - optimizer = get_reg_optimizer([90], prop, 1, 1, 0.5, False) + optimizer = get_reg_optimizer([task_sizes_train[0]], prop, 1, 1, 0.5, False) feat_node = CbParamNode(feat_1, 2, 1e-50, 1e50) feat_node.get_parameters(optimizer) diff --git a/tests/pytest/test_feature_creation/test_parameterize/test_param_cbrt.py b/tests/pytest/test_feature_creation/test_parameterize/test_param_cbrt.py index 1b0bbe60356f7753eb5aa151867c89312a22bde0..861920f9c4d511011a480ae05cb2d2d612576364 100644 --- a/tests/pytest/test_feature_creation/test_parameterize/test_param_cbrt.py +++ b/tests/pytest/test_feature_creation/test_parameterize/test_param_cbrt.py @@ -34,15 +34,18 @@ def check_feat_parmeters(feat, prop): def test_param_cbrt_node(): - initialize_values_arr(90, 10, 1, 1) + task_sizes_train = [90] + task_sizes_test = [10] + + initialize_values_arr(task_sizes_train, task_sizes_test, 1, 1) initialize_param_storage() - data_1 = np.linspace(0.5, 20, 90) - test_data_1 = np.linspace(0.52145, 19.99, 10) + data_1 = np.linspace(0.5, 20, task_sizes_train[0]) + test_data_1 = np.linspace(0.52145, 19.99, task_sizes_test[0]) feat_1 = FeatureNode(0, "t_a", data_1, test_data_1, Unit()) prop = np.cbrt(1.55 * data_1 + 0.8751) - 1.2 - optimizer = get_reg_optimizer([90], prop, 1, 1, 0.5, False) + optimizer = get_reg_optimizer([task_sizes_train[0]], prop, 1, 1, 0.5, False) feat_node = CbrtParamNode(feat_1, 2, 1e-50, 1e50) feat_node.get_parameters(optimizer) diff --git a/tests/pytest/test_feature_creation/test_parameterize/test_param_cos.py b/tests/pytest/test_feature_creation/test_parameterize/test_param_cos.py 
index 527e6978cfe175782c46f53cd837e266bee4a686..0e57422251122c8609fc9853eac10a0370a00d94 100644 --- a/tests/pytest/test_feature_creation/test_parameterize/test_param_cos.py +++ b/tests/pytest/test_feature_creation/test_parameterize/test_param_cos.py @@ -34,15 +34,18 @@ def check_feat_parmeters(feat, prop): def test_param_cos_node(): - initialize_values_arr(900, 100, 1, 1) + task_sizes_train = [900] + task_sizes_test = [10] + + initialize_values_arr(task_sizes_train, task_sizes_test, 1, 1) initialize_param_storage() - data_1 = np.linspace(-2.0 * np.pi, 2.0 * np.pi, 900) - test_data_1 = np.linspace(-1.994 * np.pi, 1.994 * np.pi, 100) + data_1 = np.linspace(-2.0 * np.pi, 2.0 * np.pi, task_sizes_train[0]) + test_data_1 = np.linspace(-1.994 * np.pi, 1.994 * np.pi, task_sizes_test[0]) feat_1 = FeatureNode(0, "t_a", data_1, test_data_1, Unit()) prop = -1.1 * np.cos(1.25 * data_1 + 2.13) + 0.01578 - optimizer = get_reg_optimizer([900], prop, 1, 1, 0.5, False) + optimizer = get_reg_optimizer([task_sizes_train[0]], prop, 1, 1, 0.5, False) feat_node = CosParamNode(feat_1, 1, 1e-50, 1e50) feat_node.get_parameters(optimizer) diff --git a/tests/pytest/test_feature_creation/test_parameterize/test_param_div.py b/tests/pytest/test_feature_creation/test_parameterize/test_param_div.py index 2bbc8fbbd2f55ed0c4aae831f09d1ebf5b74e44c..be4dbc1c320dbef67732f2b227a74456bc246e66 100644 --- a/tests/pytest/test_feature_creation/test_parameterize/test_param_div.py +++ b/tests/pytest/test_feature_creation/test_parameterize/test_param_div.py @@ -34,19 +34,22 @@ def check_feat_parmeters(feat, prop): def test_param_div_node(): - initialize_values_arr(900, 10, 2, 1) + task_sizes_train = [900] + task_sizes_test = [10] + + initialize_values_arr(task_sizes_train, task_sizes_test, 2, 1) initialize_param_storage() - data_1 = np.random.uniform(-2.50, 2.50, 900) - test_data_1 = np.linspace(0.52145, 19.99, 10) + data_1 = np.random.uniform(-2.50, 2.50, task_sizes_train[0]) + test_data_1 = 
np.linspace(0.52145, 19.99, task_sizes_test[0]) feat_1 = FeatureNode(0, "t_a", data_1, test_data_1, Unit()) - data_2 = np.random.uniform(0.5, 5.0, 900) - test_data_2 = np.linspace(0.41, 19.8432, 10) + data_2 = np.random.uniform(0.5, 5.0, task_sizes_train[0]) + test_data_2 = np.linspace(0.41, 19.8432, task_sizes_test[0]) feat_2 = FeatureNode(1, "x_a", data_2, test_data_2, Unit()) prop = 4.124 * data_1 / ((data_2 + 1.8751)) - 0.12 - optimizer = get_reg_optimizer([900], prop, 1, 1, 0.5, False) + optimizer = get_reg_optimizer([task_sizes_train[0]], prop, 1, 1, 0.5, False) feat_node = DivParamNode(feat_1, feat_2, 2, 1e-50, 1e50) feat_node.get_parameters(optimizer) diff --git a/tests/pytest/test_feature_creation/test_parameterize/test_param_exp.py b/tests/pytest/test_feature_creation/test_parameterize/test_param_exp.py index 0cccc1b361fd900a592e72afecb913487fe5d1af..61c17fdab5950caa1ebd8799aa66d82aeabfd95e 100644 --- a/tests/pytest/test_feature_creation/test_parameterize/test_param_exp.py +++ b/tests/pytest/test_feature_creation/test_parameterize/test_param_exp.py @@ -34,15 +34,18 @@ def check_feat_parmeters(feat, prop): def test_param_exp_node(): - initialize_values_arr(900, 10, 1, 1) + task_sizes_train = [900] + task_sizes_test = [10] + + initialize_values_arr(task_sizes_train, task_sizes_test, 1, 1) initialize_param_storage() - data_1 = np.random.uniform(-2.0, 2.0, 900) - test_data_1 = np.linspace(-19.99, 19.99, 10) + data_1 = np.random.uniform(-2.0, 2.0, task_sizes_train[0]) + test_data_1 = np.linspace(-19.99, 19.99, task_sizes_test[0]) feat_1 = FeatureNode(0, "t_a", data_1, test_data_1, Unit()) prop = np.exp(1.05 * data_1 + 0.08751) - 0.12 - optimizer = get_reg_optimizer([900], prop, 1, 1, 0.5, False) + optimizer = get_reg_optimizer([task_sizes_train[0]], prop, 1, 1, 0.5, False) feat_node = ExpParamNode(feat_1, 1, 1e-50, 1e50) feat_node.get_parameters(optimizer) diff --git a/tests/pytest/test_feature_creation/test_parameterize/test_param_inv.py 
b/tests/pytest/test_feature_creation/test_parameterize/test_param_inv.py index 2da9ff2afc3bcf24c42d99999ef86519ba19d3bd..492984bcc725b0927e37ddc7d17f381dcb8cf9ce 100644 --- a/tests/pytest/test_feature_creation/test_parameterize/test_param_inv.py +++ b/tests/pytest/test_feature_creation/test_parameterize/test_param_inv.py @@ -34,15 +34,18 @@ def check_feat_parmeters(feat, prop): def test_param_inv_node(): - initialize_values_arr(90, 10, 1, 1) + task_sizes_train = [90] + task_sizes_test = [10] + + initialize_values_arr(task_sizes_train, task_sizes_test, 1, 1) initialize_param_storage() - data_1 = np.linspace(0.5, 20, 90) - test_data_1 = np.linspace(1.0, 19.99, 10) + data_1 = np.linspace(0.5, 20, task_sizes_train[0]) + test_data_1 = np.linspace(1.0, 19.99, task_sizes_test[0]) feat_1 = FeatureNode(0, "t_a", data_1, test_data_1, Unit()) prop = 1.0 / (1.55 * data_1 + 0.8751) - 1.2 - optimizer = get_reg_optimizer([90], prop, 1, 1, 0.5, False) + optimizer = get_reg_optimizer([task_sizes_train[0]], prop, 1, 1, 0.5, False) feat_node = InvParamNode(feat_1, 2, 1e-50, 1e50) feat_node.get_parameters(optimizer) diff --git a/tests/pytest/test_feature_creation/test_parameterize/test_param_log.py b/tests/pytest/test_feature_creation/test_parameterize/test_param_log.py index 727d1534a1d76c7f500df9025471df08fa4708f0..33195bcbef5f5e492ddb477b0bdfbddac53cc95d 100644 --- a/tests/pytest/test_feature_creation/test_parameterize/test_param_log.py +++ b/tests/pytest/test_feature_creation/test_parameterize/test_param_log.py @@ -34,15 +34,18 @@ def check_feat_parmeters(feat, prop): def test_param_log_node(): - initialize_values_arr(900, 10, 1, 1) + task_sizes_train = [900] + task_sizes_test = [10] + + initialize_values_arr(task_sizes_train, task_sizes_test, 1, 1) initialize_param_storage() - data_1 = np.linspace(0.5, 20, 900) - test_data_1 = np.linspace(0.52145, 19.99, 10) + data_1 = np.linspace(0.5, 20, task_sizes_train[0]) + test_data_1 = np.linspace(0.52145, 19.99, task_sizes_test[0]) feat_1 
= FeatureNode(0, "t_a", data_1, test_data_1, Unit()) prop = -2.014 * np.log(1.15 * data_1 + 0.1387) - optimizer = get_reg_optimizer([900], prop, 1, 1, 0.5, False) + optimizer = get_reg_optimizer([task_sizes_train[0]], prop, 1, 1, 0.5, False) feat_node = LogParamNode(feat_1, 2, 1e-50, 1e50) feat_node.get_parameters(optimizer) diff --git a/tests/pytest/test_feature_creation/test_parameterize/test_param_neg_exp.py b/tests/pytest/test_feature_creation/test_parameterize/test_param_neg_exp.py index c867a5657f1f5faae25ca4e3e9ea6ef0b0060ac3..6d8fcda8af0d1cd5cd28c07c9c87e7bdac144b55 100644 --- a/tests/pytest/test_feature_creation/test_parameterize/test_param_neg_exp.py +++ b/tests/pytest/test_feature_creation/test_parameterize/test_param_neg_exp.py @@ -34,15 +34,18 @@ def check_feat_parmeters(feat, prop): def test_param_neg_exp_node(): - initialize_values_arr(900, 10, 1, 1) + task_sizes_train = [900] + task_sizes_test = [10] + + initialize_values_arr(task_sizes_train, task_sizes_test, 1, 1) initialize_param_storage() - data_1 = np.random.uniform(-5.0, 5.0, 900) - test_data_1 = np.linspace(-19.99, 19.99, 10) + data_1 = np.random.uniform(-5.0, 5.0, task_sizes_train[0]) + test_data_1 = np.linspace(-19.99, 19.99, task_sizes_test[0]) feat_1 = FeatureNode(0, "t_a", data_1, test_data_1, Unit()) prop = np.exp(-1.05 * data_1 + 0.08751) - 0.12 - optimizer = get_reg_optimizer([900], prop, 1, 1, 0.5, False) + optimizer = get_reg_optimizer([task_sizes_train[0]], prop, 1, 1, 0.5, False) feat_node = NegExpParamNode(feat_1, 2, 1e-50, 1e50) feat_node.get_parameters(optimizer) diff --git a/tests/pytest/test_feature_creation/test_parameterize/test_param_sin.py b/tests/pytest/test_feature_creation/test_parameterize/test_param_sin.py index 32ce702e20cc88050e44fbab132c3e857705f06a..5de305a9db4b5d88365194643d36df5e9f8c5e9f 100644 --- a/tests/pytest/test_feature_creation/test_parameterize/test_param_sin.py +++ b/tests/pytest/test_feature_creation/test_parameterize/test_param_sin.py @@ -34,15 
+34,18 @@ def check_feat_parmeters(feat, prop): def test_param_sin_node(): - initialize_values_arr(900, 100, 1, 1) + task_sizes_train = [900] + task_sizes_test = [10] + + initialize_values_arr(task_sizes_train, task_sizes_test, 1, 1) initialize_param_storage() - data_1 = np.linspace(-2.0 * np.pi, 2.0 * np.pi, 900) - test_data_1 = np.linspace(-1.994 * np.pi, 1.994 * np.pi, 100) + data_1 = np.linspace(-2.0 * np.pi, 2.0 * np.pi, task_sizes_train[0]) + test_data_1 = np.linspace(-1.994 * np.pi, 1.994 * np.pi, task_sizes_test[0]) feat_1 = FeatureNode(0, "t_a", data_1, test_data_1, Unit()) prop = -1.1 * np.sin(1.25 * data_1 + 2.13) + 0.01578 - optimizer = get_reg_optimizer([900], prop, 1, 1, 0.5, False) + optimizer = get_reg_optimizer([task_sizes_train[0]], prop, 1, 1, 0.5, False) feat_node = SinParamNode(feat_1, 1, 1e-50, 1e50) feat_node.get_parameters(optimizer) diff --git a/tests/pytest/test_feature_creation/test_parameterize/test_param_six_pow.py b/tests/pytest/test_feature_creation/test_parameterize/test_param_six_pow.py index 20ed0986d0b11a76a59f5aa89a4e738a0936f5bb..2a2376dc24fd122c2a769c5a3e3ed089df0ed7cd 100644 --- a/tests/pytest/test_feature_creation/test_parameterize/test_param_six_pow.py +++ b/tests/pytest/test_feature_creation/test_parameterize/test_param_six_pow.py @@ -34,15 +34,18 @@ def check_feat_parmeters(feat, prop): def test_param_six_pow_node(): - initialize_values_arr(900, 10, 1, 1) + task_sizes_train = [900] + task_sizes_test = [10] + + initialize_values_arr(task_sizes_train, task_sizes_test, 1, 1) initialize_param_storage() - data_1 = np.random.uniform(-2.0, 2.0, 900) - test_data_1 = np.random.uniform(-19.99, 19.99, 10) + data_1 = np.random.uniform(-2.0, 2.0, task_sizes_train[0]) + test_data_1 = np.random.uniform(-19.99, 19.99, task_sizes_test[0]) feat_1 = FeatureNode(0, "t_a", data_1, test_data_1, Unit()) prop = 1.55 * np.power(data_1 + 0.21, 6.0) - 0.12 - optimizer = get_reg_optimizer([900], prop, 1, 1, 0.5, False) + optimizer = 
get_reg_optimizer([task_sizes_train[0]], prop, 1, 1, 0.5, False) feat_node = SixPowParamNode(feat_1, 1, 1e-50, 1e50) feat_node.get_parameters(optimizer) diff --git a/tests/pytest/test_feature_creation/test_parameterize/test_param_sq.py b/tests/pytest/test_feature_creation/test_parameterize/test_param_sq.py index 1e4b67c6c1edad2c3b36959e061a3017ae048801..50102b38df4c0f171ae46748ae929026bfce9dde 100644 --- a/tests/pytest/test_feature_creation/test_parameterize/test_param_sq.py +++ b/tests/pytest/test_feature_creation/test_parameterize/test_param_sq.py @@ -34,15 +34,18 @@ def check_feat_parmeters(feat, prop): def test_param_sq_node(): - initialize_values_arr(90, 10, 1, 1) + task_sizes_train = [90] + task_sizes_test = [10] + + initialize_values_arr(task_sizes_train, task_sizes_test, 1, 1) initialize_param_storage() - data_1 = np.linspace(-20, 20, 90) - test_data_1 = np.linspace(-19.99, 19.99, 10) + data_1 = np.linspace(-20, 20, task_sizes_train[0]) + test_data_1 = np.linspace(-19.99, 19.99, task_sizes_test[0]) feat_1 = FeatureNode(0, "t_a", data_1, test_data_1, Unit()) prop = np.power(1.55 * data_1 + 0.8751, 2.0) - 1.2 - optimizer = get_reg_optimizer([90], prop, 1, 1, 0.5, False) + optimizer = get_reg_optimizer([task_sizes_train[0]], prop, 1, 1, 0.5, False) feat_node = SqParamNode(feat_1, 2, 1e-50, 1e50) feat_node.get_parameters(optimizer) diff --git a/tests/pytest/test_feature_creation/test_parameterize/test_param_sqrt.py b/tests/pytest/test_feature_creation/test_parameterize/test_param_sqrt.py index 3542d830f024c216515efddd8e4bb93395c4bf8f..f5144c7c3c81cf0332cd5593d2875b254b448c69 100644 --- a/tests/pytest/test_feature_creation/test_parameterize/test_param_sqrt.py +++ b/tests/pytest/test_feature_creation/test_parameterize/test_param_sqrt.py @@ -34,15 +34,18 @@ def check_feat_parmeters(feat, prop): def test_param_sqrt_node(): - initialize_values_arr(900, 10, 1, 1) + task_sizes_train = [900] + task_sizes_test = [10] + + initialize_values_arr(task_sizes_train, 
task_sizes_test, 1, 1) initialize_param_storage() - data_1 = np.linspace(0.5, 500, 900) - test_data_1 = np.linspace(0.52145, 19.99, 10) + data_1 = np.linspace(0.5, 500, task_sizes_train[0]) + test_data_1 = np.linspace(0.52145, 19.99, task_sizes_test[0]) feat_1 = FeatureNode(0, "t_a", data_1, test_data_1, Unit()) prop = np.sqrt(1.55 * data_1 + 0.8751) - 1.2 - optimizer = get_reg_optimizer([900], prop, 1, 1, 0.5, False) + optimizer = get_reg_optimizer([task_sizes_train[0]], prop, 1, 1, 0.5, False) feat_node = SqrtParamNode(feat_1, 2, 1e-50, 1e50) feat_node.get_parameters(optimizer) diff --git a/tests/pytest/test_feature_creation/test_parameterize/test_param_sub.py b/tests/pytest/test_feature_creation/test_parameterize/test_param_sub.py index f0fd0edc1892aaa3cfea7bcad7e544e1ced44671..615a76cf6a4f3ab0e2e79d9a38b14bf4aa154640 100644 --- a/tests/pytest/test_feature_creation/test_parameterize/test_param_sub.py +++ b/tests/pytest/test_feature_creation/test_parameterize/test_param_sub.py @@ -33,19 +33,22 @@ def check_feat_parmeters(feat, prop): def test_param_sub_node(): - initialize_values_arr(90, 10, 2, 1) + task_sizes_train = [90] + task_sizes_test = [10] + + initialize_values_arr(task_sizes_train, task_sizes_test, 2, 1) initialize_param_storage() - data_1 = np.linspace(-20, 20, 90) - test_data_1 = np.linspace(-19.99, 19.99, 10) + data_1 = np.linspace(-20, 20, task_sizes_train[0]) + test_data_1 = np.linspace(-19.99, 19.99, task_sizes_test[0]) feat_1 = FeatureNode(0, "t_a", data_1, test_data_1, Unit()) - data_2 = np.linspace(-14.256, 18.6523, 90) + data_2 = np.linspace(-14.256, 18.6523, task_sizes_train[0]) test_data_2 = np.linspace(-16.256, 17.6523, 10) feat_2 = FeatureNode(1, "x_a", data_2, test_data_2, Unit()) prop = -2.3 * (data_1 - 1.5 * data_2) - 1.2 - optimizer = get_reg_optimizer([90], prop, 1, 1, 0.5, False) + optimizer = get_reg_optimizer([task_sizes_train[0]], prop, 1, 1, 0.5, False) feat_node = SubParamNode(feat_1, feat_2, 2, 1e-50, 1e50) 
feat_node.get_parameters(optimizer) diff --git a/tests/pytest/test_inputs/test_inputs.py b/tests/pytest/test_inputs/test_inputs.py new file mode 100644 index 0000000000000000000000000000000000000000..26d634a78d6933f9a7eb34dc3d32754a60032e06 --- /dev/null +++ b/tests/pytest/test_inputs/test_inputs.py @@ -0,0 +1,165 @@ +# Copyright 2021 Thomas A. R. Purcell +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import shutil +import numpy as np +from sissopp import ( + FeatureNode, + Inputs, + Unit, + finalize_values_arr, +) +import matplotlib.pyplot as plt + + +def test_inputs(): + finalize_values_arr() + + inputs = Inputs() + + sample_ids_train = ["a", "b", "c"] + inputs.sample_ids_train = sample_ids_train + assert inputs.sample_ids_train == sample_ids_train + + sample_ids_test = ["d"] + inputs.sample_ids_test = sample_ids_test + assert inputs.sample_ids_test == sample_ids_test + + task_sizes_train = [2, 1] + inputs.task_sizes_train = task_sizes_train + assert inputs.task_sizes_train == task_sizes_train + + task_sizes_test = [1, 0] + inputs.task_sizes_test = task_sizes_test + assert inputs.task_sizes_test == task_sizes_test + + task_names = ["task_1", "task_2"] + inputs.task_names = task_names + assert inputs.task_names == task_names + + allowed_param_ops = ["log"] + inputs.allowed_param_ops = allowed_param_ops + assert inputs.allowed_param_ops == allowed_param_ops + + allowed_ops = ["sq", "cb"] + inputs.allowed_ops = allowed_ops + assert inputs.allowed_ops == allowed_ops 
+ + prop_train = np.array([1.0, 4.0, 9.0]) + inputs.prop_train = prop_train + assert np.all(inputs.prop_train == prop_train) + + prop_test = np.array([16.0]) + inputs.prop_test = prop_test + assert np.all(inputs.prop_test == prop_test) + + leave_out_inds = [3] + inputs.leave_out_inds = leave_out_inds + assert inputs.leave_out_inds == leave_out_inds + + phi_0 = [FeatureNode(0, "feat_1", [1.0, 2.0, 3.0], [4.0], Unit("m"))] + inputs.phi_0 = phi_0 + assert inputs.phi_0[0].expr == phi_0[0].expr + + prop_unit = Unit("m") + inputs.prop_unit = prop_unit + assert inputs.prop_unit == prop_unit + + filename = "googletest/inputs/sisso.json" + inputs.filename = filename + assert inputs.filename == filename + + data_file = "googletest/inputs/data.csv" + inputs.data_file = data_file + assert inputs.data_file == data_file + + prop_key = "property" + inputs.prop_key = prop_key + assert inputs.prop_key == prop_key + + prop_label = "property" + inputs.prop_label = prop_label + assert inputs.prop_label == prop_label + + task_key = "task" + inputs.task_key = task_key + assert inputs.task_key == task_key + + calc_type = "regression" + inputs.calc_type = calc_type + assert inputs.calc_type == calc_type + + cross_cor_max = 1.0 + inputs.cross_cor_max = cross_cor_max + assert inputs.cross_cor_max == cross_cor_max + + l_bound = 1e-5 + inputs.l_bound = l_bound + assert inputs.l_bound == l_bound + + u_bound = 1e8 + inputs.u_bound = u_bound + assert inputs.u_bound == u_bound + + n_dim = 2 + inputs.n_dim = n_dim + assert inputs.n_dim == n_dim + + max_rung = 1 + inputs.max_rung = max_rung + assert inputs.max_rung == max_rung + + n_rung_store = 1 + inputs.n_rung_store = n_rung_store + assert inputs.n_rung_store == n_rung_store + + n_rung_generate = 0 + inputs.n_rung_generate = n_rung_generate + assert inputs.n_rung_generate == n_rung_generate + + n_sis_select = 1 + inputs.n_sis_select = n_sis_select + assert inputs.n_sis_select == n_sis_select + + n_residual = 1 + inputs.n_residual = n_residual + 
assert inputs.n_residual == n_residual + + n_models_store = 1 + inputs.n_models_store = n_models_store + assert inputs.n_models_store == n_models_store + + max_param_depth = 1 + inputs.max_param_depth = max_param_depth + assert inputs.max_param_depth == max_param_depth + + nlopt_seed = 10 + inputs.nlopt_seed = nlopt_seed + assert inputs.nlopt_seed == nlopt_seed + + fix_intercept = False + inputs.fix_intercept = fix_intercept + assert inputs.fix_intercept == fix_intercept + + global_param_opt = True + inputs.global_param_opt = global_param_opt + assert inputs.global_param_opt == global_param_opt + + reparam_residual = True + inputs.reparam_residual = reparam_residual + assert inputs.reparam_residual == reparam_residual + + +if __name__ == "__main__": + test_inputs() diff --git a/tests/pytest/test_model_eval/test_model_node/test_abs.py b/tests/pytest/test_model_eval/test_model_node/test_abs.py index 95cc923023b064afd133db472d2c33ad39936f4d..ecb579b647af6c76392ba162e57ceb6ca40ac1de 100644 --- a/tests/pytest/test_model_eval/test_model_node/test_abs.py +++ b/tests/pytest/test_model_eval/test_model_node/test_abs.py @@ -27,9 +27,11 @@ class InvalidFeatureMade(Exception): def test_abs_model_eval(): - initialize_values_arr(90, 10, 1, 1) - data_1 = np.random.random(90) * 2e4 - 1e4 - test_data_1 = np.random.random(10) * 2e4 - 1e4 + task_sizes_train = [90] + task_sizes_test = [10] + initialize_values_arr(task_sizes_train, task_sizes_test, 1, 1) + data_1 = np.random.random(task_sizes_train[0]) * 2e4 - 1e4 + test_data_1 = np.random.random(task_sizes_test[0]) * 2e4 - 1e4 feat_1 = FeatureNode(0, "t_a", data_1, test_data_1, Unit()) feats = [] diff --git a/tests/pytest/test_model_eval/test_model_node/test_abs_diff.py b/tests/pytest/test_model_eval/test_model_node/test_abs_diff.py index b24c4b1561e4ab38e5fab4baecf28d9db508e944..a1ea85b88b1e651ebfb4c36e15416d4d8a8ab4ef 100644 --- a/tests/pytest/test_model_eval/test_model_node/test_abs_diff.py +++ 
b/tests/pytest/test_model_eval/test_model_node/test_abs_diff.py @@ -27,12 +27,14 @@ class InvalidFeatureMade(Exception): def test_abs_diff_model_eval(): - initialize_values_arr(90, 10, 2, 1) - data_1 = np.random.random(90) * 1e4 + 0.1 - test_data_1 = np.random.random(10) * 1e4 + 0.1 - - data_2 = np.random.random(90) * 2e4 - 1e4 - test_data_2 = np.random.random(10) * 2e4 - 1e4 + task_sizes_train = [90] + task_sizes_test = [10] + initialize_values_arr(task_sizes_train, task_sizes_test, 2, 1) + data_1 = np.random.random(task_sizes_train[0]) * 1e4 + 0.1 + test_data_1 = np.random.random(task_sizes_test[0]) * 1e4 + 0.1 + + data_2 = np.random.random(task_sizes_train[0]) * 2e4 - 1e4 + test_data_2 = np.random.random(task_sizes_test[0]) * 2e4 - 1e4 feat_1 = FeatureNode(0, "t_a", data_1, test_data_1, Unit()) feat_2 = FeatureNode(1, "x_a", data_2, test_data_2, Unit()) diff --git a/tests/pytest/test_model_eval/test_model_node/test_add.py b/tests/pytest/test_model_eval/test_model_node/test_add.py index 44d952859a4344929551274e9173344fc2e5de25..d38e915d67e4ac79248414778033c14ea19384fa 100644 --- a/tests/pytest/test_model_eval/test_model_node/test_add.py +++ b/tests/pytest/test_model_eval/test_model_node/test_add.py @@ -27,12 +27,14 @@ class InvalidFeatureMade(Exception): def test_add_model_eval(): - initialize_values_arr(90, 10, 2, 1) - data_1 = np.random.random(90) * 1e4 + 0.1 - test_data_1 = np.random.random(10) * 1e4 + 0.1 - - data_2 = np.random.random(90) * 2e4 - 1e4 - test_data_2 = np.random.random(10) * 2e4 - 1e4 + task_sizes_train = [90] + task_sizes_test = [10] + initialize_values_arr(task_sizes_train, task_sizes_test, 2, 1) + data_1 = np.random.random(task_sizes_train[0]) * 1e4 + 0.1 + test_data_1 = np.random.random(task_sizes_test[0]) * 1e4 + 0.1 + + data_2 = np.random.random(task_sizes_train[0]) * 2e4 - 1e4 + test_data_2 = np.random.random(task_sizes_test[0]) * 2e4 - 1e4 feat_1 = FeatureNode(0, "t_a", data_1, test_data_1, Unit()) feat_2 = FeatureNode(1, "x_a", data_2, 
test_data_2, Unit()) diff --git a/tests/pytest/test_model_eval/test_model_node/test_binary_binary.py b/tests/pytest/test_model_eval/test_model_node/test_binary_binary.py index df10363c3b299ad4bcefe1371e6807fa63fbc3e5..899b218c333b7f00cef67fc7e43abf9f5a0c879d 100644 --- a/tests/pytest/test_model_eval/test_model_node/test_binary_binary.py +++ b/tests/pytest/test_model_eval/test_model_node/test_binary_binary.py @@ -28,15 +28,17 @@ class InvalidFeatureMade(Exception): def test_bin_bin_model_eval(): - initialize_values_arr(90, 10, 3, 2) - data_1 = np.random.random(90) * 1e4 + 0.1 - test_data_1 = np.random.random(10) * 1e4 + 0.1 + task_sizes_train = [90] + task_sizes_test = [10] + initialize_values_arr(task_sizes_train, task_sizes_test, 3, 2) + data_1 = np.random.random(task_sizes_train[0]) * 1e4 + 0.1 + test_data_1 = np.random.random(task_sizes_test[0]) * 1e4 + 0.1 - data_2 = np.random.random(90) * 2e4 - 1e4 - test_data_2 = np.random.random(10) * 2e4 - 1e4 + data_2 = np.random.random(task_sizes_train[0]) * 2e4 - 1e4 + test_data_2 = np.random.random(task_sizes_test[0]) * 2e4 - 1e4 - data_3 = np.random.random(90) * 2e4 - 1e4 - test_data_3 = np.random.random(10) * 2e4 - 1e4 + data_3 = np.random.random(task_sizes_train[0]) * 2e4 - 1e4 + test_data_3 = np.random.random(task_sizes_test[0]) * 2e4 - 1e4 feat_1 = FeatureNode(0, "t_a", data_1, test_data_1, Unit()) feat_2 = FeatureNode(1, "x_a", data_2, test_data_2, Unit()) @@ -46,9 +48,9 @@ def test_bin_bin_model_eval(): node_2 = AddNode(node_1, feat_2, 4, 1e-50, 1e50) model_node = ModelNode(node_2) - data_1 = np.random.random(90) * 1e4 + 0.1 - data_2 = np.random.random(90) * 2e4 - 1e4 - data_3 = np.random.random(90) * 2e4 - 1e4 + data_1 = np.random.random(task_sizes_train[0]) * 1e4 + 0.1 + data_2 = np.random.random(task_sizes_train[0]) * 2e4 - 1e4 + data_3 = np.random.random(task_sizes_train[0]) * 2e4 - 1e4 val_data = data_1 * data_3 + data_2 data_dict = {"t_a": data_1[0], "x_a": data_2[0], "m_a": data_3[0]} diff --git 
a/tests/pytest/test_model_eval/test_model_node/test_binary_unary.py b/tests/pytest/test_model_eval/test_model_node/test_binary_unary.py index 0d66d222f5b65215d1d301e521976c80a8a26c46..f3fa36214226931f4f32fd4565f6f90b8d83fa9c 100644 --- a/tests/pytest/test_model_eval/test_model_node/test_binary_unary.py +++ b/tests/pytest/test_model_eval/test_model_node/test_binary_unary.py @@ -28,12 +28,14 @@ class InvalidFeatureMade(Exception): def test_bin_un_model_eval(): - initialize_values_arr(90, 10, 2, 1) - data_1 = np.random.random(90) * 1e4 + 0.1 - test_data_1 = np.random.random(10) * 1e4 + 0.1 + task_sizes_train = [90] + task_sizes_test = [10] + initialize_values_arr(task_sizes_train, task_sizes_test, 2, 1) + data_1 = np.random.random(task_sizes_train[0]) * 1e4 + 0.1 + test_data_1 = np.random.random(task_sizes_test[0]) * 1e4 + 0.1 - data_2 = np.random.random(90) * 2e4 - 1e4 - test_data_2 = np.random.random(10) * 2e4 - 1e4 + data_2 = np.random.random(task_sizes_train[0]) * 2e4 - 1e4 + test_data_2 = np.random.random(task_sizes_test[0]) * 2e4 - 1e4 feat_1 = FeatureNode(0, "t_a", data_1, test_data_1, Unit()) feat_2 = FeatureNode(1, "x_a", data_2, test_data_2, Unit()) @@ -42,8 +44,8 @@ def test_bin_un_model_eval(): node_2 = AddNode(node_1, feat_2, 2, 1e-50, 1e50) model_node = ModelNode(node_2) - data_1 = np.random.random(90) * 1e4 + 0.1 - data_2 = np.random.random(90) * 2e4 - 1e4 + data_1 = np.random.random(task_sizes_train[0]) * 1e4 + 0.1 + data_2 = np.random.random(task_sizes_train[0]) * 2e4 - 1e4 val_data = data_1 ** 2.0 + data_2 data_dict = {"t_a": data_1[0], "x_a": data_2[0]} diff --git a/tests/pytest/test_model_eval/test_model_node/test_cb.py b/tests/pytest/test_model_eval/test_model_node/test_cb.py index d1fc6903f9dab100cc511c171ec95212ff93a8ac..d989a5ceb2721a509fa5310d6560aa89879aa1af 100644 --- a/tests/pytest/test_model_eval/test_model_node/test_cb.py +++ b/tests/pytest/test_model_eval/test_model_node/test_cb.py @@ -27,9 +27,11 @@ class InvalidFeatureMade(Exception): 
def test_cb_model_eval(): - initialize_values_arr(90, 10, 1, 1) - data_1 = np.random.random(90) * 2e4 - 1e4 - test_data_1 = np.random.random(10) * 2e4 - 1e4 + task_sizes_train = [90] + task_sizes_test = [10] + initialize_values_arr(task_sizes_train, task_sizes_test, 1, 1) + data_1 = np.random.random(task_sizes_train[0]) * 2e4 - 1e4 + test_data_1 = np.random.random(task_sizes_test[0]) * 2e4 - 1e4 feat_1 = FeatureNode(0, "t_a", data_1, test_data_1, Unit()) feats = [] diff --git a/tests/pytest/test_model_eval/test_model_node/test_cbrt.py b/tests/pytest/test_model_eval/test_model_node/test_cbrt.py index e38e4465b87460bebd8717ff808e95af29bd8eea..c833ae04672e1291fb7b93d3dfec14f38b5f3ae2 100644 --- a/tests/pytest/test_model_eval/test_model_node/test_cbrt.py +++ b/tests/pytest/test_model_eval/test_model_node/test_cbrt.py @@ -27,9 +27,11 @@ class InvalidFeatureMade(Exception): def test_cbrt_model_eval(): - initialize_values_arr(90, 10, 1, 1) - data_1 = np.random.random(90) * 1e4 + 0.1 - test_data_1 = np.random.random(10) * 1e4 + 0.1 + task_sizes_train = [90] + task_sizes_test = [10] + initialize_values_arr(task_sizes_train, task_sizes_test, 1, 1) + data_1 = np.random.random(task_sizes_train[0]) * 1e4 + 0.1 + test_data_1 = np.random.random(task_sizes_test[0]) * 1e4 + 0.1 feat_1 = FeatureNode(0, "t_a", data_1, test_data_1, Unit()) feats = [] diff --git a/tests/pytest/test_model_eval/test_model_node/test_cos.py b/tests/pytest/test_model_eval/test_model_node/test_cos.py index 42e2a44ea6c474a9b23a70ba595e563d0139d5bd..d75e39119d7c02d7173fe2dc323f8804dd398efd 100644 --- a/tests/pytest/test_model_eval/test_model_node/test_cos.py +++ b/tests/pytest/test_model_eval/test_model_node/test_cos.py @@ -27,9 +27,11 @@ class InvalidFeatureMade(Exception): def test_cos_model_eval(): - initialize_values_arr(90, 10, 1, 1) - data_1 = np.random.random(90) * 2e4 - 1e4 - test_data_1 = np.random.random(10) * 2e4 - 1e4 + task_sizes_train = [90] + task_sizes_test = [10] + 
initialize_values_arr(task_sizes_train, task_sizes_test, 1, 1) + data_1 = np.random.random(task_sizes_train[0]) * 2e4 - 1e4 + test_data_1 = np.random.random(task_sizes_test[0]) * 2e4 - 1e4 feat_1 = FeatureNode(0, "t_a", data_1, test_data_1, Unit()) feats = [] diff --git a/tests/pytest/test_model_eval/test_model_node/test_div.py b/tests/pytest/test_model_eval/test_model_node/test_div.py index 2b9549e543968c7785cebfabc78cf69fef2b65aa..31dac1a4e0308f4755db90afe4c3da6b3ed9229f 100644 --- a/tests/pytest/test_model_eval/test_model_node/test_div.py +++ b/tests/pytest/test_model_eval/test_model_node/test_div.py @@ -27,12 +27,14 @@ class InvalidFeatureMade(Exception): def test_div_model_eval(): - initialize_values_arr(90, 10, 2, 1) - data_1 = np.random.random(90) * 1e4 + 0.1 - test_data_1 = np.random.random(10) * 1e4 + 0.1 - - data_2 = np.random.random(90) * 2e4 - 1e4 - test_data_2 = np.random.random(10) * 2e4 - 1e4 + task_sizes_train = [90] + task_sizes_test = [10] + initialize_values_arr(task_sizes_train, task_sizes_test, 2, 1) + data_1 = np.random.random(task_sizes_train[0]) * 1e4 + 0.1 + test_data_1 = np.random.random(task_sizes_test[0]) * 1e4 + 0.1 + + data_2 = np.random.random(task_sizes_train[0]) * 2e4 - 1e4 + test_data_2 = np.random.random(task_sizes_test[0]) * 2e4 - 1e4 feat_1 = FeatureNode(0, "t_a", data_1, test_data_1, Unit()) feat_2 = FeatureNode(1, "x_a", data_2, test_data_2, Unit()) diff --git a/tests/pytest/test_model_eval/test_model_node/test_exp.py b/tests/pytest/test_model_eval/test_model_node/test_exp.py index 76c0cc9c9c90f4c382f0445f043a642c1e99a7b1..93bf802d36ecb464631f54ef42f23fd6a799c9b8 100644 --- a/tests/pytest/test_model_eval/test_model_node/test_exp.py +++ b/tests/pytest/test_model_eval/test_model_node/test_exp.py @@ -27,9 +27,11 @@ class InvalidFeatureMade(Exception): def test_exp_model_eval(): - initialize_values_arr(90, 10, 1, 1) - data_1 = np.random.random(90) * 2e1 - 1e1 - test_data_1 = np.random.random(10) * 2e1 - 1e1 + task_sizes_train = 
[90] + task_sizes_test = [10] + initialize_values_arr(task_sizes_train, task_sizes_test, 1, 1) + data_1 = np.random.random(task_sizes_train[0]) * 2e1 - 1e1 + test_data_1 = np.random.random(task_sizes_test[0]) * 2e1 - 1e1 feat_1 = FeatureNode(0, "t_a", data_1, test_data_1, Unit()) feats = [] diff --git a/tests/pytest/test_model_eval/test_model_node/test_inv.py b/tests/pytest/test_model_eval/test_model_node/test_inv.py index 543d4f0b2f4b473b8f7c5434d7675069362ca132..34e193e3863544e1f09d8e0b10fecec946635ef4 100644 --- a/tests/pytest/test_model_eval/test_model_node/test_inv.py +++ b/tests/pytest/test_model_eval/test_model_node/test_inv.py @@ -27,9 +27,11 @@ class InvalidFeatureMade(Exception): def test_inv_model_eval(): - initialize_values_arr(90, 10, 1, 1) - data_1 = np.random.random(90) * 1e4 + 0.1 - test_data_1 = np.random.random(10) * 1e4 + 0.1 + task_sizes_train = [90] + task_sizes_test = [10] + initialize_values_arr(task_sizes_train, task_sizes_test, 1, 1) + data_1 = np.random.random(task_sizes_train[0]) * 1e4 + 0.1 + test_data_1 = np.random.random(task_sizes_test[0]) * 1e4 + 0.1 feat_1 = FeatureNode(0, "t_a", data_1, test_data_1, Unit()) feats = [] diff --git a/tests/pytest/test_model_eval/test_model_node/test_log.py b/tests/pytest/test_model_eval/test_model_node/test_log.py index bcf597b60005f994368eea06a501903ace40c6c9..a29dbcf6f317b70e22e7616ceebf181f66c8b636 100644 --- a/tests/pytest/test_model_eval/test_model_node/test_log.py +++ b/tests/pytest/test_model_eval/test_model_node/test_log.py @@ -27,9 +27,11 @@ class InvalidFeatureMade(Exception): def test_log_model_eval(): - initialize_values_arr(90, 10, 1, 1) - data_1 = np.random.random(90) * 1e4 + 0.1 - test_data_1 = np.random.random(10) * 1e4 + 0.1 + task_sizes_train = [90] + task_sizes_test = [10] + initialize_values_arr(task_sizes_train, task_sizes_test, 1, 1) + data_1 = np.random.random(task_sizes_train[0]) * 1e4 + 0.1 + test_data_1 = np.random.random(task_sizes_test[0]) * 1e4 + 0.1 feat_1 = 
FeatureNode(0, "t_a", data_1, test_data_1, Unit()) feats = [] diff --git a/tests/pytest/test_model_eval/test_model_node/test_mult.py b/tests/pytest/test_model_eval/test_model_node/test_mult.py index 53d995c009a0f99088904143497c06567847cf50..7c70dc8556059fbd3f689a61f46955d574b5b717 100644 --- a/tests/pytest/test_model_eval/test_model_node/test_mult.py +++ b/tests/pytest/test_model_eval/test_model_node/test_mult.py @@ -27,12 +27,14 @@ class InvalidFeatureMade(Exception): def test_mult_model_eval(): - initialize_values_arr(90, 10, 2, 1) - data_1 = np.random.random(90) * 1e4 + 0.1 - test_data_1 = np.random.random(10) * 1e4 + 0.1 - - data_2 = np.random.random(90) * 2e4 - 1e4 - test_data_2 = np.random.random(10) * 2e4 - 1e4 + task_sizes_train = [90] + task_sizes_test = [10] + initialize_values_arr(task_sizes_train, task_sizes_test, 2, 1) + data_1 = np.random.random(task_sizes_train[0]) * 1e4 + 0.1 + test_data_1 = np.random.random(task_sizes_test[0]) * 1e4 + 0.1 + + data_2 = np.random.random(task_sizes_train[0]) * 2e4 - 1e4 + test_data_2 = np.random.random(task_sizes_test[0]) * 2e4 - 1e4 feat_1 = FeatureNode(0, "t_a", data_1, test_data_1, Unit()) feat_2 = FeatureNode(1, "x_a", data_2, test_data_2, Unit()) diff --git a/tests/pytest/test_model_eval/test_model_node/test_neg_exp.py b/tests/pytest/test_model_eval/test_model_node/test_neg_exp.py index b8d6deee8f03835a53cfef099565b837840a0cb7..bb0e844b9b40c0e5b1ba02ca5e24dd4051734c62 100644 --- a/tests/pytest/test_model_eval/test_model_node/test_neg_exp.py +++ b/tests/pytest/test_model_eval/test_model_node/test_neg_exp.py @@ -27,9 +27,11 @@ class InvalidFeatureMade(Exception): def test_neg_exp_model_eval(): - initialize_values_arr(90, 10, 1, 1) - data_1 = np.random.random(90) * 2e1 - 1e1 - test_data_1 = np.random.random(10) * 2e1 - 1e1 + task_sizes_train = [90] + task_sizes_test = [10] + initialize_values_arr(task_sizes_train, task_sizes_test, 1, 1) + data_1 = np.random.random(task_sizes_train[0]) * 2e1 - 1e1 + test_data_1 = 
np.random.random(task_sizes_test[0]) * 2e1 - 1e1 feat_1 = FeatureNode(0, "t_a", data_1, test_data_1, Unit()) feats = [] diff --git a/tests/pytest/test_model_eval/test_model_node/test_sin.py b/tests/pytest/test_model_eval/test_model_node/test_sin.py index f52212fecca55edd5d52472b5ccbffc60f8228dc..a08b3fe49104461ca20a1bb0e403c2f6d22a80a5 100644 --- a/tests/pytest/test_model_eval/test_model_node/test_sin.py +++ b/tests/pytest/test_model_eval/test_model_node/test_sin.py @@ -27,9 +27,11 @@ class InvalidFeatureMade(Exception): def test_sin_model_eval(): - initialize_values_arr(90, 10, 1, 1) - data_1 = np.random.random(90) * 2e4 - 1e4 - test_data_1 = np.random.random(10) * 2e4 - 1e4 + task_sizes_train = [90] + task_sizes_test = [10] + initialize_values_arr(task_sizes_train, task_sizes_test, 1, 1) + data_1 = np.random.random(task_sizes_train[0]) * 2e4 - 1e4 + test_data_1 = np.random.random(task_sizes_test[0]) * 2e4 - 1e4 feat_1 = FeatureNode(0, "t_a", data_1, test_data_1, Unit()) feats = [] diff --git a/tests/pytest/test_model_eval/test_model_node/test_six_pow.py b/tests/pytest/test_model_eval/test_model_node/test_six_pow.py index 1e7f5fd9e05523e20f8a90f079d8163a9a8b59dd..19d20f433a902a0f1fa56e1d09e2aec6ec1d645b 100644 --- a/tests/pytest/test_model_eval/test_model_node/test_six_pow.py +++ b/tests/pytest/test_model_eval/test_model_node/test_six_pow.py @@ -27,9 +27,11 @@ class InvalidFeatureMade(Exception): def test_six_pow_model_eval(): - initialize_values_arr(90, 10, 1, 1) - data_1 = np.random.random(90) * 2e4 - 1e4 - test_data_1 = np.random.random(10) * 2e4 - 1e4 + task_sizes_train = [90] + task_sizes_test = [10] + initialize_values_arr(task_sizes_train, task_sizes_test, 1, 1) + data_1 = np.random.random(task_sizes_train[0]) * 2e4 - 1e4 + test_data_1 = np.random.random(task_sizes_test[0]) * 2e4 - 1e4 feat_1 = FeatureNode(0, "t_a", data_1, test_data_1, Unit()) feats = [] diff --git a/tests/pytest/test_model_eval/test_model_node/test_sq.py 
b/tests/pytest/test_model_eval/test_model_node/test_sq.py index 0ec268f658845187d0a090347ca24a0bb4a91030..a47f9ebf98b3cd5c5738509686cab13fa1f46e41 100644 --- a/tests/pytest/test_model_eval/test_model_node/test_sq.py +++ b/tests/pytest/test_model_eval/test_model_node/test_sq.py @@ -27,9 +27,11 @@ class InvalidFeatureMade(Exception): def test_sq_model_eval(): - initialize_values_arr(90, 10, 1, 1) - data_1 = np.random.random(90) * 2e4 - 1e4 - test_data_1 = np.random.random(10) * 2e4 - 1e4 + task_sizes_train = [90] + task_sizes_test = [10] + initialize_values_arr(task_sizes_train, task_sizes_test, 1, 1) + data_1 = np.random.random(task_sizes_train[0]) * 2e4 - 1e4 + test_data_1 = np.random.random(task_sizes_test[0]) * 2e4 - 1e4 feat_1 = FeatureNode(0, "t_a", data_1, test_data_1, Unit()) feats = [] diff --git a/tests/pytest/test_model_eval/test_model_node/test_sqrt.py b/tests/pytest/test_model_eval/test_model_node/test_sqrt.py index 3c839a90623500e7e13dd396cb6d0fae10575d2f..4d10d6bb7d4cbedc26d0a3822bee66597da2599c 100644 --- a/tests/pytest/test_model_eval/test_model_node/test_sqrt.py +++ b/tests/pytest/test_model_eval/test_model_node/test_sqrt.py @@ -27,9 +27,11 @@ class InvalidFeatureMade(Exception): def test_sqrt_model_eval(): - initialize_values_arr(90, 10, 1, 1) - data_1 = np.random.random(90) * 1e4 + 0.1 - test_data_1 = np.random.random(10) * 1e4 + 0.1 + task_sizes_train = [90] + task_sizes_test = [10] + initialize_values_arr(task_sizes_train, task_sizes_test, 1, 1) + data_1 = np.random.random(task_sizes_train[0]) * 1e4 + 0.1 + test_data_1 = np.random.random(task_sizes_test[0]) * 1e4 + 0.1 feat_1 = FeatureNode(0, "t_a", data_1, test_data_1, Unit()) feats = [] diff --git a/tests/pytest/test_model_eval/test_model_node/test_sub.py b/tests/pytest/test_model_eval/test_model_node/test_sub.py index 95398958efdee70559d0af65100b7af50fb53f8b..92cecc3c20773d61bf6044301b647aaa6f9e16f1 100644 --- a/tests/pytest/test_model_eval/test_model_node/test_sub.py +++ 
b/tests/pytest/test_model_eval/test_model_node/test_sub.py @@ -27,12 +27,14 @@ class InvalidFeatureMade(Exception): def test_sub_model_eval(): - initialize_values_arr(90, 10, 2, 1) - data_1 = np.random.random(90) * 1e4 + 0.1 - test_data_1 = np.random.random(10) * 1e4 + 0.1 - - data_2 = np.random.random(90) * 2e4 - 1e4 - test_data_2 = np.random.random(10) * 2e4 - 1e4 + task_sizes_train = [90] + task_sizes_test = [10] + initialize_values_arr(task_sizes_train, task_sizes_test, 2, 1) + data_1 = np.random.random(task_sizes_train[0]) * 1e4 + 0.1 + test_data_1 = np.random.random(task_sizes_test[0]) * 1e4 + 0.1 + + data_2 = np.random.random(task_sizes_train[0]) * 2e4 - 1e4 + test_data_2 = np.random.random(task_sizes_test[0]) * 2e4 - 1e4 feat_1 = FeatureNode(0, "t_a", data_1, test_data_1, Unit()) feat_2 = FeatureNode(1, "x_a", data_2, test_data_2, Unit()) diff --git a/tests/pytest/test_model_eval/test_model_node/test_unary_binary.py b/tests/pytest/test_model_eval/test_model_node/test_unary_binary.py index b32b610d5bb940afb491fab2fde1e8c9b93146e4..1ec0e4dc0a1cec642ece9f180f9bef68882cc7f7 100644 --- a/tests/pytest/test_model_eval/test_model_node/test_unary_binary.py +++ b/tests/pytest/test_model_eval/test_model_node/test_unary_binary.py @@ -28,12 +28,14 @@ class InvalidFeatureMade(Exception): def test_un_bin_model_eval(): - initialize_values_arr(90, 10, 2, 2) - data_1 = np.random.random(90) * 1e4 + 0.1 - test_data_1 = np.random.random(10) * 1e4 + 0.1 + task_sizes_train = [90] + task_sizes_test = [10] + initialize_values_arr(task_sizes_train, task_sizes_test, 2, 2) + data_1 = np.random.random(task_sizes_train[0]) * 1e4 + 0.1 + test_data_1 = np.random.random(task_sizes_test[0]) * 1e4 + 0.1 - data_2 = np.random.random(90) * 2e4 - 1e4 - test_data_2 = np.random.random(10) * 2e4 - 1e4 + data_2 = np.random.random(task_sizes_train[0]) * 2e4 - 1e4 + test_data_2 = np.random.random(task_sizes_test[0]) * 2e4 - 1e4 feat_1 = FeatureNode(0, "t_a", data_1, test_data_1, Unit()) feat_2 = 
FeatureNode(1, "x_a", data_2, test_data_2, Unit()) @@ -42,8 +44,8 @@ def test_un_bin_model_eval(): node_2 = SqNode(node_1, 3, 1e-50, 1e50) model_node = ModelNode(node_2) - data_1 = np.random.random(90) * 1e4 + 0.1 - data_2 = np.random.random(90) * 2e4 - 1e4 + data_1 = np.random.random(task_sizes_train[0]) * 1e4 + 0.1 + data_2 = np.random.random(task_sizes_train[0]) * 2e4 - 1e4 val_data = (data_1 + data_2) ** 2.0 data_dict = {"t_a": data_1[0], "x_a": data_2[0]} diff --git a/tests/pytest/test_model_eval/test_model_node/test_unary_unary.py b/tests/pytest/test_model_eval/test_model_node/test_unary_unary.py index a9e94295788a3b046772d1fffcb3f576f33f53de..79a5a994b13715a930524db3da9161da32b3a622 100644 --- a/tests/pytest/test_model_eval/test_model_node/test_unary_unary.py +++ b/tests/pytest/test_model_eval/test_model_node/test_unary_unary.py @@ -28,9 +28,11 @@ class InvalidFeatureMade(Exception): def test_un_un_model_eval(): - initialize_values_arr(90, 10, 1, 2) - data_1 = np.random.random(90) * 1e4 + 0.1 - test_data_1 = np.random.random(10) * 1e4 + 0.1 + task_sizes_train = [90] + task_sizes_test = [10] + initialize_values_arr(task_sizes_train, task_sizes_test, 1, 2) + data_1 = np.random.random(task_sizes_train[0]) * 1e4 + 0.1 + test_data_1 = np.random.random(task_sizes_test[0]) * 1e4 + 0.1 feat_1 = FeatureNode(0, "t_a", data_1, test_data_1, Unit()) feats = [] @@ -39,7 +41,7 @@ def test_un_un_model_eval(): node_2 = InvNode(node_1, 3, 1e-50, 1e50) model_node = ModelNode(node_2) - data_1 = np.random.random(90) * 1e4 + 0.1 + data_1 = np.random.random(task_sizes_train[0]) * 1e4 + 0.1 val_data = 1.0 / data_1 ** 2.0 data_dict = {"t_a": data_1[0]} diff --git a/tests/pytest/test_model_eval/test_param_model_node/test_abs_diff_param.py b/tests/pytest/test_model_eval/test_param_model_node/test_abs_diff_param.py index 35a14991f7b0953bc223decab1fc27bb5d44727e..9ff404aa5f1b455b2c4db31a111555251cb594ac 100644 --- 
a/tests/pytest/test_model_eval/test_param_model_node/test_abs_diff_param.py +++ b/tests/pytest/test_model_eval/test_param_model_node/test_abs_diff_param.py @@ -29,15 +29,19 @@ class InvalidFeatureMade(Exception): def test_abs_diff_param_model_eval(): - initialize_values_arr(90, 10, 2, 1) + task_sizes_train = [90] + task_sizes_test = [10] + initialize_values_arr(task_sizes_train, task_sizes_test, 2, 1) initialize_param_storage() - optimizer = get_reg_optimizer([90], np.zeros(90), 2, 2, 0.5, False) + optimizer = get_reg_optimizer( + [task_sizes_train[0]], np.zeros(task_sizes_train[0]), 2, 2, 0.5, False + ) - data_1 = np.random.random(90) * 1e4 + 0.1 - test_data_1 = np.random.random(10) * 1e4 + 0.1 + data_1 = np.random.random(task_sizes_train[0]) * 1e4 + 0.1 + test_data_1 = np.random.random(task_sizes_test[0]) * 1e4 + 0.1 - data_2 = np.random.random(90) * 2e4 - 1e4 - test_data_2 = np.random.random(10) * 2e4 - 1e4 + data_2 = np.random.random(task_sizes_train[0]) * 2e4 - 1e4 + test_data_2 = np.random.random(task_sizes_test[0]) * 2e4 - 1e4 feat_1 = FeatureNode(0, "t_a", data_1, test_data_1, Unit()) feat_2 = FeatureNode(1, "x_a", data_2, test_data_2, Unit()) diff --git a/tests/pytest/test_model_eval/test_param_model_node/test_abs_param.py b/tests/pytest/test_model_eval/test_param_model_node/test_abs_param.py index 2e8069fdbb4c6c9fd8146c45aa63092430708cf6..06906b5f0381b8720d50828dbc91623d2d71780c 100644 --- a/tests/pytest/test_model_eval/test_param_model_node/test_abs_param.py +++ b/tests/pytest/test_model_eval/test_param_model_node/test_abs_param.py @@ -29,12 +29,16 @@ class InvalidFeatureMade(Exception): def test_abs_param_model_eval(): - initialize_values_arr(90, 10, 1, 1) + task_sizes_train = [90] + task_sizes_test = [10] + initialize_values_arr(task_sizes_train, task_sizes_test, 1, 1) initialize_param_storage() - optimizer = get_reg_optimizer([90], np.zeros(90), 2, 2, 0.5, False) + optimizer = get_reg_optimizer( + [task_sizes_train[0]], np.zeros(task_sizes_train[0]), 
2, 2, 0.5, False + ) - data_1 = np.random.random(90) * 2e4 - 1e4 - test_data_1 = np.random.random(10) * 2e4 - 1e4 + data_1 = np.random.random(task_sizes_train[0]) * 2e4 - 1e4 + test_data_1 = np.random.random(task_sizes_test[0]) * 2e4 - 1e4 feat_1 = FeatureNode(0, "t_a", data_1, test_data_1, Unit()) feats = [] diff --git a/tests/pytest/test_model_eval/test_param_model_node/test_add_param.py b/tests/pytest/test_model_eval/test_param_model_node/test_add_param.py index 7b0c321e802eac5de00721e246ecce38a9328c7f..44e8a998787da24ce91eafb58cba728108a35df5 100644 --- a/tests/pytest/test_model_eval/test_param_model_node/test_add_param.py +++ b/tests/pytest/test_model_eval/test_param_model_node/test_add_param.py @@ -29,15 +29,19 @@ class InvalidFeatureMade(Exception): def test_add_param_model_eval(): - initialize_values_arr(90, 10, 2, 1) + task_sizes_train = [90] + task_sizes_test = [10] + initialize_values_arr(task_sizes_train, task_sizes_test, 2, 1) initialize_param_storage() - optimizer = get_reg_optimizer([90], np.zeros(90), 2, 2, 0.5, False) + optimizer = get_reg_optimizer( + [task_sizes_train[0]], np.zeros(task_sizes_train[0]), 2, 2, 0.5, False + ) - data_1 = np.random.random(90) * 1e4 + 0.1 - test_data_1 = np.random.random(10) * 1e4 + 0.1 + data_1 = np.random.random(task_sizes_train[0]) * 1e4 + 0.1 + test_data_1 = np.random.random(task_sizes_test[0]) * 1e4 + 0.1 - data_2 = np.random.random(90) * 2e4 - 1e4 - test_data_2 = np.random.random(10) * 2e4 - 1e4 + data_2 = np.random.random(task_sizes_train[0]) * 2e4 - 1e4 + test_data_2 = np.random.random(task_sizes_test[0]) * 2e4 - 1e4 feat_1 = FeatureNode(0, "t_a", data_1, test_data_1, Unit()) feat_2 = FeatureNode(1, "x_a", data_2, test_data_2, Unit()) diff --git a/tests/pytest/test_model_eval/test_param_model_node/test_binary_binary_param.py b/tests/pytest/test_model_eval/test_param_model_node/test_binary_binary_param.py index 4077bccb6d59af224344979ddd82de99b6b3efa5..82c32af8f63f1acc866f0323cc68a1328c97fd25 100644 --- 
a/tests/pytest/test_model_eval/test_param_model_node/test_binary_binary_param.py +++ b/tests/pytest/test_model_eval/test_param_model_node/test_binary_binary_param.py @@ -30,18 +30,22 @@ class InvalidFeatureMade(Exception): def test_bin_bin_model_eval(): - initialize_values_arr(90, 10, 3, 2) + task_sizes_train = [90] + task_sizes_test = [10] + initialize_values_arr(task_sizes_train, task_sizes_test, 3, 2) initialize_param_storage() - optimizer = get_reg_optimizer([90], np.zeros(90), 2, 2, 0.5, False) + optimizer = get_reg_optimizer( + [task_sizes_train[0]], np.zeros(task_sizes_train[0]), 2, 2, 0.5, False + ) - data_1 = np.random.random(90) * 1e4 + 0.1 - test_data_1 = np.random.random(10) * 1e4 + 0.1 + data_1 = np.random.random(task_sizes_train[0]) * 1e4 + 0.1 + test_data_1 = np.random.random(task_sizes_test[0]) * 1e4 + 0.1 - data_2 = np.random.random(90) * 2e4 - 1e4 - test_data_2 = np.random.random(10) * 2e4 - 1e4 + data_2 = np.random.random(task_sizes_train[0]) * 2e4 - 1e4 + test_data_2 = np.random.random(task_sizes_test[0]) * 2e4 - 1e4 - data_3 = np.random.random(90) * 2e4 - 1e4 - test_data_3 = np.random.random(10) * 2e4 - 1e4 + data_3 = np.random.random(task_sizes_train[0]) * 2e4 - 1e4 + test_data_3 = np.random.random(task_sizes_test[0]) * 2e4 - 1e4 feat_1 = FeatureNode(0, "t_a", data_1, test_data_1, Unit()) feat_2 = FeatureNode(1, "x_a", data_2, test_data_2, Unit()) @@ -55,9 +59,9 @@ def test_bin_bin_model_eval(): model_node = ModelNode(node_2) - data_1 = np.random.random(90) * 1e4 + 0.1 - data_2 = np.random.random(90) * 2e4 - 1e4 - data_3 = np.random.random(90) * 2e4 - 1e4 + data_1 = np.random.random(task_sizes_train[0]) * 1e4 + 0.1 + data_2 = np.random.random(task_sizes_train[0]) * 2e4 - 1e4 + data_3 = np.random.random(task_sizes_train[0]) * 2e4 - 1e4 val_data = (data_1 * (3.0 * data_3 + 4.0)) + data_2 + 2.0 data_dict = {"t_a": data_1[0], "x_a": data_2[0], "m_a": data_3[0]} diff --git 
a/tests/pytest/test_model_eval/test_param_model_node/test_binary_unary_param.py b/tests/pytest/test_model_eval/test_param_model_node/test_binary_unary_param.py index a95070ae8c1e60ce7163938ff489f8e84843b818..dec60fdaf88e9c2be517537de094fa94b81386d9 100644 --- a/tests/pytest/test_model_eval/test_param_model_node/test_binary_unary_param.py +++ b/tests/pytest/test_model_eval/test_param_model_node/test_binary_unary_param.py @@ -30,15 +30,19 @@ class InvalidFeatureMade(Exception): def test_bin_un_model_eval(): - initialize_values_arr(90, 10, 2, 2) + task_sizes_train = [90] + task_sizes_test = [10] + initialize_values_arr(task_sizes_train, task_sizes_test, 2, 2) initialize_param_storage() - optimizer = get_reg_optimizer([90], np.zeros(90), 2, 2, 0.5, False) + optimizer = get_reg_optimizer( + [task_sizes_train[0]], np.zeros(task_sizes_train[0]), 2, 2, 0.5, False + ) - data_1 = np.random.random(90) * 1e4 + 0.1 - test_data_1 = np.random.random(10) * 1e4 + 0.1 + data_1 = np.random.random(task_sizes_train[0]) * 1e4 + 0.1 + test_data_1 = np.random.random(task_sizes_test[0]) * 1e4 + 0.1 - data_2 = np.random.random(90) * 2e4 - 1e4 - test_data_2 = np.random.random(10) * 2e4 - 1e4 + data_2 = np.random.random(task_sizes_train[0]) * 2e4 - 1e4 + test_data_2 = np.random.random(task_sizes_test[0]) * 2e4 - 1e4 feat_1 = FeatureNode(0, "t_a", data_1, test_data_1, Unit()) feat_2 = FeatureNode(1, "x_a", data_2, test_data_2, Unit()) @@ -51,8 +55,8 @@ def test_bin_un_model_eval(): model_node = ModelNode(node_2) - data_1 = np.random.random(90) * 1e4 + 0.1 - data_2 = np.random.random(90) * 2e4 - 1e4 + data_1 = np.random.random(task_sizes_train[0]) * 1e4 + 0.1 + data_2 = np.random.random(task_sizes_train[0]) * 2e4 - 1e4 val_data = (3.0 * data_1 + 4.0) ** 2.0 + data_2 + 2.0 data_dict = {"t_a": data_1[0], "x_a": data_2[0]} diff --git a/tests/pytest/test_model_eval/test_param_model_node/test_cb_param.py b/tests/pytest/test_model_eval/test_param_model_node/test_cb_param.py index 
09dbd11d6707f07c70b3bec0453fc3e0dd5b6033..7849eca5fa748b17c1790571ef5b0a6b81cba71e 100644 --- a/tests/pytest/test_model_eval/test_param_model_node/test_cb_param.py +++ b/tests/pytest/test_model_eval/test_param_model_node/test_cb_param.py @@ -29,12 +29,16 @@ class InvalidFeatureMade(Exception): def test_cb_param_model_eval(): - initialize_values_arr(90, 10, 1, 1) + task_sizes_train = [90] + task_sizes_test = [10] + initialize_values_arr(task_sizes_train, task_sizes_test, 1, 1) initialize_param_storage() - optimizer = get_reg_optimizer([90], np.zeros(90), 2, 2, 0.5, False) + optimizer = get_reg_optimizer( + [task_sizes_train[0]], np.zeros(task_sizes_train[0]), 2, 2, 0.5, False + ) - data_1 = np.random.random(90) * 2e4 - 1e4 - test_data_1 = np.random.random(10) * 2e4 - 1e4 + data_1 = np.random.random(task_sizes_train[0]) * 2e4 - 1e4 + test_data_1 = np.random.random(task_sizes_test[0]) * 2e4 - 1e4 feat_1 = FeatureNode(0, "t_a", data_1, test_data_1, Unit()) feats = [] diff --git a/tests/pytest/test_model_eval/test_param_model_node/test_cbrt_param.py b/tests/pytest/test_model_eval/test_param_model_node/test_cbrt_param.py index b04a6087567fad307a19f0c91e86658049cab8c7..6b476c57b5de487c6371cc38130f68aea7cd718d 100644 --- a/tests/pytest/test_model_eval/test_param_model_node/test_cbrt_param.py +++ b/tests/pytest/test_model_eval/test_param_model_node/test_cbrt_param.py @@ -29,12 +29,16 @@ class InvalidFeatureMade(Exception): def test_cbrt_param_model_eval(): - initialize_values_arr(90, 10, 1, 1) + task_sizes_train = [90] + task_sizes_test = [10] + initialize_values_arr(task_sizes_train, task_sizes_test, 1, 1) initialize_param_storage() - optimizer = get_reg_optimizer([90], np.zeros(90), 2, 2, 0.5, False) + optimizer = get_reg_optimizer( + [task_sizes_train[0]], np.zeros(task_sizes_train[0]), 2, 2, 0.5, False + ) - data_1 = np.random.random(90) * 1e4 + 0.1 - test_data_1 = np.random.random(10) * 1e4 + 0.1 + data_1 = np.random.random(task_sizes_train[0]) * 1e4 + 0.1 + 
test_data_1 = np.random.random(task_sizes_test[0]) * 1e4 + 0.1 feat_1 = FeatureNode(0, "t_a", data_1, test_data_1, Unit()) feats = [] diff --git a/tests/pytest/test_model_eval/test_param_model_node/test_cos_param.py b/tests/pytest/test_model_eval/test_param_model_node/test_cos_param.py index b8f7661ed70a2a2c6096aefab69366b30724c2cf..0b7a622361869579d866166933b17cd87a427602 100644 --- a/tests/pytest/test_model_eval/test_param_model_node/test_cos_param.py +++ b/tests/pytest/test_model_eval/test_param_model_node/test_cos_param.py @@ -29,12 +29,16 @@ class InvalidFeatureMade(Exception): def test_cos_param_model_eval(): - initialize_values_arr(90, 10, 1, 1) + task_sizes_train = [90] + task_sizes_test = [10] + initialize_values_arr(task_sizes_train, task_sizes_test, 1, 1) initialize_param_storage() - optimizer = get_reg_optimizer([90], np.zeros(90), 2, 2, 0.5, False) + optimizer = get_reg_optimizer( + [task_sizes_train[0]], np.zeros(task_sizes_train[0]), 2, 2, 0.5, False + ) - data_1 = np.random.random(90) * 2e4 - 1e4 - test_data_1 = np.random.random(10) * 2e4 - 1e4 + data_1 = np.random.random(task_sizes_train[0]) * 2e4 - 1e4 + test_data_1 = np.random.random(task_sizes_test[0]) * 2e4 - 1e4 feat_1 = FeatureNode(0, "t_a", data_1, test_data_1, Unit()) feats = [] diff --git a/tests/pytest/test_model_eval/test_param_model_node/test_div_param.py b/tests/pytest/test_model_eval/test_param_model_node/test_div_param.py index 8f5d19ca589d61e088967c4bc1652d59a7546864..7e119467b3ba522e486f0f9bfc5ecd4d814d8294 100644 --- a/tests/pytest/test_model_eval/test_param_model_node/test_div_param.py +++ b/tests/pytest/test_model_eval/test_param_model_node/test_div_param.py @@ -29,15 +29,19 @@ class InvalidFeatureMade(Exception): def test_div_param_model_eval(): - initialize_values_arr(90, 10, 2, 1) + task_sizes_train = [90] + task_sizes_test = [10] + initialize_values_arr(task_sizes_train, task_sizes_test, 2, 1) initialize_param_storage() - optimizer = get_reg_optimizer([90], np.zeros(90), 2, 
2, 0.5, False) + optimizer = get_reg_optimizer( + [task_sizes_train[0]], np.zeros(task_sizes_train[0]), 2, 2, 0.5, False + ) - data_1 = np.random.random(90) * 1e4 + 0.1 - test_data_1 = np.random.random(10) * 1e4 + 0.1 + data_1 = np.random.random(task_sizes_train[0]) * 1e4 + 0.1 + test_data_1 = np.random.random(task_sizes_test[0]) * 1e4 + 0.1 - data_2 = np.random.random(90) * 2e4 - 1e4 - test_data_2 = np.random.random(10) * 2e4 - 1e4 + data_2 = np.random.random(task_sizes_train[0]) * 2e4 - 1e4 + test_data_2 = np.random.random(task_sizes_test[0]) * 2e4 - 1e4 feat_1 = FeatureNode(0, "t_a", data_1, test_data_1, Unit()) feat_2 = FeatureNode(1, "x_a", data_2, test_data_2, Unit()) diff --git a/tests/pytest/test_model_eval/test_param_model_node/test_exp_param.py b/tests/pytest/test_model_eval/test_param_model_node/test_exp_param.py index b6dd4de2069186997b1bc12205c8aef8283e2066..f09f45b408f0f36ce78d8b7c226620726dfea759 100644 --- a/tests/pytest/test_model_eval/test_param_model_node/test_exp_param.py +++ b/tests/pytest/test_model_eval/test_param_model_node/test_exp_param.py @@ -29,12 +29,16 @@ class InvalidFeatureMade(Exception): def test_exp_param_model_eval(): - initialize_values_arr(90, 10, 1, 1) + task_sizes_train = [90] + task_sizes_test = [10] + initialize_values_arr(task_sizes_train, task_sizes_test, 1, 1) initialize_param_storage() - optimizer = get_reg_optimizer([90], np.zeros(90), 2, 2, 0.5, False) + optimizer = get_reg_optimizer( + [task_sizes_train[0]], np.zeros(task_sizes_train[0]), 2, 2, 0.5, False + ) - data_1 = np.random.random(90) * 2e1 - 1e1 - test_data_1 = np.random.random(10) * 2e1 - 1e1 + data_1 = np.random.random(task_sizes_train[0]) * 2e1 - 1e1 + test_data_1 = np.random.random(task_sizes_test[0]) * 2e1 - 1e1 feat_1 = FeatureNode(0, "t_a", data_1, test_data_1, Unit()) feats = [] diff --git a/tests/pytest/test_model_eval/test_param_model_node/test_inv_param.py b/tests/pytest/test_model_eval/test_param_model_node/test_inv_param.py index 
ee92eb5a0e7fe72866d7814af8166b26dfc3f6cd..c745617435246e172a4ef2e64d6e6a682b4867bc 100644 --- a/tests/pytest/test_model_eval/test_param_model_node/test_inv_param.py +++ b/tests/pytest/test_model_eval/test_param_model_node/test_inv_param.py @@ -29,12 +29,16 @@ class InvalidFeatureMade(Exception): def test_inv_param_model_eval(): - initialize_values_arr(90, 10, 1, 1) + task_sizes_train = [90] + task_sizes_test = [10] + initialize_values_arr(task_sizes_train, task_sizes_test, 1, 1) initialize_param_storage() - optimizer = get_reg_optimizer([90], np.zeros(90), 2, 2, 0.5, False) + optimizer = get_reg_optimizer( + [task_sizes_train[0]], np.zeros(task_sizes_train[0]), 2, 2, 0.5, False + ) - data_1 = np.random.random(90) * 1e4 + 0.1 - test_data_1 = np.random.random(10) * 1e4 + 0.1 + data_1 = np.random.random(task_sizes_train[0]) * 1e4 + 0.1 + test_data_1 = np.random.random(task_sizes_test[0]) * 1e4 + 0.1 feat_1 = FeatureNode(0, "t_a", data_1, test_data_1, Unit()) feats = [] diff --git a/tests/pytest/test_model_eval/test_param_model_node/test_log_param.py b/tests/pytest/test_model_eval/test_param_model_node/test_log_param.py index accfb658d3e5f3a319b45c6d8a92cbefe6c70c20..ea8a5f129285da24fa89d9b82f667ad84356b6d9 100644 --- a/tests/pytest/test_model_eval/test_param_model_node/test_log_param.py +++ b/tests/pytest/test_model_eval/test_param_model_node/test_log_param.py @@ -29,12 +29,16 @@ class InvalidFeatureMade(Exception): def test_log_param_model_eval(): - initialize_values_arr(90, 10, 1, 1) + task_sizes_train = [90] + task_sizes_test = [10] + initialize_values_arr(task_sizes_train, task_sizes_test, 1, 1) initialize_param_storage() - optimizer = get_reg_optimizer([90], np.zeros(90), 2, 2, 0.5, False) + optimizer = get_reg_optimizer( + [task_sizes_train[0]], np.zeros(task_sizes_train[0]), 2, 2, 0.5, False + ) - data_1 = np.random.random(90) * 1e4 + 0.1 - test_data_1 = np.random.random(10) * 1e4 + 0.1 + data_1 = np.random.random(task_sizes_train[0]) * 1e4 + 0.1 + test_data_1 
= np.random.random(task_sizes_test[0]) * 1e4 + 0.1 feat_1 = FeatureNode(0, "t_a", data_1, test_data_1, Unit()) feats = [] diff --git a/tests/pytest/test_model_eval/test_param_model_node/test_mult_param.py b/tests/pytest/test_model_eval/test_param_model_node/test_mult_param.py index b66c259e2c0c428779030da2957ff78fa20edd89..6395b2c4014318eff7ff3de24618859bc0264b8d 100644 --- a/tests/pytest/test_model_eval/test_param_model_node/test_mult_param.py +++ b/tests/pytest/test_model_eval/test_param_model_node/test_mult_param.py @@ -29,15 +29,19 @@ class InvalidFeatureMade(Exception): def test_mult_param_model_eval(): - initialize_values_arr(90, 10, 2, 1) + task_sizes_train = [90] + task_sizes_test = [10] + initialize_values_arr(task_sizes_train, task_sizes_test, 2, 1) initialize_param_storage() - optimizer = get_reg_optimizer([90], np.zeros(90), 2, 2, 0.5, False) + optimizer = get_reg_optimizer( + [task_sizes_train[0]], np.zeros(task_sizes_train[0]), 2, 2, 0.5, False + ) - data_1 = np.random.random(90) * 1e4 + 0.1 - test_data_1 = np.random.random(10) * 1e4 + 0.1 + data_1 = np.random.random(task_sizes_train[0]) * 1e4 + 0.1 + test_data_1 = np.random.random(task_sizes_test[0]) * 1e4 + 0.1 - data_2 = np.random.random(90) * 2e4 - 1e4 - test_data_2 = np.random.random(10) * 2e4 - 1e4 + data_2 = np.random.random(task_sizes_train[0]) * 2e4 - 1e4 + test_data_2 = np.random.random(task_sizes_test[0]) * 2e4 - 1e4 feat_1 = FeatureNode(0, "t_a", data_1, test_data_1, Unit()) feat_2 = FeatureNode(1, "x_a", data_2, test_data_2, Unit()) diff --git a/tests/pytest/test_model_eval/test_param_model_node/test_neg_exp_param.py b/tests/pytest/test_model_eval/test_param_model_node/test_neg_exp_param.py index 50dcd99c0239b60504d0dacd27c6e5483ef9911a..1ae976bab60418e38364d49d1004ab6e459b9549 100644 --- a/tests/pytest/test_model_eval/test_param_model_node/test_neg_exp_param.py +++ b/tests/pytest/test_model_eval/test_param_model_node/test_neg_exp_param.py @@ -29,12 +29,16 @@ class 
InvalidFeatureMade(Exception): def test_neg_exp_param_model_eval(): - initialize_values_arr(90, 10, 1, 1) + task_sizes_train = [90] + task_sizes_test = [10] + initialize_values_arr(task_sizes_train, task_sizes_test, 1, 1) initialize_param_storage() - optimizer = get_reg_optimizer([90], np.zeros(90), 2, 2, 0.5, False) + optimizer = get_reg_optimizer( + [task_sizes_train[0]], np.zeros(task_sizes_train[0]), 2, 2, 0.5, False + ) - data_1 = np.random.random(90) * 2e1 - 1e1 - test_data_1 = np.random.random(10) * 2e1 - 1e1 + data_1 = np.random.random(task_sizes_train[0]) * 2e1 - 1e1 + test_data_1 = np.random.random(task_sizes_test[0]) * 2e1 - 1e1 feat_1 = FeatureNode(0, "t_a", data_1, test_data_1, Unit()) feats = [] diff --git a/tests/pytest/test_model_eval/test_param_model_node/test_sin_param.py b/tests/pytest/test_model_eval/test_param_model_node/test_sin_param.py index 19ef2d0b917df9c2bdf6501355037f182772b447..028082b30c458027a40722324f34e0b607b07c20 100644 --- a/tests/pytest/test_model_eval/test_param_model_node/test_sin_param.py +++ b/tests/pytest/test_model_eval/test_param_model_node/test_sin_param.py @@ -29,12 +29,16 @@ class InvalidFeatureMade(Exception): def test_sin_param_model_eval(): - initialize_values_arr(90, 10, 1, 1) + task_sizes_train = [90] + task_sizes_test = [10] + initialize_values_arr(task_sizes_train, task_sizes_test, 1, 1) initialize_param_storage() - optimizer = get_reg_optimizer([90], np.zeros(90), 2, 2, 0.5, False) + optimizer = get_reg_optimizer( + [task_sizes_train[0]], np.zeros(task_sizes_train[0]), 2, 2, 0.5, False + ) - data_1 = np.random.random(90) * 2e4 - 1e4 - test_data_1 = np.random.random(10) * 2e4 - 1e4 + data_1 = np.random.random(task_sizes_train[0]) * 2e4 - 1e4 + test_data_1 = np.random.random(task_sizes_test[0]) * 2e4 - 1e4 feat_1 = FeatureNode(0, "t_a", data_1, test_data_1, Unit()) feats = [] diff --git a/tests/pytest/test_model_eval/test_param_model_node/test_six_pow_param.py 
b/tests/pytest/test_model_eval/test_param_model_node/test_six_pow_param.py index d282cbf21ad071657b88590db3838fd6fc7ec850..5d6b349b859bd58a169c8c1bc3d2a0f09d71888f 100644 --- a/tests/pytest/test_model_eval/test_param_model_node/test_six_pow_param.py +++ b/tests/pytest/test_model_eval/test_param_model_node/test_six_pow_param.py @@ -29,12 +29,16 @@ class InvalidFeatureMade(Exception): def test_six_pow_param_model_eval(): - initialize_values_arr(90, 10, 1, 1) + task_sizes_train = [90] + task_sizes_test = [10] + initialize_values_arr(task_sizes_train, task_sizes_test, 1, 1) initialize_param_storage() - optimizer = get_reg_optimizer([90], np.zeros(90), 2, 2, 0.5, False) + optimizer = get_reg_optimizer( + [task_sizes_train[0]], np.zeros(task_sizes_train[0]), 2, 2, 0.5, False + ) - data_1 = np.random.random(90) * 2e4 - 1e4 - test_data_1 = np.random.random(10) * 2e4 - 1e4 + data_1 = np.random.random(task_sizes_train[0]) * 2e4 - 1e4 + test_data_1 = np.random.random(task_sizes_test[0]) * 2e4 - 1e4 feat_1 = FeatureNode(0, "t_a", data_1, test_data_1, Unit()) feats = [] diff --git a/tests/pytest/test_model_eval/test_param_model_node/test_sq_param.py b/tests/pytest/test_model_eval/test_param_model_node/test_sq_param.py index 36d8ad1bf4707dacb7eb28789f8ac4c79dac63fc..41c21e63a41b1605fe648089e0e98b471833133f 100644 --- a/tests/pytest/test_model_eval/test_param_model_node/test_sq_param.py +++ b/tests/pytest/test_model_eval/test_param_model_node/test_sq_param.py @@ -29,12 +29,16 @@ class InvalidFeatureMade(Exception): def test_sq_param_model_eval(): - initialize_values_arr(90, 10, 1, 1) + task_sizes_train = [90] + task_sizes_test = [10] + initialize_values_arr(task_sizes_train, task_sizes_test, 1, 1) initialize_param_storage() - optimizer = get_reg_optimizer([90], np.zeros(90), 2, 2, 0.5, False) + optimizer = get_reg_optimizer( + [task_sizes_train[0]], np.zeros(task_sizes_train[0]), 2, 2, 0.5, False + ) - data_1 = np.random.random(90) * 2e4 - 1e4 - test_data_1 = np.random.random(10) 
* 2e4 - 1e4 + data_1 = np.random.random(task_sizes_train[0]) * 2e4 - 1e4 + test_data_1 = np.random.random(task_sizes_test[0]) * 2e4 - 1e4 feat_1 = FeatureNode(0, "t_a", data_1, test_data_1, Unit()) feats = [] diff --git a/tests/pytest/test_model_eval/test_param_model_node/test_sqrt_param.py b/tests/pytest/test_model_eval/test_param_model_node/test_sqrt_param.py index e18a1750ed8d419812820bae17e6b04a6231f3e0..bb47b82245b4ed24b54a87c408b11a2e34343d41 100644 --- a/tests/pytest/test_model_eval/test_param_model_node/test_sqrt_param.py +++ b/tests/pytest/test_model_eval/test_param_model_node/test_sqrt_param.py @@ -29,12 +29,16 @@ class InvalidFeatureMade(Exception): def test_sqrt_param_model_eval(): - initialize_values_arr(90, 10, 1, 1) + task_sizes_train = [90] + task_sizes_test = [10] + initialize_values_arr(task_sizes_train, task_sizes_test, 1, 1) initialize_param_storage() - optimizer = get_reg_optimizer([90], np.zeros(90), 2, 2, 0.5, False) + optimizer = get_reg_optimizer( + [task_sizes_train[0]], np.zeros(task_sizes_train[0]), 2, 2, 0.5, False + ) - data_1 = np.random.random(90) * 1e4 + 0.1 - test_data_1 = np.random.random(10) * 1e4 + 0.1 + data_1 = np.random.random(task_sizes_train[0]) * 1e4 + 0.1 + test_data_1 = np.random.random(task_sizes_test[0]) * 1e4 + 0.1 feat_1 = FeatureNode(0, "t_a", data_1, test_data_1, Unit()) feats = [] diff --git a/tests/pytest/test_model_eval/test_param_model_node/test_sub_param.py b/tests/pytest/test_model_eval/test_param_model_node/test_sub_param.py index abe0cdb3932ef284a7ab48d72f7a03b5d6454f2c..d7b12d0d8a421444735498e704816c56247ae6da 100644 --- a/tests/pytest/test_model_eval/test_param_model_node/test_sub_param.py +++ b/tests/pytest/test_model_eval/test_param_model_node/test_sub_param.py @@ -29,15 +29,19 @@ class InvalidFeatureMade(Exception): def test_sub_parm_model_eval(): - initialize_values_arr(90, 10, 2, 1) + task_sizes_train = [90] + task_sizes_test = [10] + initialize_values_arr(task_sizes_train, task_sizes_test, 2, 1) 
initialize_param_storage() - optimizer = get_reg_optimizer([90], np.zeros(90), 2, 2, 0.5, False) + optimizer = get_reg_optimizer( + [task_sizes_train[0]], np.zeros(task_sizes_train[0]), 2, 2, 0.5, False + ) - data_1 = np.random.random(90) * 1e4 + 0.1 - test_data_1 = np.random.random(10) * 1e4 + 0.1 + data_1 = np.random.random(task_sizes_train[0]) * 1e4 + 0.1 + test_data_1 = np.random.random(task_sizes_test[0]) * 1e4 + 0.1 - data_2 = np.random.random(90) * 2e4 - 1e4 - test_data_2 = np.random.random(10) * 2e4 - 1e4 + data_2 = np.random.random(task_sizes_train[0]) * 2e4 - 1e4 + test_data_2 = np.random.random(task_sizes_test[0]) * 2e4 - 1e4 feat_1 = FeatureNode(0, "t_a", data_1, test_data_1, Unit()) feat_2 = FeatureNode(1, "x_a", data_2, test_data_2, Unit()) diff --git a/tests/pytest/test_model_eval/test_param_model_node/test_unary_binary_param.py b/tests/pytest/test_model_eval/test_param_model_node/test_unary_binary_param.py index 9f828d3934e39fd18481687fb89e27ee6e01687a..2d0eb6077491a25368178fcb2c12941d0e280f23 100644 --- a/tests/pytest/test_model_eval/test_param_model_node/test_unary_binary_param.py +++ b/tests/pytest/test_model_eval/test_param_model_node/test_unary_binary_param.py @@ -30,15 +30,19 @@ class InvalidFeatureMade(Exception): def test_un_bin_model_eval(): - initialize_values_arr(90, 10, 2, 2) + task_sizes_train = [90] + task_sizes_test = [10] + initialize_values_arr(task_sizes_train, task_sizes_test, 2, 2) initialize_param_storage() - optimizer = get_reg_optimizer([90], np.zeros(90), 2, 2, 0.5, False) + optimizer = get_reg_optimizer( + [task_sizes_train[0]], np.zeros(task_sizes_train[0]), 2, 2, 0.5, False + ) - data_1 = np.random.random(90) * 1e4 + 0.1 - test_data_1 = np.random.random(10) * 1e4 + 0.1 + data_1 = np.random.random(task_sizes_train[0]) * 1e4 + 0.1 + test_data_1 = np.random.random(task_sizes_test[0]) * 1e4 + 0.1 - data_2 = np.random.random(90) * 2e4 - 1e4 - test_data_2 = np.random.random(10) * 2e4 - 1e4 + data_2 = 
np.random.random(task_sizes_train[0]) * 2e4 - 1e4 + test_data_2 = np.random.random(task_sizes_test[0]) * 2e4 - 1e4 feat_1 = FeatureNode(0, "t_a", data_1, test_data_1, Unit()) feat_2 = FeatureNode(1, "x_a", data_2, test_data_2, Unit()) @@ -51,8 +55,8 @@ def test_un_bin_model_eval(): model_node = ModelNode(node_2) - data_1 = np.random.random(90) * 1e4 + 0.1 - data_2 = np.random.random(90) * 2e4 - 1e4 + data_1 = np.random.random(task_sizes_train[0]) * 1e4 + 0.1 + data_2 = np.random.random(task_sizes_train[0]) * 2e4 - 1e4 val_data = ((data_1 + 0.5 * data_2 + 4.0) + 2.0) ** 2.0 data_dict = {"t_a": data_1[0], "x_a": data_2[0]} diff --git a/tests/pytest/test_model_eval/test_param_model_node/test_unary_unary_param.py b/tests/pytest/test_model_eval/test_param_model_node/test_unary_unary_param.py index 85f879dfbf03c6aad732aa8384a9f78443e1a171..e2fb09237ddd55f809f072c033657d3e0f03afeb 100644 --- a/tests/pytest/test_model_eval/test_param_model_node/test_unary_unary_param.py +++ b/tests/pytest/test_model_eval/test_param_model_node/test_unary_unary_param.py @@ -30,12 +30,16 @@ class InvalidFeatureMade(Exception): def test_un_un_model_eval(): - initialize_values_arr(90, 10, 1, 2) + task_sizes_train = [90] + task_sizes_test = [10] + initialize_values_arr(task_sizes_train, task_sizes_test, 1, 2) initialize_param_storage() - optimizer = get_reg_optimizer([90], np.zeros(90), 2, 2, 0.5, False) + optimizer = get_reg_optimizer( + [task_sizes_train[0]], np.zeros(task_sizes_train[0]), 2, 2, 0.5, False + ) - data_1 = np.random.random(90) * 1e4 + 0.1 - test_data_1 = np.random.random(10) * 1e4 + 0.1 + data_1 = np.random.random(task_sizes_train[0]) * 1e4 + 0.1 + test_data_1 = np.random.random(task_sizes_test[0]) * 1e4 + 0.1 feat_1 = FeatureNode(0, "t_a", data_1, test_data_1, Unit()) feats = [] @@ -48,7 +52,7 @@ def test_un_un_model_eval(): model_node = ModelNode(node_2) - data_1 = np.random.random(90) * 1e4 + 0.1 + data_1 = np.random.random(task_sizes_train[0]) * 1e4 + 0.1 val_data = 1.0 / 
((3.0 * data_1 + 4.0) ** 2.0 + 2.0) data_dict = {"t_a": data_1[0]} diff --git a/tests/pytest/test_param.py b/tests/pytest/test_param.py index 9616a2a15397632a909cb4976e7ff0188ac6a0ec..b46a7f880f5c8bde9c8b0e38952706f0ca85bd7e 100644 --- a/tests/pytest/test_param.py +++ b/tests/pytest/test_param.py @@ -13,13 +13,13 @@ # limitations under the License. import shutil from pathlib import Path -from sissopp.py_interface import get_fs_solver +from sissopp.py_interface import get_fs_solver, create_inputs parent = Path(__file__).parent def test_param(): - feat_sapce, sisso = get_fs_solver( + inputs = create_inputs( df=parent / "data_param.csv", prop_key="Prop", allowed_ops="all", @@ -27,11 +27,12 @@ def test_param(): cols="all", max_rung=2, n_sis_select=20, - max_dim=1, - n_residuals=1, + n_dim=1, + n_residual=1, leave_out_frac=0.025, leave_out_inds=list(range(3)) + list(range(60, 62)), ) + feat_space, sisso = get_fs_solver(inputs) sisso.fit() diff --git a/tests/pytest/test_sisso.py b/tests/pytest/test_sisso.py index 4dff4c5162a41c0050f0a879ad24dc45ffad4779..dbbc9c4b358934df4377f4a7bf5be1709ab30b0e 100644 --- a/tests/pytest/test_sisso.py +++ b/tests/pytest/test_sisso.py @@ -13,13 +13,13 @@ # limitations under the License. import shutil from pathlib import Path -from sissopp.py_interface import get_fs_solver +from sissopp.py_interface import get_fs_solver, create_inputs parent = Path(__file__).parent def test_sisso(): - feat_sapce, sisso = get_fs_solver( + inputs = create_inputs( df=str(parent / "data.csv"), prop_key="Prop", allowed_ops="all", @@ -27,12 +27,13 @@ def test_sisso(): cols="all", max_rung=2, n_sis_select=20, - max_dim=2, - n_residuals=1, + n_dim=2, + n_residual=1, task_key="Task", leave_out_frac=0.1, leave_out_inds=list(range(6)) + list(range(60, 64)), ) + feat_space, sisso = get_fs_solver(inputs) sisso.fit()