Commit 58233503 authored by Thomas Purcell
Browse files

New input scheme working with google and exec tests

Now need to adapt the python interface to match
parent be059e2c
......@@ -22,42 +22,17 @@
#include "descriptor_identifier/solver/SISSOClassifier.hpp"
// Constructor for the classifier.
//
// Forwards `inputs` and `feat_space` to the SISSOSolver base class, initialises
// _c to 1000.0 and _width to 1.0e-5, forces _fix_intercept to false (with a
// warning on stderr) and finally calls setup_d_mat_transfer().
//
// NOTE(review): this listing is a rendered diff without +/- markers. The long
// explicit parameter list and the multi-argument SISSOSolver("classification", ...)
// call appear to be the pre-change lines, with the InputParser-based forms as
// their replacements -- confirm against the repository before editing.
SISSOClassifier::SISSOClassifier(
const std::shared_ptr<FeatureSpace> feat_space,
const std::string prop_label,
const Unit prop_unit,
const std::vector<double> prop,
const std::vector<double> prop_test,
const std::vector<int> task_sizes_train,
const std::vector<int> task_sizes_test,
const std::vector<int> leave_out_inds,
const int n_dim,
const int n_residual,
const int n_models_store,
const std::vector<std::string> sample_ids_train,
const std::vector<std::string> sample_ids_test,
const std::vector<std::string> task_names
const InputParser inputs, const std::shared_ptr<FeatureSpace> feat_space
):
SISSOSolver(
"classification",
feat_space,
prop_label,
prop_unit,
prop,
prop_test,
task_sizes_train,
task_sizes_test,
leave_out_inds,
n_dim,
n_residual,
n_models_store,
sample_ids_train,
sample_ids_test,
task_names,
false
),
SISSOSolver(inputs, feat_space),
_c(1000.0),
_width(1.0e-5)
{
// The base class takes _fix_intercept from the inputs; classification cannot
// run with the bias term fixed at 0, so the setting is overridden here
// instead of being rejected.
if(_fix_intercept)
{
std::cerr << "For classification the bias term can't be fixed at 0. Changing it now." << std::endl;
_fix_intercept = false;
}
setup_d_mat_transfer();
}
......
......@@ -51,38 +51,17 @@ protected:
int _n_class; //!< The number of classes in the calculation
public:
/**
* @brief Create a SISSOClassifier object
// DocString: sisso_class_init
/**
* @brief Constructor for the classifier
*
* @param inputs The InputParser object for the calculation
* @param feat_space Feature Space for the problem
* @param prop_label The label for the property
* @param prop_unit The unit of the property
* @param prop The value of the property to evaluate the loss function against for the training set
* @param prop_test The value of the property to evaluate the loss function against for the test set
* @param task_sizes_train Number of training samples per task
* @param task_sizes_test Number of testing samples per task
* @param leave_out_inds List of indexes from the initial data file in the test set
* @param n_dim The maximum number of features allowed in the linear model
* @param n_residual Number of residuals to pass to the next SIS operation
* @param n_models_store The number of models to output to files
* @param sample_ids_train A list storing all sample ids for the training samples
* @param sample_ids_test A list storing all sample ids for the test samples
* @param task_names A list storing the ID of the task names
*/
SISSOClassifier(
const std::shared_ptr<FeatureSpace> feat_space,
const std::string prop_label, Unit prop_unit,
const std::vector<double> prop,
const std::vector<double> prop_test,
const std::vector<int> task_sizes_train,
const std::vector<int> task_sizes_test,
const std::vector<int> leave_out_inds,
const int n_dim,
const int n_residual,
const int n_models_store,
const std::vector<std::string> sample_ids_train,
const std::vector<std::string> sample_ids_test,
const std::vector<std::string> task_names
const InputParser inputs,
const std::shared_ptr<FeatureSpace> feat_space
);
/**
......@@ -136,78 +115,6 @@ public:
// Python interface functions
#ifdef PY_BINDINGS
// DocString: sisso_class_init_arr
/**
* @brief Create a SISSOClassifier object
*
* @param feat_space (FeatureSpace) Feature Space for the problem
* @param prop_label (str) The label for the property
* @param prop_unit (Unit) The unit of the property
* @param prop (np.ndarray) The value of the property to evaluate the loss function against for the training set
* @param prop_test (np.ndarray) The value of the property to evaluate the loss function against for the test set
* @param task_sizes_train (list) Number of training samples per task
* @param task_sizes_test (list) Number of testing samples per task
* @param leave_out_inds (list) List of indexes from the initial data file in the test set
* @param n_dim (int) The maximum number of features allowed in the linear model
* @param n_residual (int) Number of residuals to pass to the next SIS operation
* @param n_models_store (int) The number of models to output to files
* @param sample_ids_train A list storing all sample ids for the training samples
* @param sample_ids_test A list storing all sample ids for the test samples
* @param task_names A list storing the ID of the task names
*/
SISSOClassifier(
std::shared_ptr<FeatureSpace> feat_space,
std::string prop_label,
Unit prop_unit,
np::ndarray prop,
np::ndarray prop_test,
py::list task_sizes_train,
py::list task_sizes_test,
py::list leave_out_inds,
int n_dim,
int n_residual,
int n_models_store,
py::list sample_ids_train,
py::list sample_ids_test,
py::list task_names
);
// DocString: sisso_class_init_list
/**
* @brief Create a SISSOClassifier object
*
* @param feat_space (FeatureSpace) Feature Space for the problem
* @param prop_label (str) The label for the property
* @param prop_unit (Unit) The unit of the property
* @param prop (list) The value of the property to evaluate the loss function against for the training set
* @param prop_test (list) The value of the property to evaluate the loss function against for the test set
* @param task_sizes_train (list) Number of training samples per task
* @param task_sizes_test (list) Number of testing samples per task
* @param leave_out_inds (list) List of indexes from the initial data file in the test set
* @param n_dim (int) The maximum number of features allowed in the linear model
* @param n_residual (int) Number of residuals to pass to the next SIS operation
* @param n_models_store (int) The number of models to output to files
* @param sample_ids_train A list storing all sample ids for the training samples
* @param sample_ids_test A list storing all sample ids for the test samples
* @param task_names A list storing the ID of the task names
*/
SISSOClassifier(
std::shared_ptr<FeatureSpace> feat_space,
std::string prop_label,
Unit prop_unit,
py::list prop,
py::list prop_test,
py::list task_sizes_train,
py::list task_sizes_test,
py::list leave_out_inds,
int n_dim,
int n_residual,
int n_models_store,
py::list sample_ids_train,
py::list sample_ids_test,
py::list task_names
);
// DocString: sisso_class_models_py
/**
* @brief The selected models (n_dim, n_models_store)
......
......@@ -22,44 +22,14 @@
#include "descriptor_identifier/solver/SISSOLogRegressor.hpp"
SISSOLogRegressor::SISSOLogRegressor(
const std::shared_ptr<FeatureSpace> feat_space,
const std::string prop_label,
const Unit prop_unit,
const std::vector<double> prop,
const std::vector<double> prop_test,
const std::vector<int> task_sizes_train,
const std::vector<int> task_sizes_test,
const std::vector<int> leave_out_inds,
const int n_dim,
const int n_residual,
const int n_models_store,
const std::vector<std::string> sample_ids_train,
const std::vector<std::string> sample_ids_test,
const std::vector<std::string> task_names,
const bool fix_intercept
const InputParser inputs, const std::shared_ptr<FeatureSpace> feat_space
):
SISSORegressor(
feat_space,
prop_label,
prop_unit,
prop,
prop_test,
task_sizes_train,
task_sizes_test,
leave_out_inds,
n_dim,
n_residual,
n_models_store,
sample_ids_train,
sample_ids_test,
task_names,
fix_intercept
)
SISSORegressor(inputs, feat_space)
{
std::vector<double> log_prop(prop);
std::vector<double> log_prop_test(prop_test);
std::transform(prop.begin(), prop.end(), log_prop.begin(), [](double p){return std::log(p);});
std::transform(prop_test.begin(), prop_test.end(), log_prop_test.begin(), [](double p){return std::log(p);});
std::vector<double> log_prop(inputs.prop_train().size(), 0.0);
std::vector<double> log_prop_test(inputs.prop_test().size(), 0.0);
std::transform(inputs.prop_train().begin(), inputs.prop_train().end(), log_prop.begin(), [](double p){return std::log(p);});
std::transform(inputs.prop_test().begin(), inputs.prop_test().end(), log_prop_test.begin(), [](double p){return std::log(p);});
_loss = loss_function_util::get_loss_function(
"log_regression",
......
......@@ -41,41 +41,17 @@ private:
std::vector<std::vector<ModelLogRegressor>> _models; //!< List of models
public:
// DocString: sisso_log_reg_init
/**
* @brief Create a SISSOLogRegressor object
* @brief Constructor for the log regressor
*
* @param inputs The InputParser object for the calculation
* @param feat_space Feature Space for the problem
* @param prop_label The label for the property
* @param prop_unit The unit of the property
* @param prop The value of the property to evaluate the loss function against for the training set
* @param prop_test The value of the property to evaluate the loss function against for the test set
* @param task_sizes_train Number of training samples per task
* @param task_sizes_test Number of testing samples per task
* @param leave_out_inds List of indexes from the initial data file in the test set
* @param n_dim The maximum number of features allowed in the linear model
* @param n_residual Number of residuals to pass to the next SIS operation
* @param n_models_store The number of models to output to files
* @param sample_ids_train A list storing all sample ids for the training samples
* @param sample_ids_test A list storing all sample ids for the test samples
* @param task_names A list storing the ID of the task names
* @param fix_intercept If true the bias term is fixed at 0
*/
SISSOLogRegressor(
const std::shared_ptr<FeatureSpace> feat_space,
const std::string prop_label,
const Unit prop_unit,
const std::vector<double> prop,
const std::vector<double> prop_test,
const std::vector<int> task_sizes_train,
const std::vector<int> task_sizes_test,
const std::vector<int> leave_out_inds,
const int n_dim,
const int n_residual,
const int n_models_store,
const std::vector<std::string> sample_ids_train,
const std::vector<std::string> sample_ids_test,
const std::vector<std::string> task_names,
const bool fix_intercept=false
const InputParser inputs,
const std::shared_ptr<FeatureSpace> feat_space
);
/**
......@@ -104,81 +80,6 @@ public:
// Python interface functions
#ifdef PY_BINDINGS
// DocString: sisso_log_reg_init_arr
/**
* @brief Create a SISSOLogRegressor object
*
* @param feat_space (FeatureSpace) Feature Space for the problem
* @param prop_label (str) The label for the property
* @param prop_unit (Unit) The unit of the property
* @param prop (np.ndarray) The value of the property to evaluate the loss function against for the training set
* @param prop_test (np.ndarray) The value of the property to evaluate the loss function against for the test set
* @param task_sizes_train (list) Number of training samples per task
* @param task_sizes_test (list) Number of testing samples per task
* @param leave_out_inds (list) List of indexes from the initial data file in the test set
* @param n_dim (int) The maximum number of features allowed in the linear model
* @param n_residual (int) Number of residuals to pass to the next SIS operation
* @param n_models_store (int) The number of models to output to files
* @param sample_ids_train A list storing all sample ids for the training samples
* @param sample_ids_test A list storing all sample ids for the test samples
* @param task_names A list storing the ID of the task names
* @param fix_intercept (bool) If true the bias term is fixed at 0
*/
SISSOLogRegressor(
std::shared_ptr<FeatureSpace> feat_space,
std::string prop_label,
Unit prop_unit,
np::ndarray prop,
np::ndarray prop_test,
py::list task_sizes_train,
py::list task_sizes_test,
py::list leave_out_inds,
int n_dim,
int n_residual,
int n_models_store,
py::list sample_ids_train,
py::list sample_ids_test,
py::list task_names,
bool fix_intercept=false
);
// DocString: sisso_log_reg_init_list
/**
* @brief Create a SISSOLogRegressor object
*
* @param feat_space (FeatureSpace) Feature Space for the problem
* @param prop_label (str) The label for the property
* @param prop_unit (Unit) The unit of the property
* @param prop (list) The value of the property to evaluate the loss function against for the training set
* @param prop_test (list) The value of the property to evaluate the loss function against for the test set
* @param task_sizes_train (list) Number of training samples per task
* @param task_sizes_test (list) Number of testing samples per task
* @param leave_out_inds (list) List of indexes from the initial data file in the test set
* @param n_dim (int) The maximum number of features allowed in the linear model
* @param n_residual (int) Number of residuals to pass to the next SIS operation
* @param n_models_store (int) The number of models to output to files
* @param sample_ids_train A list storing all sample ids for the training samples
* @param sample_ids_test A list storing all sample ids for the test samples
* @param task_names A list storing the ID of the task names
* @param fix_intercept (bool) If true the bias term is fixed at 0
*/
SISSOLogRegressor(
std::shared_ptr<FeatureSpace> feat_space,
std::string prop_label,
Unit prop_unit,
py::list prop,
py::list prop_test,
py::list task_sizes_train,
py::list task_sizes_test,
py::list leave_out_inds,
int n_dim,
int n_residual,
int n_models_store,
py::list sample_ids_train,
py::list sample_ids_test,
py::list task_names,
bool fix_intercept=false
);
// DocString: sisso_log_reg_models_py
/**
......
......@@ -22,40 +22,9 @@
#include "descriptor_identifier/solver/SISSORegressor.hpp"
// Constructor for the regressor.
//
// All set-up is delegated to the SISSOSolver base class; the body is empty.
//
// NOTE(review): this listing is a rendered diff without +/- markers. The long
// explicit parameter list and the multi-argument SISSOSolver("regression", ...)
// call appear to be the pre-change lines, with the InputParser-based forms as
// their replacements -- confirm against the repository before editing.
SISSORegressor::SISSORegressor(
const std::shared_ptr<FeatureSpace> feat_space,
const std::string prop_label,
const Unit prop_unit,
const std::vector<double> prop,
const std::vector<double> prop_test,
const std::vector<int> task_sizes_train,
const std::vector<int> task_sizes_test,
const std::vector<int> leave_out_inds,
const int n_dim,
const int n_residual,
const int n_models_store,
const std::vector<std::string> sample_ids_train,
const std::vector<std::string> sample_ids_test,
const std::vector<std::string> task_names,
const bool fix_intercept
const InputParser inputs, const std::shared_ptr<FeatureSpace> feat_space
):
SISSOSolver(
"regression",
feat_space,
prop_label,
prop_unit,
prop,
prop_test,
task_sizes_train,
task_sizes_test,
leave_out_inds,
n_dim,
n_residual,
n_models_store,
sample_ids_train,
sample_ids_test,
task_names,
fix_intercept
)
SISSOSolver(inputs, feat_space)
{}
void SISSORegressor::add_models(const std::vector<std::vector<int>> indexes)
......
......@@ -41,41 +41,16 @@ private:
std::vector<std::vector<ModelRegressor>> _models; //!< List of models
public:
// DocString: sisso_reg_init
/**
* @brief Create a SISSORegressor object
* @brief Constructor for the regressor
*
* @param inputs The InputParser object for the calculation
* @param feat_space Feature Space for the problem
* @param prop_label The label for the property
* @param prop_unit The unit of the property
* @param prop The value of the property to evaluate the loss function against for the training set
* @param prop_test The value of the property to evaluate the loss function against for the test set
* @param task_sizes_train Number of training samples per task
* @param task_sizes_test Number of testing samples per task
* @param leave_out_inds List of indexes from the initial data file in the test set
* @param n_dim The maximum number of features allowed in the linear model
* @param n_residual Number of residuals to pass to the next SIS operation
* @param n_models_store The number of models to output to files
* @param sample_ids_train A list storing all sample ids for the training samples
* @param sample_ids_test A list storing all sample ids for the test samples
* @param task_names A list storing the ID of the task names
* @param fix_intercept If true the bias term is fixed at 0
*/
SISSORegressor(
const std::shared_ptr<FeatureSpace> feat_space,
const std::string prop_label,
const Unit prop_unit,
const std::vector<double> prop,
const std::vector<double> prop_test,
const std::vector<int> task_sizes_train,
const std::vector<int> task_sizes_test,
const std::vector<int> leave_out_inds,
const int n_dim,
const int n_residual,
const int n_models_store,
const std::vector<std::string> sample_ids_train,
const std::vector<std::string> sample_ids_test,
const std::vector<std::string> task_names,
const bool fix_intercept=false
const InputParser inputs,
const std::shared_ptr<FeatureSpace> feat_space
);
/**
......@@ -111,82 +86,6 @@ public:
// Python interface functions
#ifdef PY_BINDINGS
// DocString: sisso_reg_init_arr
/**
* @brief Create a SISSORegressor object
*
* @param feat_space (FeatureSpace) Feature Space for the problem
* @param prop_label (str) The label for the property
* @param prop_unit (Unit) The unit of the property
* @param prop (np.ndarray) The value of the property to evaluate the loss function against for the training set
* @param prop_test (np.ndarray) The value of the property to evaluate the loss function against for the test set
* @param task_sizes_train (list) Number of training samples per task
* @param task_sizes_test (list) Number of testing samples per task
* @param leave_out_inds (list) List of indexes from the initial data file in the test set
* @param n_dim (int) The maximum number of features allowed in the linear model
* @param n_residual (int) Number of residuals to pass to the next SIS operation
* @param n_models_store (int) The number of models to output to files
* @param sample_ids_train A list storing all sample ids for the training samples
* @param sample_ids_test A list storing all sample ids for the test samples
* @param task_names A list storing the ID of the task names
* @param fix_intercept (bool) If true the bias term is fixed at 0
*/
SISSORegressor(
std::shared_ptr<FeatureSpace> feat_space,
std::string prop_label,
Unit prop_unit,
np::ndarray prop,
np::ndarray prop_test,
py::list task_sizes_train,
py::list task_sizes_test,
py::list leave_out_inds,
int n_dim,
int n_residual,
int n_models_store,
py::list sample_ids_train,
py::list sample_ids_test,
py::list task_names,
bool fix_intercept=false
);
// DocString: sisso_reg_init_list
/**
* @brief Create a SISSORegressor object
*
* @param feat_space (FeatureSpace) Feature Space for the problem
* @param prop_label (str) The label for the property
* @param prop_unit (Unit) The unit of the property
* @param prop (list) The value of the property to evaluate the loss function against for the training set
* @param prop_test (list) The value of the property to evaluate the loss function against for the test set
* @param task_sizes_train (list) Number of training samples per task
* @param task_sizes_test (list) Number of testing samples per task
* @param leave_out_inds (list) List of indexes from the initial data file in the test set
* @param n_dim (int) The maximum number of features allowed in the linear model
* @param n_residual (int) Number of residuals to pass to the next SIS operation
* @param n_models_store (int) The number of models to output to files
* @param sample_ids_train A list storing all sample ids for the training samples
* @param sample_ids_test A list storing all sample ids for the test samples
* @param task_names A list storing the ID of the task names
* @param fix_intercept (bool) If true the bias term is fixed at 0
*/
SISSORegressor(
std::shared_ptr<FeatureSpace> feat_space,
std::string prop_label,
Unit prop_unit,
py::list prop,
py::list prop_test,
py::list task_sizes_train,
py::list task_sizes_test,
py::list leave_out_inds,
int n_dim,
int n_residual,
int n_models_store,
py::list sample_ids_train,
py::list sample_ids_test,
py::list task_names,
bool fix_intercept=false
);
// DocString: sisso_reg_models_py
/**
* @brief The selected models (n_dim, n_models_store)
......
......@@ -22,44 +22,29 @@
#include "descriptor_identifier/solver/SISSOSolver.hpp"
SISSOSolver::SISSOSolver(
const std::string loss_type,
const std::shared_ptr<FeatureSpace> feat_space,
const std::string prop_label,
const Unit prop_unit,
const std::vector<double> prop,
const std::vector<double> prop_test,
const std::vector<int> task_sizes_train,
const std::vector<int> task_sizes_test,
const std::vector<int> leave_out_inds,
const int n_dim,
const int n_residual,
const int n_models_store,
const std::vector<std::string> sample_ids_train,
const std::vector<std::string> sample_ids_test,
const std::vector<std::string> task_names,
const bool fix_intercept
const InputParser inputs, const std::shared_ptr<FeatureSpace> feat_space
):
_sample_ids_train(sample_ids_train),
_sample_ids_test(sample_ids_test),
_task_names(task_names),
_task_sizes_train(task_sizes_train),
_task_sizes_test(task_sizes_test),
_leave_out_inds(leave_out_inds),
_prop_label(prop_label),
_prop_unit(prop_unit),
_sample_ids_train(inputs.sample_ids_train()),
_sample_ids_test(inputs.sample_ids_test()),
_task_names(inputs.task_names()),
_task_sizes_train(inputs.task_sizes_train()),
_task_sizes_test(inputs.task_sizes_test()),
_leave_out_inds(inputs.leave_out_inds()),
_prop_label(inputs.prop_label()),
_prop_unit(inputs.prop_unit()),
_feat_space(feat_space),
_mpi_comm(feat_space->mpi_comm()),
_n_task(task_sizes_train.size()),
_n_samp(prop.size()),
_n_dim(n_dim),
_n_residual(n_residual),
_n_models_store(n_models_store),
_fix_intercept(fix_intercept)
_n_task(inputs.task_sizes_train().size()),
_n_samp(inputs.prop_train().size()),
_n_dim(inputs.n_dim()),
_n_residual(inputs.n_residual()),
_n_models_store(inputs.n_models_store()),
_fix_intercept(inputs.fix_intercept())
{
_loss = loss_function_util::get_loss_function(
loss_type,
prop,
prop_test,
inputs.calc_type(),
inputs.prop_train(),
inputs.prop_test(),