diff --git a/src/classification/utils.cpp b/src/classification/utils.cpp
index 8de21889b23a1bd7adf34570cbd84cf8a6bd62f1..105ed4b65fd68c5af68e29f36c1f730ac469a3ca 100644
--- a/src/classification/utils.cpp
+++ b/src/classification/utils.cpp
@@ -91,4 +91,17 @@ double convex_hull::overlap_1d(double* value, double width)
         {
         }
     }
     return util_funcs::mean(TASK_SCORES);
-}
\ No newline at end of file
+}
+
+// int lp_utils::pt_in_convex_hull(ClpSimplex& lp_simplex, std::vector<int>& inds, double* row_lower, double* row_upper, int samp_ind)
+// {
+//     std::transform(inds.begin(), inds.end(), row_lower, [&samp_ind](int ind){return prop_sorted_d_mat::get_sorted_d_matrix_el(ind, samp_ind);});
+//     std::transform(inds.begin(), inds.end(), row_upper, [&samp_ind](int ind){return prop_sorted_d_mat::get_sorted_d_matrix_el(ind, samp_ind);});
+
+//     lp_simplex.chgRowLower(row_lower);
+//     lp_simplex.chgRowUpper(row_upper);
+
+//     lp_simplex.primal();
+
+//     return lp_simplex.primalFeasible();
+// }
diff --git a/src/classification/utils.hpp b/src/classification/utils.hpp
index 5cc7ba3763b60b7b6710df2aa26a81754dd1e3c0..c1e2e0334341e0f2b8765fa6d448173798347c95 100644
--- a/src/classification/utils.hpp
+++ b/src/classification/utils.hpp
@@ -10,6 +10,11 @@
 #include <utils/math_funcs.hpp>
 #include <iomanip>
 
+
+#include <coin/ClpSimplex.hpp>
+
+#include <classification/prop_sorted_d_mat.hpp>
+
 namespace convex_hull
 {
     extern std::vector<double> SORTED_VALUE; //!< The value sorted based on the property
@@ -39,4 +44,5 @@ namespace convex_hull
 
     double overlap_1d(double* value, double width = 1e-10);
 }
+
 #endif
\ No newline at end of file
diff --git a/src/descriptor_identifier/Model/ModelClassifier.cpp b/src/descriptor_identifier/Model/ModelClassifier.cpp
index ee57d28d188a8c61f54563f8ce02c71b7498ec72..62995cf01a0848a358cdad60840442f810d22e73 100644
--- a/src/descriptor_identifier/Model/ModelClassifier.cpp
+++ b/src/descriptor_identifier/Model/ModelClassifier.cpp
@@ -16,6 +16,7 @@ ModelClassifier::ModelClassifier(Unit prop_unit, std::vector<double> prop_train,
     int n_row = _n_dim + 1;
     ClpSimplex lp_simplex; //!< Model used to find if a point is inside the convex hull
     lp_simplex.setLogLevel(0);
+    lp_simplex.setMoreSpecialOptions(2);
 
     std::vector<std::vector<int>> inds_check;
     std::vector<double> lp_column_lower(_n_samp_train + 1, 0.0);
@@ -76,13 +77,8 @@ ModelClassifier::ModelClassifier(Unit prop_unit, std::vector<double> prop_train,
         int n_col = inds_class_train[c1].size();
 
         for(int dd = 0; dd < _n_dim; ++dd)
-        {
             for(int ii = 0; ii < n_col; ++ii)
-            {
                 lp_elements[ii * n_row + dd] = _feats[dd]->value_ptr()[inds_class_train[c1][ii]];
-                // std::cout << inds_class_train[c1][ii] << ',' << lp_elements[ii * n_row + dd] << ',' << lp_elements[ii * n_row + dd + 1] << std::endl;
-            }
-        }
 
         for(int c2 = 0; c2 < inds_class_train.size(); ++c2)
         {
@@ -134,7 +130,6 @@ ModelClassifier::ModelClassifier(Unit prop_unit, std::vector<double> prop_train,
     }
 
     // Setup ML Shark data structures for SVM training
-    std::cout << "setup ml" << std::endl;
     std::vector<shark::LabeledData<shark::RealVector, unsigned int>> training(_task_sizes_train.size());
     std::vector<shark::LabeledData<shark::RealVector, unsigned int>> test(_task_sizes_test.size());
     std::vector<unsigned int> prop_to_copy(_prop_train.size());
@@ -189,6 +184,15 @@ ModelClassifier::ModelClassifier(Unit prop_unit, std::vector<double> prop_train,
             n_els += shark::batchSize(batch);
             n_copied += shark::batchSize(batch);
         }
+
+        n_copied = 0;
+        for(auto& batch : data.batches())
+        {
+            for(int ff = 0; ff < _n_dim; ++ff)
+                dcopy_(shark::batchSize(batch), _feats[ff]->svm_test_value_ptr() + n_copied, 1, &batch(0, 0) + ff, _n_dim);
+
+            n_copied += shark::batchSize(batch);
+        }
 
         test[tt] = shark::LabeledData<shark::RealVector, unsigned int>(data, labels);
     }
@@ -205,11 +209,10 @@ ModelClassifier::ModelClassifier(Unit prop_unit, std::vector<double> prop_train,
     int ii_test = 0;
     for(int tt = 0; tt < _task_sizes_train.size(); ++tt)
     {
-        std::cout << "perform svm" << std::endl;
         trainer.stoppingCondition().maxSeconds = 0.001;
+        trainer.stoppingCondition().maxIterations = 1000;
+        trainer.stoppingCondition().minAccuracy = 0.01;
         trainer.train(model, training[tt]);
-
-        std::cout << "process svm" << std::endl;
         if(_fix_intercept)
         {
             _coefs.push_back(std::vector<double>(_n_dim, 0.0));
@@ -222,34 +225,16 @@ ModelClassifier::ModelClassifier(Unit prop_unit, std::vector<double> prop_train,
             _coefs.push_back(std::vector<double>(_n_dim + 1, 0.0));
             std::copy_n(model.parameterVector().begin(), _n_dim + 1, _coefs.back().begin());
             for(int cc = 0; cc < _n_dim; ++cc)
+            {
                 _coefs.back()[cc] = _feats[cc]->remap_w_svm(_coefs.back()[cc]);
-            _coefs.back()[_n_dim] = _feats.back()->remap_b_svm(_coefs.back()[_n_dim]);
-        }
+                _coefs.back()[_n_dim] -= _feats[cc]->remap_b_svm(_coefs.back()[cc]);
 
-        // output = model(training[tt].inputs());
-        // for(auto element: output.elements())
-        // {
-        //     _prop_train_est[ii_train] = static_cast<double>(element);
-        //     ++ii_train;
-        // }
-
-        // if(_task_sizes_test[tt] == 0)
-        //     continue;
-        // output = model(test[tt].inputs());
-        // for(auto element: output.elements())
-        // {
-        //     _prop_test_est[ii_test] = static_cast<double>(element);
-        //     ++ii_test;
-        // }
+            }
+        }
     }
 
-    std::cout << "set error" << std::endl;
+
     std::transform(_prop_train_est.begin(), _prop_train_est.end(), _prop_train.begin(), _train_error.begin(), [](double est, double real){return (std::abs(est - real) < 1e-10) ? -1 : real;});
     std::transform(_prop_test_est.begin(), _prop_test_est.end(), _prop_test.begin(), _test_error.begin(), [](double est, double real){return (std::abs(est - real) < 1e-10) ? -1 : real;});
 
-    std::cout << *std::min_element(_train_error.begin(), _train_error.end()) << std::endl;
-    std::cout << *std::max_element(_train_error.begin(), _train_error.end()) << std::endl;
-    std::cout << std::count_if(_train_error.begin(), _train_error.end(), [](double te){return te >= 0;}) << std::endl;
-
-    std::cout << "out" << std::endl;
 }
 
 ModelClassifier::ModelClassifier(std::string train_file)
diff --git a/src/descriptor_identifier/Model/ModelClassifier.hpp b/src/descriptor_identifier/Model/ModelClassifier.hpp
index 7f62a4650100037b9fa6ae8522816db4c1312dc6..f339142b315e5aa9310a3d7e4f9a3febdf1040dc 100644
--- a/src/descriptor_identifier/Model/ModelClassifier.hpp
+++ b/src/descriptor_identifier/Model/ModelClassifier.hpp
@@ -18,9 +18,11 @@
 #include<fstream>
 #include<iostream>
 
+#include <shark/Algorithms/Trainers/CSvmTrainer.h>
+#include <shark/Algorithms/Trainers/NormalizeComponentsUnitVariance.h>
 #include <shark/Data/Dataset.h>
+#include <shark/Models/Normalizer.h>
 #include <shark/ObjectiveFunctions/Loss/ZeroOneLoss.h>
-#include <shark/Algorithms/Trainers/CSvmTrainer.h>
 
 #include <coin/ClpSimplex.hpp>
 #include <descriptor_identifier/Model/Model.hpp>
diff --git a/src/descriptor_identifier/SISSO_DI/SISSOClassifier.cpp b/src/descriptor_identifier/SISSO_DI/SISSOClassifier.cpp
index dcb18e744e4ed5593b431e782fbabda8d76696e3..b12f13e7dd07a9fc89fa483e8879ff957fbc94bd 100644
--- a/src/descriptor_identifier/SISSO_DI/SISSOClassifier.cpp
+++ b/src/descriptor_identifier/SISSO_DI/SISSOClassifier.cpp
@@ -15,8 +15,8 @@ SISSOClassifier::SISSOClassifier(
 ):
     SISSO_DI(feat_space, prop_unit, prop, prop_test, task_sizes_train, task_sizes_test, leave_out_inds, n_dim, n_residual, n_models_store, fix_intercept),
     _lp_elements((n_dim + 1) * _n_samp, 0.0),
-    _lp_row_lower(_n_dim + 1, 1.0),
-    _lp_row_upper(_n_dim + 1, 1.0),
+    _lp_row_lower((n_dim + 1) * _n_samp, 1.0),
+    _lp_row_upper((n_dim + 1) * _n_samp, 1.0),
     _lp_objective(_n_samp, 0.0),
     _lp_column_lower(_n_samp, 0.0),
     _lp_column_upper(_n_samp, COIN_DBL_MAX),
@@ -37,10 +37,8 @@ SISSOClassifier::SISSOClassifier(
         if(std::none_of(prop.begin(), prop.end(), [&pt](double pp){return std::abs(pp - pt) < 1e-10;}))
             throw std::logic_error("A class in the property vector (test set) is not in the training set.");
 
-    _lp_row_lower[_n_dim] -= _width;
-    _lp_row_upper[_n_dim] += _width;
-
     _lp_simplex.setLogLevel(0);
+    _lp_simplex.setMoreSpecialOptions(8192 + 64 + 2);
     _trainer.stoppingCondition().maxSeconds = 0.001;
 
     std::vector<int> inds = util_funcs::argsort(prop);
@@ -87,7 +85,10 @@ SISSOClassifier::SISSOClassifier(
         std::iota(inds.begin(), inds.end(), task_start);
 
         for(int cc = 0; cc < _n_class; ++cc)
-            _inds_check.push_back(inds);
+        {
+            _class_task_check.push_back(std::vector<int>(_n_class));
+            std::iota(_class_task_check.back().begin(), _class_task_check.back().end(), _n_class * tt);
+        }
 
         util_funcs::argsort(inds.data(), inds.data() + inds.size(), &_prop[task_start]);
         _sample_inds_to_sorted_dmat_inds[inds[0]] = task_start;
@@ -98,13 +99,13 @@ SISSOClassifier::SISSOClassifier(
             _sample_inds_to_sorted_dmat_inds[inds[ii]] = ii + task_start;
             if(_prop[inds[ii]] != _prop[inds[ii - 1]])
             {
-                _inds_check[tt * _n_class + cc].erase(_inds_check[tt * _n_class + cc].begin() + cls_start, _inds_check[tt * _n_class + cc].begin() + ii);
+                _class_task_check[tt * _n_class + cc].erase(_class_task_check[tt * _n_class + cc].begin() + cc);
                 n_samp_per_class.push_back(ii - cls_start);
                 cls_start = ii;
                 ++cc;
             }
         }
-        _inds_check[tt * _n_class + cc].erase(_inds_check[tt * _n_class + cc].begin() + cls_start, _inds_check[tt * _n_class + cc].end());
+        _class_task_check[tt * _n_class + cc].erase(_class_task_check[tt * _n_class + cc].begin() + cc);
         n_samp_per_class.push_back(inds.size() - cls_start);
 
         if(n_samp_per_class.size() != (tt + 1) * _n_class)
@@ -158,10 +159,11 @@ void SISSOClassifier::set_data(std::vector<int>& inds, int start, int task)
 
 void SISSOClassifier::svm(std::vector<int>& inds, double* coefs, int start, int task)
 {
     _trainer.stoppingCondition().maxSeconds = 0.001;
+    _trainer.stoppingCondition().maxIterations = 1000;
+    _trainer.stoppingCondition().minAccuracy = 0.01;
     // _trainer.stoppingCondition().maxIterations = 100;
     set_data(inds, start, task);
-    std::cout << _training[0].inputs().elements()[0] << std::endl;
     _trainer.train(_model, _training[task]);
     std::copy_n(_model.parameterVector().begin(), inds.size() + (!_fix_intercept), coefs);
     if(!_fix_intercept)
@@ -243,6 +245,17 @@ void SISSOClassifier::set_lp_data(int task, int cls)
 
     int n_dim = _cur_inds.size();
     for(int ii = 0; ii < n_dim; ++ii)
         dcopy_(_lp_n_col, prop_sorted_d_mat::access_sorted_d_matrix(_cur_inds[ii], task, cls), 1, &_lp_elements[ii], _lp_n_row);
+    int n_copied = 0;
+    for(auto& cls_task_ind : _class_task_check[task * _n_class + cls])
+    {
+        for(int ii = 0; ii < _cur_inds.size(); ++ii)
+        {
+            dcopy_(prop_sorted_d_mat::N_SAMPLES_PER_CLASS[cls_task_ind], prop_sorted_d_mat::access_sorted_d_matrix(_cur_inds[ii], task, cls_task_ind % _n_class), 1, &_lp_row_lower[n_copied + ii], _lp_n_row);
+            dcopy_(prop_sorted_d_mat::N_SAMPLES_PER_CLASS[cls_task_ind], prop_sorted_d_mat::access_sorted_d_matrix(_cur_inds[ii], task, cls_task_ind % _n_class), 1, &_lp_row_upper[n_copied + ii], _lp_n_row);
+        }
+        n_copied += prop_sorted_d_mat::N_SAMPLES_PER_CLASS[cls_task_ind] * _cur_inds.size();
+    }
+    _sz_cls_task_check = n_copied / _cur_inds.size();
 
     _lp_simplex.loadProblem(
@@ -258,17 +271,19 @@ void SISSOClassifier::set_lp_data(int task, int cls)
     );
 }
 
-int SISSOClassifier::pt_in_convex_hull_lp(int samp_ind)
+int SISSOClassifier::pt_in_convex_hull_lp(int cls_task_ind)
 {
-    std::transform(_cur_inds.begin(), _cur_inds.end(), _lp_row_upper.begin(), [&samp_ind](int ind){return prop_sorted_d_mat::get_sorted_d_matrix_el(ind, samp_ind);});
-    std::transform(_cur_inds.begin(), _cur_inds.end(), _lp_row_lower.begin(), [&samp_ind](int ind){return prop_sorted_d_mat::get_sorted_d_matrix_el(ind, samp_ind);});
-
-    _lp_simplex.chgRowLower(_lp_row_lower.data());
-    _lp_simplex.chgRowUpper(_lp_row_upper.data());
+    int n_overlap = 0;
+    for(int ii = 0; ii < _sz_cls_task_check; ++ii)
+    {
+        _lp_simplex.chgRowLower(_lp_row_lower.data() + ii * _lp_n_row);
+        _lp_simplex.chgRowUpper(_lp_row_upper.data() + ii * _lp_n_row);
 
-    _lp_simplex.primal();
+        _lp_simplex.dual();
+        n_overlap += _lp_simplex.primalFeasible();
+    }
 
-    return _lp_simplex.primalFeasible();
+    return n_overlap;
 }
@@ -280,8 +295,9 @@ int SISSOClassifier::get_n_overlap_lp()
         {
             _lp_n_col = prop_sorted_d_mat::get_class_size(tt, cc);
             set_lp_data(tt, cc);
-            for(auto& ind : _inds_check[tt * _n_class + cc])
-                n_overlap += pt_in_convex_hull_lp(ind);
+            // n_overlap += std::accumulate(_inds_check[tt * _n_class + cc].begin(), _inds_check[tt * _n_class + cc].end(), 0, [this](int tot, int samp_ind){return tot + pt_in_convex_hull_lp(samp_ind);});
+            for(auto& cls_task_ind : _class_task_check[tt * _n_class + cc])
+                n_overlap += pt_in_convex_hull_lp(cls_task_ind);
         }
     }
     return n_overlap;
@@ -294,6 +310,8 @@ void SISSOClassifier::l0_norm(std::vector<double>& prop, int n_dim)
     _lp_n_row = n_dim + 1;
 
     std::fill_n(_lp_elements.begin(), _lp_elements.size(), 1.0);
+    std::fill_n(_lp_row_lower.begin(), _lp_row_lower.size(), 1.0 - _width);
+    std::fill_n(_lp_row_upper.begin(), _lp_row_upper.size(), 1.0 + _width);
 
     std::iota(_lp_column_start.begin(), _lp_column_start.end(), 0);
     std::transform(_lp_column_start.begin(), _lp_column_start.end(), _lp_column_start.begin(), [&n_dim](int ii){return (n_dim + 1) * ii;});
@@ -332,10 +350,6 @@ void SISSOClassifier::l0_norm(std::vector<double>& prop, int n_dim)
             }
         }
     } while(util_funcs::iterate(_cur_inds, _cur_inds.size(), _mpi_comm->size()));
 
-
-    for(int nn = 0; nn < min_n_convex_overlap.size(); ++nn)
-        std::cout << nn << '\t' << min_n_convex_overlap[nn] << '\t' << min_inds[nn].size() << std::endl;
-
     std::vector<int> inds_min(n_get_models * n_dim, -1);
     if(min_inds[max_error_ind].size() > 1)
     {
@@ -353,7 +367,7 @@ void SISSOClassifier::l0_norm(std::vector<double>& prop, int n_dim)
         }
         test_inds = find_min_svm(test_inds, n_keep);
         for(int nn = 0; nn < n_keep; ++nn)
-            min_inds[inds_change[nn]] = {test_inds[inds_change[nn]]};
+            min_inds[inds_change[nn]] = {test_inds[nn]};
     }
     for(int ii = 0; ii < n_get_models; ++ii)
         std::copy_n(min_inds[ii][0].begin(), min_inds[ii][0].size(), &inds_min[n_dim * ii]);
@@ -396,11 +410,9 @@ void SISSOClassifier::l0_norm(std::vector<double>& prop, int n_dim)
             min_nodes[ii] = std::make_shared<ModelNode>(_feat_space->phi_selected()[index]->arr_ind(), _feat_space->phi_selected()[index]->rung(), _feat_space->phi_selected()[index]->expr(), _feat_space->phi_selected()[index]->postfix_expr(), _feat_space->phi_selected()[index]->value(), _feat_space->phi_selected()[index]->test_value(), _feat_space->phi_selected()[index]->unit());
         }
         models.push_back(ModelClassifier(_prop_unit, _prop, _prop_test, min_nodes, _task_sizes_train, _task_sizes_test, _fix_intercept));
-        std::cout << models.back().n_svm_misclassified_train() << std::endl;
     }
 
     _models.push_back(models);
-    std::cout << _models.back().back().n_svm_misclassified_train() << std::endl;
 }
 
 void SISSOClassifier::fit()
@@ -462,19 +474,15 @@ void SISSOClassifier::fit()
         if(_mpi_comm->rank() == 0)
         {
             std::cout << "Time for l0-norm: " << duration << std::endl;
-            std::cout << _models.back().back().n_svm_misclassified_train() << std::endl;
-
             for(int rr = 0; rr < _n_models_store; ++rr)
             {
                 _models.back()[rr].to_file("models/train_dim_" + std::to_string(dd) + "_model_" + std::to_string(rr) + ".dat");
                 if(_leave_out_inds.size() > 0)
                     _models.back()[rr].to_file("models/test_dim_" + std::to_string(dd) + "_model_" + std::to_string(rr) + ".dat", false, _leave_out_inds);
             }
-            std::cout << _models.back().back().n_svm_misclassified_train() << std::endl;
         }
 
         for(int rr = 0; rr < _n_residual; ++rr)
            _models.back()[rr].copy_error(&residual[rr * _n_samp]);
-        std::cout << _models.back().back().n_svm_misclassified_train() << std::endl;
     }
 }
diff --git a/src/descriptor_identifier/SISSO_DI/SISSOClassifier.hpp b/src/descriptor_identifier/SISSO_DI/SISSOClassifier.hpp
index 49100144cb10f0aa866f6f794cf3a9418eed3d8e..fb2c461799513778f610b8dd714d224a924f6c9c 100644
--- a/src/descriptor_identifier/SISSO_DI/SISSOClassifier.hpp
+++ b/src/descriptor_identifier/SISSO_DI/SISSOClassifier.hpp
@@ -37,7 +37,7 @@ protected:
     ClpSimplex _lp_simplex; //!< Model used to find if a point is inside the convex hull
     std::vector<std::vector<ModelClassifier>> _models; //!< List of models
 
-    std::vector<std::vector<int>> _inds_check;
+    std::vector<std::vector<int>> _class_task_check;
 
     std::vector<double> _lp_elements;
     std::vector<double> _lp_row_lower;
@@ -66,6 +66,7 @@ protected:
     double _c;
     double _width;
 
+    int _sz_cls_task_check;
     int _n_class;
     int _lp_n_col;
    int _lp_n_row;
diff --git a/src/feature_creation/node/ModelNode.cpp b/src/feature_creation/node/ModelNode.cpp
index 33e05b243b42c962ae211a68623f4e0cc254a2d6..dc5d33767272411cf594a6d5da045cd442229e53 100644
--- a/src/feature_creation/node/ModelNode.cpp
+++ b/src/feature_creation/node/ModelNode.cpp
@@ -11,16 +11,16 @@ ModelNode::ModelNode(int feat_ind, int rung, std::string expr, std::string post_
     _b_remap_svm(0.0),
     _w_remap_svm(1.0),
     _rung(rung)
-    {
-        double w_remap_svm_temp = 2.0 / (*std::max_element(_value.begin(), _value.end()) - *std::min_element(_value.begin(), _value.end()));
-        double b_remap_svm_temp = w_remap_svm_temp * (*std::max_element(_value.begin(), _value.end()) + *std::min_element(_value.begin(), _value.end())) / 2.0;
+{
+    double w_remap_svm_temp = 1.0 / (*std::max_element(_value.begin(), _value.end()) - *std::min_element(_value.begin(), _value.end()));
+    double b_remap_svm_temp = *std::min_element(_value.begin(), _value.end());
 
     _w_remap_svm = w_remap_svm_temp;
     _b_remap_svm = b_remap_svm_temp;
 
-        std::transform(_value.begin(), _value.end(), _value_svm.begin(), [w_remap_svm_temp, b_remap_svm_temp](double val){return val * w_remap_svm_temp - b_remap_svm_temp;});
-        std::transform(_test_value.begin(), _test_value.end(), _test_value_svm.begin(), [w_remap_svm_temp, b_remap_svm_temp](double val){return val * w_remap_svm_temp - b_remap_svm_temp;});
-    }
+    std::transform(_value.begin(), _value.end(), _value_svm.begin(), [w_remap_svm_temp, b_remap_svm_temp](double val){return (val - b_remap_svm_temp) * w_remap_svm_temp;});
+    std::transform(_test_value.begin(), _test_value.end(), _test_value_svm.begin(), [w_remap_svm_temp, b_remap_svm_temp](double val){return (val - b_remap_svm_temp) * w_remap_svm_temp;});
+}
 
 ModelNode::~ModelNode()
 {}
diff --git a/src/feature_creation/node/ModelNode.hpp b/src/feature_creation/node/ModelNode.hpp
index 044b04b7374673009bed121958a605a872096e5a..0640d11d104b0a1c052baba3adaf8489769f35a9 100644
--- a/src/feature_creation/node/ModelNode.hpp
+++ b/src/feature_creation/node/ModelNode.hpp
@@ -100,9 +100,13 @@ public:
 
     inline double* svm_value_ptr(){return _value_svm.data();}
 
-    inline double remap_b_svm(double b){return b + _b_remap_svm;}
+    inline std::vector<double> svm_test_value(){return _test_value_svm;}
 
-    inline double remap_w_svm(double w){return w / _w_remap_svm;}
+    inline double* svm_test_value_ptr(){return _test_value_svm.data();}
+
+    inline double remap_b_svm(double w){return w * _b_remap_svm;}
+
+    inline double remap_w_svm(double w){return w * _w_remap_svm;}
 
     // DocString: model_node_set_value
     /**
diff --git a/src/main.cpp b/src/main.cpp
index 9a59967bbf915279877bd047e97be0c99b25070a..b6b12267505488459d1af859b5b6c9cb4cbfa1ab 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -64,7 +64,6 @@ int main(int argc, char const *argv[])
 
         for(int ii = 0; ii < sisso.models().size(); ++ii)
         {
-            std::cout << sisso.models()[ii][0].n_convex_overlap_train() << '\t' << sisso.models()[ii][0].n_svm_misclassified_train() << '\t' << sisso.models()[ii][0].n_samp_train() << '\t' << static_cast<double>(sisso.models()[ii][0].n_convex_overlap_train()) / static_cast<double>(sisso.models()[ii][0].n_samp_train()) << std::endl;
             std::cout << "Train Error: " << sisso.models()[ii][0].percent_train_error();
             if(IP._prop_test.size() > 0)
                 std::cout << "; Test Error: " << sisso.models()[ii][0].percent_test_error() << std::endl;
diff --git a/src/python/descriptor_identifier/SISSOClassifier.cpp b/src/python/descriptor_identifier/SISSOClassifier.cpp
index 40634c1e559f22cae047835d6102755a7ab05ab3..84fb5868be36822df333bc305074229e96889f2e 100644
--- a/src/python/descriptor_identifier/SISSOClassifier.cpp
+++ b/src/python/descriptor_identifier/SISSOClassifier.cpp
@@ -14,6 +14,14 @@ SISSOClassifier::SISSOClassifier(
     bool fix_intercept
 ) :
     SISSO_DI(feat_space, prop_unit, prop, prop_test, task_sizes_train, task_sizes_test, leave_out_inds, n_dim, n_residual, n_models_store, fix_intercept),
+    _lp_elements((n_dim + 1) * _n_samp, 0.0),
+    _lp_row_lower((n_dim + 1) * _n_samp, 1.0),
+    _lp_row_upper((n_dim + 1) * _n_samp, 1.0),
+    _lp_objective(_n_samp, 0.0),
+    _lp_column_lower(_n_samp, 0.0),
+    _lp_column_upper(_n_samp, COIN_DBL_MAX),
+    _lp_column_start(_n_samp, 0),
+    _lp_row((n_dim + 1) * _n_samp, 0),
     _training(_task_sizes_train.size()),
     _int_prop(_prop.size()),
     _int_prop_test(_prop_test.size()),
@@ -24,7 +32,31 @@ SISSOClassifier::SISSOClassifier(
     std::vector<double> prop_vec = python_conv_utils::from_ndarray<double>(prop);
     std::vector<double> prop_test_vec = python_conv_utils::from_ndarray<double>(prop_test);
 
-    _trainer.stoppingCondition().maxSeconds = 0.01;
+    for(auto& pt : prop_test_vec)
+        if(std::none_of(prop_vec.begin(), prop_vec.end(), [&pt](double pp){return std::abs(pp - pt) < 1e-10;}))
+            throw std::logic_error("A class in the property vector (test set) is not in the training set.");
+
+    _lp_simplex.setLogLevel(0);
+    _lp_simplex.setMoreSpecialOptions(8192 + 64 + 2);
+    _trainer.stoppingCondition().maxSeconds = 0.001;
+
+    std::vector<int> inds = util_funcs::argsort(prop_vec);
+    std::map<double, double> class_map;
+    double class_num = 0;
+    prop_vec[inds[0]] = 0;
+    class_map[inds[0]] = 0;
+    for(int ii = 1; ii < inds.size(); ++ii)
+    {
+        if(_prop[inds[ii]] != _prop[inds[ii-1]])
+        {
+            ++class_num;
+            ++_n_class;
+            class_map[_prop[inds[ii]]] = class_num;
+        }
+        prop_vec[inds[ii]] = class_num;
+    }
+    if(_n_class > 2)
+        _trainer.setMcSvmType(shark::McSvm::WW);
 
     double min_prop = *std::min_element(prop_vec.begin(), prop_vec.end());
     if(min_prop < 0.0)
@@ -57,29 +89,35 @@ SISSOClassifier::SISSOClassifier(
     std::vector<int> n_samp_per_class;
     int task_start;
+
     for(int tt = 0; tt < _n_task; ++tt)
     {
-        std::vector<int> inds(_task_sizes_train[tt]);
+        inds = std::vector<int>(_task_sizes_train[tt]);
         std::iota(inds.begin(), inds.end(), task_start);
 
         for(int cc = 0; cc < _n_class; ++cc)
-            _inds_check.push_back(inds);
+        {
+            _class_task_check.push_back(std::vector<int>(_n_class));
+            std::iota(_class_task_check.back().begin(), _class_task_check.back().end(), _n_class * tt);
+        }
 
         util_funcs::argsort(inds.data(), inds.data() + inds.size(), &_prop[task_start]);
-        _sample_inds_to_sorted_dmat_inds[0] = task_start;
+        _sample_inds_to_sorted_dmat_inds[inds[0]] = task_start;
         int cls_start = 0;
-        int cc= 0;
+        int cc = 0;
 
         for(int ii = 1; ii < inds.size(); ++ii)
        {
             _sample_inds_to_sorted_dmat_inds[inds[ii]] = ii + task_start;
             if(_prop[inds[ii]] != _prop[inds[ii - 1]])
             {
-                _inds_check[tt * _n_class + cc].erase(_inds_check[tt * _n_class + cc].begin() + cls_start, _inds_check[tt * _n_class + cc].begin() + ii);
+                _class_task_check[tt * _n_class + cc].erase(_class_task_check[tt * _n_class + cc].begin() + cc);
                 n_samp_per_class.push_back(ii - cls_start);
                 cls_start = ii;
                 ++cc;
             }
         }
+        _class_task_check[tt * _n_class + cc].erase(_class_task_check[tt * _n_class + cc].begin() + cc);
+        n_samp_per_class.push_back(inds.size() - cls_start);
 
         if(n_samp_per_class.size() != (tt + 1) * _n_class)
             throw std::logic_error("A class is not represented in task " + std::to_string(tt) + ".");
@@ -103,6 +141,14 @@ SISSOClassifier::SISSOClassifier(
     bool fix_intercept
 ) :
     SISSO_DI(feat_space, prop_unit, prop, prop_test, task_sizes_train, task_sizes_test, leave_out_inds, n_dim, n_residual, n_models_store, fix_intercept),
+    _lp_elements((n_dim + 1) * _n_samp, 0.0),
+    _lp_row_lower((n_dim + 1) * _n_samp, 1.0),
+    _lp_row_upper((n_dim + 1) * _n_samp, 1.0),
+    _lp_objective(_n_samp, 0.0),
+    _lp_column_lower(_n_samp, 0.0),
+    _lp_column_upper(_n_samp, COIN_DBL_MAX),
+    _lp_column_start(_n_samp, 0),
+    _lp_row((n_dim + 1) * _n_samp, 0),
     _training(_task_sizes_train.size()),
     _int_prop(_prop.size()),
     _int_prop_test(_prop_test.size()),
@@ -113,7 +159,31 @@ SISSOClassifier::SISSOClassifier(
     std::vector<double> prop_vec = python_conv_utils::from_list<double>(prop);
     std::vector<double> prop_test_vec = python_conv_utils::from_list<double>(prop_test);
 
-    _trainer.stoppingCondition().maxSeconds = 0.01;
+    for(auto& pt : prop_test_vec)
+        if(std::none_of(prop_vec.begin(), prop_vec.end(), [&pt](double pp){return std::abs(pp - pt) < 1e-10;}))
+            throw std::logic_error("A class in the property vector (test set) is not in the training set.");
+
+    _lp_simplex.setLogLevel(0);
+    _lp_simplex.setMoreSpecialOptions(8192 + 64 + 2);
+    _trainer.stoppingCondition().maxSeconds = 0.001;
+
+    std::vector<int> inds = util_funcs::argsort(prop_vec);
+    std::map<double, double> class_map;
+    double class_num = 0;
+    prop_vec[inds[0]] = 0;
+    class_map[inds[0]] = 0;
+    for(int ii = 1; ii < inds.size(); ++ii)
+    {
+        if(_prop[inds[ii]] != _prop[inds[ii-1]])
+        {
+            ++class_num;
+            ++_n_class;
+            class_map[_prop[inds[ii]]] = class_num;
+        }
+        prop_vec[inds[ii]] = class_num;
+    }
+    if(_n_class > 2)
+        _trainer.setMcSvmType(shark::McSvm::WW);
 
     double min_prop = *std::min_element(prop_vec.begin(), prop_vec.end());
     if(min_prop < 0.0)
@@ -143,29 +213,35 @@ SISSOClassifier::SISSOClassifier(
     std::vector<int> n_samp_per_class;
     int task_start;
+
     for(int tt = 0; tt < _n_task; ++tt)
     {
-        std::vector<int> inds(_task_sizes_train[tt]);
+        inds = std::vector<int>(_task_sizes_train[tt]);
         std::iota(inds.begin(), inds.end(), task_start);
 
         for(int cc = 0; cc < _n_class; ++cc)
-            _inds_check.push_back(inds);
+        {
+            _class_task_check.push_back(std::vector<int>(_n_class));
+            std::iota(_class_task_check.back().begin(), _class_task_check.back().end(), _n_class * tt);
+        }
 
         util_funcs::argsort(inds.data(), inds.data() + inds.size(), &_prop[task_start]);
-        _sample_inds_to_sorted_dmat_inds[0] = task_start;
+        _sample_inds_to_sorted_dmat_inds[inds[0]] = task_start;
         int cls_start = 0;
-        int cc= 0;
+        int cc = 0;
 
         for(int ii = 1; ii < inds.size(); ++ii)
         {
             _sample_inds_to_sorted_dmat_inds[inds[ii]] = ii + task_start;
             if(_prop[inds[ii]] != _prop[inds[ii - 1]])
             {
-                _inds_check[tt * _n_class + cc].erase(_inds_check[tt * _n_class + cc].begin() + cls_start, _inds_check[tt * _n_class + cc].begin() + ii);
+                _class_task_check[tt * _n_class + cc].erase(_class_task_check[tt * _n_class + cc].begin() + cc);
                 n_samp_per_class.push_back(ii - cls_start);
                 cls_start = ii;
                 ++cc;
             }
         }
+        _class_task_check[tt * _n_class + cc].erase(_class_task_check[tt * _n_class + cc].begin() + cc);
+        n_samp_per_class.push_back(inds.size() - cls_start);
 
         if(n_samp_per_class.size() != (tt + 1) * _n_class)
             throw std::logic_error("A class is not represented in task " + std::to_string(tt) + ".");