Skip to content
Snippets Groups Projects
Commit 9d933d79 authored by Thomas Purcell's avatar Thomas Purcell
Browse files

Classification runs but running with different numbers of MPI ranks leads to different results

Debug tomorrow
parent c348ec24
No related branches found
No related tags found
No related merge requests found
......@@ -91,4 +91,17 @@ double convex_hull::overlap_1d(double* value, double width)
}
}
return util_funcs::mean(TASK_SCORES);
}
\ No newline at end of file
}
// int lp_utils::pt_in_convex_hull(ClpSimplex& lp_simplex, std::vector<int>& inds, double* row_lower, double* row_upper, int samp_ind)
// {
// std::transform(inds.begin(), inds.end(), row_lower, [&samp_ind](int ind){return prop_sorted_d_mat::get_sorted_d_matrix_el(ind, samp_ind);});
// std::transform(inds.begin(), inds.end(), row_upper, [&samp_ind](int ind){return prop_sorted_d_mat::get_sorted_d_matrix_el(ind, samp_ind);});
// lp_simplex.chgRowLower(row_lower);
// lp_simplex.chgRowUpper(row_upper);
// lp_simplex.primal();
// return lp_simplex.primalFeasible();
// }
......@@ -10,6 +10,11 @@
#include <utils/math_funcs.hpp>
#include <iomanip>
#include <coin/ClpSimplex.hpp>
#include <classification/prop_sorted_d_mat.hpp>
namespace convex_hull
{
extern std::vector<double> SORTED_VALUE; //!< The value sorted based on the property
......@@ -39,4 +44,5 @@ namespace convex_hull
double overlap_1d(double* value, double width = 1e-10);
}
#endif
\ No newline at end of file
......@@ -16,6 +16,7 @@ ModelClassifier::ModelClassifier(Unit prop_unit, std::vector<double> prop_train,
int n_row = _n_dim + 1;
ClpSimplex lp_simplex; //!< Model used to find if a point is inside the convex hull
lp_simplex.setLogLevel(0);
lp_simplex.setMoreSpecialOptions(2);
std::vector<std::vector<int>> inds_check;
std::vector<double> lp_column_lower(_n_samp_train + 1, 0.0);
......@@ -76,13 +77,8 @@ ModelClassifier::ModelClassifier(Unit prop_unit, std::vector<double> prop_train,
int n_col = inds_class_train[c1].size();
for(int dd = 0; dd < _n_dim; ++dd)
{
for(int ii = 0; ii < n_col; ++ii)
{
lp_elements[ii * n_row + dd] = _feats[dd]->value_ptr()[inds_class_train[c1][ii]];
// std::cout << inds_class_train[c1][ii] << ',' << lp_elements[ii * n_row + dd] << ',' << lp_elements[ii * n_row + dd + 1] << std::endl;
}
}
for(int c2 = 0; c2 < inds_class_train.size(); ++c2)
{
......@@ -134,7 +130,6 @@ ModelClassifier::ModelClassifier(Unit prop_unit, std::vector<double> prop_train,
}
// Setup ML Shark data structures for SVM training
std::cout << "setup ml" << std::endl;
std::vector<shark::LabeledData<shark::RealVector, unsigned int>> training(_task_sizes_train.size());
std::vector<shark::LabeledData<shark::RealVector, unsigned int>> test(_task_sizes_test.size());
std::vector<unsigned int> prop_to_copy(_prop_train.size());
......@@ -189,6 +184,15 @@ ModelClassifier::ModelClassifier(Unit prop_unit, std::vector<double> prop_train,
n_els += shark::batchSize(batch);
n_copied += shark::batchSize(batch);
}
n_copied = 0;
for(auto& batch : data.batches())
{
for(int ff = 0; ff < _n_dim; ++ff)
dcopy_(shark::batchSize(batch), _feats[ff]->svm_test_value_ptr() + n_copied, 1, &batch(0, 0) + ff, _n_dim);
n_copied += shark::batchSize(batch);
}
test[tt] = shark::LabeledData<shark::RealVector, unsigned int>(data, labels);
}
......@@ -205,11 +209,10 @@ ModelClassifier::ModelClassifier(Unit prop_unit, std::vector<double> prop_train,
int ii_test = 0;
for(int tt = 0; tt < _task_sizes_train.size(); ++tt)
{
std::cout << "perform svm" << std::endl;
trainer.stoppingCondition().maxSeconds = 0.001;
trainer.stoppingCondition().maxIterations = 1000;
trainer.stoppingCondition().minAccuracy = 0.01;
trainer.train(model, training[tt]);
std::cout << "process svm" << std::endl;
if(_fix_intercept)
{
_coefs.push_back(std::vector<double>(_n_dim, 0.0));
......@@ -222,34 +225,16 @@ ModelClassifier::ModelClassifier(Unit prop_unit, std::vector<double> prop_train,
_coefs.push_back(std::vector<double>(_n_dim + 1, 0.0));
std::copy_n(model.parameterVector().begin(), _n_dim + 1, _coefs.back().begin());
for(int cc = 0; cc < _n_dim; ++cc)
{
_coefs.back()[cc] = _feats[cc]->remap_w_svm(_coefs.back()[cc]);
_coefs.back()[_n_dim] = _feats.back()->remap_b_svm(_coefs.back()[_n_dim]);
}
_coefs.back()[_n_dim] -= _feats[cc]->remap_b_svm(_coefs.back()[cc]);
// output = model(training[tt].inputs());
// for(auto element: output.elements())
// {
// _prop_train_est[ii_train] = static_cast<double>(element);
// ++ii_train;
// }
// if(_task_sizes_test[tt] == 0)
// continue;
// output = model(test[tt].inputs());
// for(auto element: output.elements())
// {
// _prop_test_est[ii_test] = static_cast<double>(element);
// ++ii_test;
// }
}
}
}
std::cout << "set error" << std::endl;
std::transform(_prop_train_est.begin(), _prop_train_est.end(), _prop_train.begin(), _train_error.begin(), [](double est, double real){return (std::abs(est - real) < 1e-10) ? -1 : real;});
std::transform(_prop_test_est.begin(), _prop_test_est.end(), _prop_test.begin(), _test_error.begin(), [](double est, double real){return (std::abs(est - real) < 1e-10) ? -1 : real;});
std::cout << *std::min_element(_train_error.begin(), _train_error.end()) << std::endl;
std::cout << *std::max_element(_train_error.begin(), _train_error.end()) << std::endl;
std::cout << std::count_if(_train_error.begin(), _train_error.end(), [](double te){return te >= 0;}) << std::endl;
std::cout << "out" << std::endl;
}
ModelClassifier::ModelClassifier(std::string train_file)
......
......@@ -18,9 +18,11 @@
#include<fstream>
#include<iostream>
#include <shark/Algorithms/Trainers/CSvmTrainer.h>
#include <shark/Algorithms/Trainers/NormalizeComponentsUnitVariance.h>
#include <shark/Data/Dataset.h>
#include <shark/Models/Normalizer.h>
#include <shark/ObjectiveFunctions/Loss/ZeroOneLoss.h>
#include <shark/Algorithms/Trainers/CSvmTrainer.h>
#include <coin/ClpSimplex.hpp>
#include <descriptor_identifier/Model/Model.hpp>
......
......@@ -15,8 +15,8 @@ SISSOClassifier::SISSOClassifier(
):
SISSO_DI(feat_space, prop_unit, prop, prop_test, task_sizes_train, task_sizes_test, leave_out_inds, n_dim, n_residual, n_models_store, fix_intercept),
_lp_elements((n_dim + 1) * _n_samp, 0.0),
_lp_row_lower(_n_dim + 1, 1.0),
_lp_row_upper(_n_dim + 1, 1.0),
_lp_row_lower((n_dim + 1) * _n_samp, 1.0),
_lp_row_upper((n_dim + 1) * _n_samp, 1.0),
_lp_objective(_n_samp, 0.0),
_lp_column_lower(_n_samp, 0.0),
_lp_column_upper(_n_samp, COIN_DBL_MAX),
......@@ -37,10 +37,8 @@ SISSOClassifier::SISSOClassifier(
if(std::none_of(prop.begin(), prop.end(), [&pt](double pp){return std::abs(pp - pt) < 1e-10;}))
throw std::logic_error("A class in the property vector (test set) is not in the training set.");
_lp_row_lower[_n_dim] -= _width;
_lp_row_upper[_n_dim] += _width;
_lp_simplex.setLogLevel(0);
_lp_simplex.setMoreSpecialOptions(8192 + 64 + 2);
_trainer.stoppingCondition().maxSeconds = 0.001;
std::vector<int> inds = util_funcs::argsort(prop);
......@@ -87,7 +85,10 @@ SISSOClassifier::SISSOClassifier(
std::iota(inds.begin(), inds.end(), task_start);
for(int cc = 0; cc < _n_class; ++cc)
_inds_check.push_back(inds);
{
_class_task_check.push_back(std::vector<int>(_n_class));
std::iota(_class_task_check.back().begin(), _class_task_check.back().end(), _n_class * tt);
}
util_funcs::argsort(inds.data(), inds.data() + inds.size(), &_prop[task_start]);
_sample_inds_to_sorted_dmat_inds[inds[0]] = task_start;
......@@ -98,13 +99,13 @@ SISSOClassifier::SISSOClassifier(
_sample_inds_to_sorted_dmat_inds[inds[ii]] = ii + task_start;
if(_prop[inds[ii]] != _prop[inds[ii - 1]])
{
_inds_check[tt * _n_class + cc].erase(_inds_check[tt * _n_class + cc].begin() + cls_start, _inds_check[tt * _n_class + cc].begin() + ii);
_class_task_check[tt * _n_class + cc].erase(_class_task_check[tt * _n_class + cc].begin() + cc);
n_samp_per_class.push_back(ii - cls_start);
cls_start = ii;
++cc;
}
}
_inds_check[tt * _n_class + cc].erase(_inds_check[tt * _n_class + cc].begin() + cls_start, _inds_check[tt * _n_class + cc].end());
_class_task_check[tt * _n_class + cc].erase(_class_task_check[tt * _n_class + cc].begin() + cc);
n_samp_per_class.push_back(inds.size() - cls_start);
if(n_samp_per_class.size() != (tt + 1) * _n_class)
......@@ -158,10 +159,11 @@ void SISSOClassifier::set_data(std::vector<int>& inds, int start, int task)
void SISSOClassifier::svm(std::vector<int>& inds, double* coefs, int start, int task)
{
_trainer.stoppingCondition().maxSeconds = 0.001;
_trainer.stoppingCondition().maxIterations = 1000;
_trainer.stoppingCondition().minAccuracy = 0.01;
// _trainer.stoppingCondition().maxIterations = 100;
set_data(inds, start, task);
std::cout << _training[0].inputs().elements()[0] << std::endl;
_trainer.train(_model, _training[task]);
std::copy_n(_model.parameterVector().begin(), inds.size() + (!_fix_intercept), coefs);
if(!_fix_intercept)
......@@ -243,6 +245,17 @@ void SISSOClassifier::set_lp_data(int task, int cls)
int n_dim = _cur_inds.size();
for(int ii = 0; ii < n_dim; ++ii)
dcopy_(_lp_n_col, prop_sorted_d_mat::access_sorted_d_matrix(_cur_inds[ii], task, cls), 1, &_lp_elements[ii], _lp_n_row);
int n_copied = 0;
for(auto& cls_task_ind : _class_task_check[task * _n_class + cls])
{
for(int ii = 0; ii < _cur_inds.size(); ++ii)
{
dcopy_(prop_sorted_d_mat::N_SAMPLES_PER_CLASS[cls_task_ind], prop_sorted_d_mat::access_sorted_d_matrix(_cur_inds[ii], task, cls_task_ind % _n_class), 1, &_lp_row_lower[n_copied + ii], _lp_n_row);
dcopy_(prop_sorted_d_mat::N_SAMPLES_PER_CLASS[cls_task_ind], prop_sorted_d_mat::access_sorted_d_matrix(_cur_inds[ii], task, cls_task_ind % _n_class), 1, &_lp_row_upper[n_copied + ii], _lp_n_row);
}
n_copied += prop_sorted_d_mat::N_SAMPLES_PER_CLASS[cls_task_ind] * _cur_inds.size();
}
_sz_cls_task_check = n_copied / _cur_inds.size();
_lp_simplex.loadProblem(
_lp_n_col,
......@@ -258,17 +271,19 @@ void SISSOClassifier::set_lp_data(int task, int cls)
);
}
int SISSOClassifier::pt_in_convex_hull_lp(int samp_ind)
int SISSOClassifier::pt_in_convex_hull_lp(int cls_task_ind)
{
std::transform(_cur_inds.begin(), _cur_inds.end(), _lp_row_upper.begin(), [&samp_ind](int ind){return prop_sorted_d_mat::get_sorted_d_matrix_el(ind, samp_ind);});
std::transform(_cur_inds.begin(), _cur_inds.end(), _lp_row_lower.begin(), [&samp_ind](int ind){return prop_sorted_d_mat::get_sorted_d_matrix_el(ind, samp_ind);});
_lp_simplex.chgRowLower(_lp_row_lower.data());
_lp_simplex.chgRowUpper(_lp_row_upper.data());
int n_overlap = 0;
for(int ii = 0; ii < _sz_cls_task_check; ++ii)
{
_lp_simplex.chgRowLower(_lp_row_lower.data() + ii * _lp_n_row);
_lp_simplex.chgRowUpper(_lp_row_upper.data() + ii * _lp_n_row);
_lp_simplex.primal();
_lp_simplex.dual();
n_overlap += _lp_simplex.primalFeasible();
}
return _lp_simplex.primalFeasible();
return n_overlap;
}
int SISSOClassifier::get_n_overlap_lp()
......@@ -280,8 +295,9 @@ int SISSOClassifier::get_n_overlap_lp()
{
_lp_n_col = prop_sorted_d_mat::get_class_size(tt, cc);
set_lp_data(tt, cc);
for(auto& ind : _inds_check[tt * _n_class + cc])
n_overlap += pt_in_convex_hull_lp(ind);
// n_overlap += std::accumulate(_inds_check[tt * _n_class + cc].begin(), _inds_check[tt * _n_class + cc].end(), 0, [this](int tot, int samp_ind){return tot + pt_in_convex_hull_lp(samp_ind);});
for(auto& cls_task_ind : _class_task_check[tt * _n_class + cc])
n_overlap += pt_in_convex_hull_lp(cls_task_ind);
}
}
return n_overlap;
......@@ -294,6 +310,8 @@ void SISSOClassifier::l0_norm(std::vector<double>& prop, int n_dim)
_lp_n_row = n_dim + 1;
std::fill_n(_lp_elements.begin(), _lp_elements.size(), 1.0);
std::fill_n(_lp_row_lower.begin(), _lp_row_lower.size(), 1.0 - _width);
std::fill_n(_lp_row_upper.begin(), _lp_row_upper.size(), 1.0 + _width);
std::iota(_lp_column_start.begin(), _lp_column_start.end(), 0);
std::transform(_lp_column_start.begin(), _lp_column_start.end(), _lp_column_start.begin(), [&n_dim](int ii){return (n_dim + 1) * ii;});
......@@ -332,10 +350,6 @@ void SISSOClassifier::l0_norm(std::vector<double>& prop, int n_dim)
}
} while(util_funcs::iterate(_cur_inds, _cur_inds.size(), _mpi_comm->size()));
for(int nn = 0; nn < min_n_convex_overlap.size(); ++nn)
std::cout << nn << '\t' << min_n_convex_overlap[nn] << '\t' << min_inds[nn].size() << std::endl;
std::vector<int> inds_min(n_get_models * n_dim, -1);
if(min_inds[max_error_ind].size() > 1)
{
......@@ -353,7 +367,7 @@ void SISSOClassifier::l0_norm(std::vector<double>& prop, int n_dim)
}
test_inds = find_min_svm(test_inds, n_keep);
for(int nn = 0; nn < n_keep; ++nn)
min_inds[inds_change[nn]] = {test_inds[inds_change[nn]]};
min_inds[inds_change[nn]] = {test_inds[nn]};
}
for(int ii = 0; ii < n_get_models; ++ii)
std::copy_n(min_inds[ii][0].begin(), min_inds[ii][0].size(), &inds_min[n_dim * ii]);
......@@ -396,11 +410,9 @@ void SISSOClassifier::l0_norm(std::vector<double>& prop, int n_dim)
min_nodes[ii] = std::make_shared<ModelNode>(_feat_space->phi_selected()[index]->arr_ind(), _feat_space->phi_selected()[index]->rung(), _feat_space->phi_selected()[index]->expr(), _feat_space->phi_selected()[index]->postfix_expr(), _feat_space->phi_selected()[index]->value(), _feat_space->phi_selected()[index]->test_value(), _feat_space->phi_selected()[index]->unit());
}
models.push_back(ModelClassifier(_prop_unit, _prop, _prop_test, min_nodes, _task_sizes_train, _task_sizes_test, _fix_intercept));
std::cout << models.back().n_svm_misclassified_train() << std::endl;
}
_models.push_back(models);
std::cout << _models.back().back().n_svm_misclassified_train() << std::endl;
}
void SISSOClassifier::fit()
......@@ -462,19 +474,15 @@ void SISSOClassifier::fit()
if(_mpi_comm->rank() == 0)
{
std::cout << "Time for l0-norm: " << duration << std::endl;
std::cout << _models.back().back().n_svm_misclassified_train() << std::endl;
for(int rr = 0; rr < _n_models_store; ++rr)
{
_models.back()[rr].to_file("models/train_dim_" + std::to_string(dd) + "_model_" + std::to_string(rr) + ".dat");
if(_leave_out_inds.size() > 0)
_models.back()[rr].to_file("models/test_dim_" + std::to_string(dd) + "_model_" + std::to_string(rr) + ".dat", false, _leave_out_inds);
}
std::cout << _models.back().back().n_svm_misclassified_train() << std::endl;
}
for(int rr = 0; rr < _n_residual; ++rr)
_models.back()[rr].copy_error(&residual[rr * _n_samp]);
std::cout << _models.back().back().n_svm_misclassified_train() << std::endl;
}
}
......@@ -37,7 +37,7 @@ protected:
ClpSimplex _lp_simplex; //!< Model used to find if a point is inside the convex hull
std::vector<std::vector<ModelClassifier>> _models; //!< List of models
std::vector<std::vector<int>> _inds_check;
std::vector<std::vector<int>> _class_task_check;
std::vector<double> _lp_elements;
std::vector<double> _lp_row_lower;
......@@ -66,6 +66,7 @@ protected:
double _c;
double _width;
int _sz_cls_task_check;
int _n_class;
int _lp_n_col;
int _lp_n_row;
......
......@@ -11,16 +11,16 @@ ModelNode::ModelNode(int feat_ind, int rung, std::string expr, std::string post_
_b_remap_svm(0.0),
_w_remap_svm(1.0),
_rung(rung)
{
double w_remap_svm_temp = 2.0 / (*std::max_element(_value.begin(), _value.end()) - *std::min_element(_value.begin(), _value.end()));
double b_remap_svm_temp = w_remap_svm_temp * (*std::max_element(_value.begin(), _value.end()) + *std::min_element(_value.begin(), _value.end())) / 2.0;
{
double w_remap_svm_temp = 1.0 / (*std::max_element(_value.begin(), _value.end()) - *std::min_element(_value.begin(), _value.end()));
double b_remap_svm_temp = *std::min_element(_value.begin(), _value.end());
_w_remap_svm = w_remap_svm_temp;
_b_remap_svm = b_remap_svm_temp;
std::transform(_value.begin(), _value.end(), _value_svm.begin(), [w_remap_svm_temp, b_remap_svm_temp](double val){return val * w_remap_svm_temp - b_remap_svm_temp;});
std::transform(_test_value.begin(), _test_value.end(), _test_value_svm.begin(), [w_remap_svm_temp, b_remap_svm_temp](double val){return val * w_remap_svm_temp - b_remap_svm_temp;});
}
std::transform(_value.begin(), _value.end(), _value_svm.begin(), [w_remap_svm_temp, b_remap_svm_temp](double val){return (val - b_remap_svm_temp) * w_remap_svm_temp;});
std::transform(_test_value.begin(), _test_value.end(), _test_value_svm.begin(), [w_remap_svm_temp, b_remap_svm_temp](double val){return (val - b_remap_svm_temp) * w_remap_svm_temp;});
}
ModelNode::~ModelNode()
{}
......
......@@ -100,9 +100,13 @@ public:
inline double* svm_value_ptr(){return _value_svm.data();}
inline double remap_b_svm(double b){return b + _b_remap_svm;}
inline std::vector<double> svm_test_value(){return _test_value_svm;}
inline double remap_w_svm(double w){return w / _w_remap_svm;}
inline double* svm_test_value_ptr(){return _test_value_svm.data();}
inline double remap_b_svm(double w){return w * _b_remap_svm;}
inline double remap_w_svm(double w){return w * _w_remap_svm;}
// DocString: model_node_set_value
/**
......
......@@ -64,7 +64,6 @@ int main(int argc, char const *argv[])
{
for(int ii = 0; ii < sisso.models().size(); ++ii)
{
std::cout << sisso.models()[ii][0].n_convex_overlap_train() << '\t' << sisso.models()[ii][0].n_svm_misclassified_train() << '\t' << sisso.models()[ii][0].n_samp_train() << '\t' << static_cast<double>(sisso.models()[ii][0].n_convex_overlap_train()) / static_cast<double>(sisso.models()[ii][0].n_samp_train()) << std::endl;
std::cout << "Train Error: " << sisso.models()[ii][0].percent_train_error();
if(IP._prop_test.size() > 0)
std::cout << "; Test Error: " << sisso.models()[ii][0].percent_test_error() << std::endl;
......
......@@ -14,6 +14,14 @@ SISSOClassifier::SISSOClassifier(
bool fix_intercept
) :
SISSO_DI(feat_space, prop_unit, prop, prop_test, task_sizes_train, task_sizes_test, leave_out_inds, n_dim, n_residual, n_models_store, fix_intercept),
_lp_elements((n_dim + 1) * _n_samp, 0.0),
_lp_row_lower((n_dim + 1) * _n_samp, 1.0),
_lp_row_upper((n_dim + 1) * _n_samp, 1.0),
_lp_objective(_n_samp, 0.0),
_lp_column_lower(_n_samp, 0.0),
_lp_column_upper(_n_samp, COIN_DBL_MAX),
_lp_column_start(_n_samp, 0),
_lp_row((n_dim + 1) * _n_samp, 0),
_training(_task_sizes_train.size()),
_int_prop(_prop.size()),
_int_prop_test(_prop_test.size()),
......@@ -24,7 +32,31 @@ SISSOClassifier::SISSOClassifier(
std::vector<double> prop_vec = python_conv_utils::from_ndarray<double>(prop);
std::vector<double> prop_test_vec = python_conv_utils::from_ndarray<double>(prop_test);
_trainer.stoppingCondition().maxSeconds = 0.01;
for(auto& pt : prop_test_vec)
if(std::none_of(prop_vec.begin(), prop_vec.end(), [&pt](double pp){return std::abs(pp - pt) < 1e-10;}))
throw std::logic_error("A class in the property vector (test set) is not in the training set.");
_lp_simplex.setLogLevel(0);
_lp_simplex.setMoreSpecialOptions(8192 + 64 + 2);
_trainer.stoppingCondition().maxSeconds = 0.001;
std::vector<int> inds = util_funcs::argsort(prop_vec);
std::map<double, double> class_map;
double class_num = 0;
prop_vec[inds[0]] = 0;
class_map[inds[0]] = 0;
for(int ii = 1; ii < inds.size(); ++ii)
{
if(_prop[inds[ii]] != _prop[inds[ii-1]])
{
++class_num;
++_n_class;
class_map[_prop[inds[ii]]] = class_num;
}
prop_vec[inds[ii]] = class_num;
}
if(_n_class > 2)
_trainer.setMcSvmType(shark::McSvm::WW);
double min_prop = *std::min_element(prop_vec.begin(), prop_vec.end());
if(min_prop < 0.0)
......@@ -57,29 +89,35 @@ SISSOClassifier::SISSOClassifier(
std::vector<int> n_samp_per_class;
int task_start;
for(int tt = 0; tt < _n_task; ++tt)
{
std::vector<int> inds(_task_sizes_train[tt]);
inds = std::vector<int>(_task_sizes_train[tt]);
std::iota(inds.begin(), inds.end(), task_start);
for(int cc = 0; cc < _n_class; ++cc)
_inds_check.push_back(inds);
{
_class_task_check.push_back(std::vector<int>(_n_class));
std::iota(_class_task_check.back().begin(), _class_task_check.back().end(), _n_class * tt);
}
util_funcs::argsort(inds.data(), inds.data() + inds.size(), &_prop[task_start]);
_sample_inds_to_sorted_dmat_inds[0] = task_start;
_sample_inds_to_sorted_dmat_inds[inds[0]] = task_start;
int cls_start = 0;
int cc= 0;
int cc = 0;
for(int ii = 1; ii < inds.size(); ++ii)
{
_sample_inds_to_sorted_dmat_inds[inds[ii]] = ii + task_start;
if(_prop[inds[ii]] != _prop[inds[ii - 1]])
{
_inds_check[tt * _n_class + cc].erase(_inds_check[tt * _n_class + cc].begin() + cls_start, _inds_check[tt * _n_class + cc].begin() + ii);
_class_task_check[tt * _n_class + cc].erase(_class_task_check[tt * _n_class + cc].begin() + cc);
n_samp_per_class.push_back(ii - cls_start);
cls_start = ii;
++cc;
}
}
_class_task_check[tt * _n_class + cc].erase(_class_task_check[tt * _n_class + cc].begin() + cc);
n_samp_per_class.push_back(inds.size() - cls_start);
if(n_samp_per_class.size() != (tt + 1) * _n_class)
throw std::logic_error("A class is not represented in task " + std::to_string(tt) + ".");
......@@ -103,6 +141,14 @@ SISSOClassifier::SISSOClassifier(
bool fix_intercept
) :
SISSO_DI(feat_space, prop_unit, prop, prop_test, task_sizes_train, task_sizes_test, leave_out_inds, n_dim, n_residual, n_models_store, fix_intercept),
_lp_elements((n_dim + 1) * _n_samp, 0.0),
_lp_row_lower((n_dim + 1) * _n_samp, 1.0),
_lp_row_upper((n_dim + 1) * _n_samp, 1.0),
_lp_objective(_n_samp, 0.0),
_lp_column_lower(_n_samp, 0.0),
_lp_column_upper(_n_samp, COIN_DBL_MAX),
_lp_column_start(_n_samp, 0),
_lp_row((n_dim + 1) * _n_samp, 0),
_training(_task_sizes_train.size()),
_int_prop(_prop.size()),
_int_prop_test(_prop_test.size()),
......@@ -113,7 +159,31 @@ SISSOClassifier::SISSOClassifier(
std::vector<double> prop_vec = python_conv_utils::from_list<double>(prop);
std::vector<double> prop_test_vec = python_conv_utils::from_list<double>(prop_test);
_trainer.stoppingCondition().maxSeconds = 0.01;
for(auto& pt : prop_test_vec)
if(std::none_of(prop_vec.begin(), prop_vec.end(), [&pt](double pp){return std::abs(pp - pt) < 1e-10;}))
throw std::logic_error("A class in the property vector (test set) is not in the training set.");
_lp_simplex.setLogLevel(0);
_lp_simplex.setMoreSpecialOptions(8192 + 64 + 2);
_trainer.stoppingCondition().maxSeconds = 0.001;
std::vector<int> inds = util_funcs::argsort(prop_vec);
std::map<double, double> class_map;
double class_num = 0;
prop_vec[inds[0]] = 0;
class_map[inds[0]] = 0;
for(int ii = 1; ii < inds.size(); ++ii)
{
if(_prop[inds[ii]] != _prop[inds[ii-1]])
{
++class_num;
++_n_class;
class_map[_prop[inds[ii]]] = class_num;
}
prop_vec[inds[ii]] = class_num;
}
if(_n_class > 2)
_trainer.setMcSvmType(shark::McSvm::WW);
double min_prop = *std::min_element(prop_vec.begin(), prop_vec.end());
if(min_prop < 0.0)
......@@ -143,29 +213,35 @@ SISSOClassifier::SISSOClassifier(
std::vector<int> n_samp_per_class;
int task_start;
for(int tt = 0; tt < _n_task; ++tt)
{
std::vector<int> inds(_task_sizes_train[tt]);
inds = std::vector<int>(_task_sizes_train[tt]);
std::iota(inds.begin(), inds.end(), task_start);
for(int cc = 0; cc < _n_class; ++cc)
_inds_check.push_back(inds);
{
_class_task_check.push_back(std::vector<int>(_n_class));
std::iota(_class_task_check.back().begin(), _class_task_check.back().end(), _n_class * tt);
}
util_funcs::argsort(inds.data(), inds.data() + inds.size(), &_prop[task_start]);
_sample_inds_to_sorted_dmat_inds[0] = task_start;
_sample_inds_to_sorted_dmat_inds[inds[0]] = task_start;
int cls_start = 0;
int cc= 0;
int cc = 0;
for(int ii = 1; ii < inds.size(); ++ii)
{
_sample_inds_to_sorted_dmat_inds[inds[ii]] = ii + task_start;
if(_prop[inds[ii]] != _prop[inds[ii - 1]])
{
_inds_check[tt * _n_class + cc].erase(_inds_check[tt * _n_class + cc].begin() + cls_start, _inds_check[tt * _n_class + cc].begin() + ii);
_class_task_check[tt * _n_class + cc].erase(_class_task_check[tt * _n_class + cc].begin() + cc);
n_samp_per_class.push_back(ii - cls_start);
cls_start = ii;
++cc;
}
}
_class_task_check[tt * _n_class + cc].erase(_class_task_check[tt * _n_class + cc].begin() + cc);
n_samp_per_class.push_back(inds.size() - cls_start);
if(n_samp_per_class.size() != (tt + 1) * _n_class)
throw std::logic_error("A class is not represented in task " + std::to_string(tt) + ".");
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment