Commit 867ca3d0 authored by Thomas Purcell
Browse files

Update Tests for coverage

This should fix the issues
parent a5098416
......@@ -235,7 +235,8 @@ void LossFunctionConvexHull::setup_lp(bool initialize_sorted_d_mat)
void LossFunctionConvexHull::reset_projection_prop(const std::vector<std::vector<model_node_ptr>>& models)
{
_n_project_prop = models.size();
_projection_prop.resize(_n_samp * _n_project_prop);
_projection_prop.resize(_n_samp * _n_project_prop, 0);
_projection_prop_std.resize(_n_samp * _n_project_prop, 0);
for(int mm = 0; mm < _n_project_prop; ++mm)
{
double loss = (*this)(models[mm]);
......
......@@ -389,20 +389,23 @@ bool comp_feats::valid_feature_against_selected_spearman(
);
volatile bool is_valid = true;
#pragma omp parallel for schedule(dynamic)
for(int dd = start_sel; dd < end_sel; ++dd)
#pragma omp parallel if(omp_get_num_threads() == 0)
{
if(!is_valid)
continue;
// Rank the new variable and take the Pearson correlation of the rank variables (val_ptr rank still in &RANK[(omp_get_thread_num() * 4 + 2) * n_samp])
util_funcs::rank(node_value_arrs::get_d_matrix_ptr(dd), &RANK[omp_get_thread_num() * 4 * n_samp], &INDEX[omp_get_thread_num() * 2 * n_samp], n_samp);
double comp_value = (
base_val - std::abs(util_funcs::r(&RANK[omp_get_thread_num() * 4 * n_samp], &RANK[(omp_get_thread_num() * 4 + 2) * n_samp], n_samp))
);
if(std::abs(comp_value) < 1.0 -cross_cor_max + 5.0e-9)
#pragma omp for schedule(dynamic)
for(int dd = start_sel; dd < end_sel; ++dd)
{
is_valid = false;
if(!is_valid)
continue;
// Rank the new variable and take the Pearson correlation of the rank variables (val_ptr rank still in &RANK[(omp_get_thread_num() * 4 + 2) * n_samp])
util_funcs::rank(node_value_arrs::get_d_matrix_ptr(dd), &RANK[omp_get_thread_num() * 4 * n_samp], &INDEX[omp_get_thread_num() * 2 * n_samp], n_samp);
double comp_value = (
base_val - std::abs(util_funcs::r(&RANK[omp_get_thread_num() * 4 * n_samp], &RANK[(omp_get_thread_num() * 4 + 2) * n_samp], n_samp))
);
if(std::abs(comp_value) < 1.0 -cross_cor_max + 5.0e-9)
{
is_valid = false;
}
}
}
return is_valid;
......
......@@ -5,7 +5,7 @@
"n_residual": 1,
"data_file": "data.csv",
"data_file_relatice_to_json": true,
"max_feat_cross_correlation": 0.9,
"max_feat_cross_correlation": 0.99,
"property_key": "prop",
"leave_out_frac": 0.2,
"n_models_store": 1,
......
// // Copyright 2021 Thomas A. R. Purcell
// //
// // Licensed under the Apache License, Version 2.0 (the "License");
// // you may not use this file except in compliance with the License.
// // You may obtain a copy of the License at
// //
// // http://www.apache.org/licenses/LICENSE-2.0
// //
// // Unless required by applicable law or agreed to in writing, software
// // distributed under the License is distributed on an "AS IS" BASIS,
// // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// // See the License for the specific language governing permissions and
// // limitations under the License.
// #include <descriptor_identifier/solver/SISSOClassifier.hpp>
// #include <boost/filesystem.hpp>
// #include "gtest/gtest.h"
// #include <random>
// namespace
// {
// class SISSOClassifierTests : public ::testing::Test
// {
// protected:
// void SetUp() override
// {
// allowed_op_maps::set_node_maps();
// node_value_arrs::initialize_d_matrix_arr();
// mpi_setup::init_mpi_env();
// std::vector<int> task_sizes_train = {80};
// std::vector<int> task_sizes_test = {20};
// node_value_arrs::initialize_values_arr(task_sizes_train, task_sizes_test, 3, 2, false);
// std::vector<std::string> sample_ids_train(task_sizes_train[0]);
// for(int ii = 0; ii < task_sizes_train[0]; ++ii)
// {
// sample_ids_train[ii] = std::to_string(ii);
// }
// std::vector<std::string> sample_ids_test(task_sizes_test[0]);
// std::vector<int> leave_out_inds(task_sizes_test[0]);
// for(int ii = 0; ii < task_sizes_test[0]; ++ii)
// {
// sample_ids_test[ii] = std::to_string(ii);
// leave_out_inds[ii] = ii;
// }
// std::vector<std::string> task_names = {"all"};
// std::vector<double> value_1(task_sizes_train[0], 0.0);
// std::vector<double> value_2(task_sizes_train[0], 0.0);
// std::vector<double> value_3(task_sizes_train[0], 0.0);
// std::vector<double> test_value_1(task_sizes_test[0], 0.0);
// std::vector<double> test_value_2(task_sizes_test[0], 0.0);
// std::vector<double> test_value_3(task_sizes_test[0], 0.0);
// std::default_random_engine generator;
// std::uniform_real_distribution<double> distribution_12_pos(1.0, 2.0);
// std::uniform_real_distribution<double> distribution_12_neg(-2.0, -1.0);
// std::uniform_real_distribution<double> distribution_3(-10.0, 10.0);
// for(int ii = 0; ii < 20; ++ii)
// {
// value_1[ii] = distribution_12_neg(generator);
// value_2[ii] = distribution_12_neg(generator);
// value_3[ii] = distribution_3(generator);
// }
// for(int ii = 20; ii < 40; ++ii)
// {
// value_1[ii] = distribution_12_pos(generator);
// value_2[ii] = distribution_12_pos(generator);
// value_3[ii] = distribution_3(generator);
// }
// for(int ii = 40; ii < 60; ++ii)
// {
// value_1[ii] = distribution_12_neg(generator);
// value_2[ii] = distribution_12_pos(generator);
// value_3[ii] = distribution_3(generator);
// }
// for(int ii = 60; ii < 80; ++ii)
// {
// value_1[ii] = distribution_12_pos(generator);
// value_2[ii] = distribution_12_neg(generator);
// value_3[ii] = distribution_3(generator);
// }
// for(int ii = 0; ii < 5; ++ii)
// {
// test_value_1[ii] = distribution_12_neg(generator);
// test_value_2[ii] = distribution_12_neg(generator);
// test_value_3[ii] = distribution_3(generator);
// }
// for(int ii = 5; ii < 10; ++ii)
// {
// test_value_1[ii] = distribution_12_pos(generator);
// test_value_2[ii] = distribution_12_pos(generator);
// test_value_3[ii] = distribution_3(generator);
// }
// for(int ii = 10; ii < 15; ++ii)
// {
// test_value_1[ii] = distribution_12_neg(generator);
// test_value_2[ii] = distribution_12_pos(generator);
// test_value_3[ii] = distribution_3(generator);
// }
// for(int ii = 15; ii < 20; ++ii)
// {
// test_value_1[ii] = distribution_12_pos(generator);
// test_value_2[ii] = distribution_12_neg(generator);
// test_value_3[ii] = distribution_3(generator);
// }
// FeatureNode feat_1(0, "A", value_1, test_value_1, Unit("m"));
// FeatureNode feat_2(1, "B", value_2, test_value_2, Unit("m"));
// FeatureNode feat_3(2, "C", value_3, test_value_3, Unit());
// std::vector<double> prop = std::vector<double>(task_sizes_train[0], 0.0);
// std::vector<double> prop_test = std::vector<double>(task_sizes_test[0], 0.0);
// std::fill_n(prop.begin() + 20, 20, 1.0);
// std::fill_n(prop.begin() + 40, 20, 2.0);
// std::fill_n(prop.begin() + 60, 20, 3.0);
// std::fill_n(prop_test.begin() + 5, 5, 1.0);
// std::fill_n(prop_test.begin() + 10, 5, 2.0);
// std::fill_n(prop_test.begin() + 15, 5, 3.0);
// std::vector<FeatureNode> phi_0 ={feat_1, feat_2, feat_3};
// std::vector<std::string> allowed_ops = {"sq", "cb", "sqrt", "cbrt", "six_pow", "inv", "abs"};
// std::vector<std::string> allowed_param_ops = {};
// inputs.set_calc_type("classification");
// inputs.set_phi_0(phi_0);
// inputs.set_prop_train(prop);
// inputs.set_prop_test(prop_test);
// inputs.set_task_names(task_names);
// inputs.set_task_sizes_train(task_sizes_train);
// inputs.set_task_sizes_test(task_sizes_test);
// inputs.set_leave_out_inds(leave_out_inds);
// inputs.set_sample_ids_train(sample_ids_train);
// inputs.set_sample_ids_test(sample_ids_test);
// inputs.set_allowed_param_ops(allowed_param_ops);
// inputs.set_allowed_ops(allowed_ops);
// inputs.set_max_rung(2);
// inputs.set_n_sis_select(5);
// inputs.set_n_rung_store(1);
// inputs.set_n_rung_generate(0);
// inputs.set_prop_label("Class");
// inputs.set_prop_unit(Unit());
// inputs.set_n_dim(2);
// inputs.set_n_residual(2);
// inputs.set_n_models_store(3);
// }
// InputParser inputs;
// };
// TEST_F(SISSOClassifierTests, FixInterceptFalseTest)
// {
// std::shared_ptr<FeatureSpace> feat_space = std::make_shared<FeatureSpace>(inputs);
// SISSOClassifier sisso(inputs, feat_space);
// std::vector<double> prop_comp(80, 0.0);
// std::transform(inputs.prop_train().begin(), inputs.prop_train().end(), sisso.prop_train().begin(), prop_comp.begin(), [](double p1, double p2){return std::abs(p1 - p2);});
// EXPECT_FALSE(std::any_of(prop_comp.begin(), prop_comp.end(), [](double p){return p > 1e-10;}));
// std::transform(inputs.prop_test().begin(), inputs.prop_test().begin() + 10, sisso.prop_test().begin(), prop_comp.begin(), [](double p1, double p2){return std::abs(p1 - p2);});
// EXPECT_FALSE(std::any_of(prop_comp.begin(), prop_comp.begin() + 10, [](double p){return p > 1e-10;}));
// EXPECT_EQ(sisso.n_samp(), 80);
// EXPECT_EQ(sisso.n_dim(), 2);
// EXPECT_EQ(sisso.n_residual(), 2);
// EXPECT_EQ(sisso.n_models_store(), 3);
// sisso.fit();
// EXPECT_EQ(sisso.models().size(), 2);
// EXPECT_EQ(sisso.models()[0].size(), 3);
// EXPECT_EQ(sisso.models().back()[0].n_convex_overlap_train(), 0);
// EXPECT_EQ(sisso.models().back()[0].n_convex_overlap_test(), 0);
// EXPECT_EQ(sisso.models().back()[0].n_svm_misclassified_train(), 0);
// EXPECT_EQ(sisso.models().back()[0].n_svm_misclassified_test(), 0);
// boost::filesystem::remove_all("feature_space/");
// boost::filesystem::remove_all("models/");
// }
// }
// Copyright 2021 Thomas A. R. Purcell
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <descriptor_identifier/solver/SISSOClassifier.hpp>
#include <boost/filesystem.hpp>
#include "gtest/gtest.h"
#include <random>
namespace
{
/**
 * @brief Test fixture that prepares an InputParser for a small classification problem.
 *
 * Three primary features (A, B, C) are generated for 80 training and 20 test samples.
 * A and B are drawn from [-2, -1] or [1, 2], the sign combination being fixed per block
 * of samples, while C is drawn from [-10, 10] for every sample. The property takes the
 * values 0, 1, 2, 3 on consecutive blocks of 20 (training) or 5 (test) samples.
 */
class SISSOClassifierTests : public ::testing::Test
{
protected:
    /**
     * @brief Initialize the global node/MPI state, build the data set, and fill in `inputs`.
     */
    void SetUp() override
    {
        // Global state is set up in this order before any FeatureNode is created
        allowed_op_maps::set_node_maps();
        node_value_arrs::initialize_d_matrix_arr();
        mpi_setup::init_mpi_env();

        std::vector<int> task_sizes_train = {80};
        std::vector<int> task_sizes_test = {20};
        const int n_train = task_sizes_train[0];
        const int n_test = task_sizes_test[0];

        node_value_arrs::initialize_values_arr(task_sizes_train, task_sizes_test, 3, 2, false);

        // Sample ids are simply the stringified sample index
        std::vector<std::string> sample_ids_train;
        sample_ids_train.reserve(n_train);
        for(int samp = 0; samp < n_train; ++samp)
        {
            sample_ids_train.push_back(std::to_string(samp));
        }

        std::vector<std::string> sample_ids_test;
        std::vector<int> leave_out_inds;
        sample_ids_test.reserve(n_test);
        leave_out_inds.reserve(n_test);
        for(int samp = 0; samp < n_test; ++samp)
        {
            sample_ids_test.push_back(std::to_string(samp));
            leave_out_inds.push_back(samp);
        }

        std::vector<std::string> task_names = {"all"};

        std::vector<double> value_1(n_train, 0.0);
        std::vector<double> value_2(n_train, 0.0);
        std::vector<double> value_3(n_train, 0.0);

        std::vector<double> test_value_1(n_test, 0.0);
        std::vector<double> test_value_2(n_test, 0.0);
        std::vector<double> test_value_3(n_test, 0.0);

        using dist_t = std::uniform_real_distribution<double>;
        std::default_random_engine generator;
        dist_t distribution_12_pos(1.0, 2.0);
        dist_t distribution_12_neg(-2.0, -1.0);
        dist_t distribution_3(-10.0, 10.0);

        // Fill [start, stop) of the three feature vectors; the draws happen in the order
        // (feature 1, feature 2, feature 3) for each sample so the random sequence is fixed.
        auto fill_block = [&generator, &distribution_3](
            std::vector<double>& feat_a,
            std::vector<double>& feat_b,
            std::vector<double>& feat_c,
            const int start,
            const int stop,
            dist_t& dist_a,
            dist_t& dist_b
        )
        {
            for(int samp = start; samp < stop; ++samp)
            {
                feat_a[samp] = dist_a(generator);
                feat_b[samp] = dist_b(generator);
                feat_c[samp] = distribution_3(generator);
            }
        };

        // Training data: one sign combination of (A, B) per block of 20 samples
        fill_block(value_1, value_2, value_3, 0, 20, distribution_12_neg, distribution_12_neg);
        fill_block(value_1, value_2, value_3, 20, 40, distribution_12_pos, distribution_12_pos);
        fill_block(value_1, value_2, value_3, 40, 60, distribution_12_neg, distribution_12_pos);
        fill_block(value_1, value_2, value_3, 60, 80, distribution_12_pos, distribution_12_neg);

        // Test data: same sign combinations on blocks of 5 samples
        fill_block(test_value_1, test_value_2, test_value_3, 0, 5, distribution_12_neg, distribution_12_neg);
        fill_block(test_value_1, test_value_2, test_value_3, 5, 10, distribution_12_pos, distribution_12_pos);
        fill_block(test_value_1, test_value_2, test_value_3, 10, 15, distribution_12_neg, distribution_12_pos);
        fill_block(test_value_1, test_value_2, test_value_3, 15, 20, distribution_12_pos, distribution_12_neg);

        FeatureNode feat_1(0, "A", value_1, test_value_1, Unit("m"));
        FeatureNode feat_2(1, "B", value_2, test_value_2, Unit("m"));
        FeatureNode feat_3(2, "C", value_3, test_value_3, Unit());

        // Property labels: the first block stays 0.0, the following blocks get 1.0, 2.0, 3.0
        std::vector<double> prop(n_train, 0.0);
        std::vector<double> prop_test(n_test, 0.0);
        for(int label = 1; label <= 3; ++label)
        {
            std::fill_n(prop.begin() + 20 * label, 20, static_cast<double>(label));
        }
        for(int label = 1; label <= 3; ++label)
        {
            std::fill_n(prop_test.begin() + 5 * label, 5, static_cast<double>(label));
        }

        std::vector<FeatureNode> phi_0 ={feat_1, feat_2, feat_3};

        std::vector<std::string> allowed_ops = {"sq", "cb", "sqrt", "cbrt", "six_pow", "inv", "abs"};
        std::vector<std::string> allowed_param_ops = {};

        // Data and task description
        inputs.set_calc_type("classification");
        inputs.set_phi_0(phi_0);
        inputs.set_prop_train(prop);
        inputs.set_prop_test(prop_test);
        inputs.set_task_names(task_names);
        inputs.set_task_sizes_train(task_sizes_train);
        inputs.set_task_sizes_test(task_sizes_test);
        inputs.set_leave_out_inds(leave_out_inds);
        inputs.set_sample_ids_train(sample_ids_train);
        inputs.set_sample_ids_test(sample_ids_test);

        // Feature-space settings
        inputs.set_allowed_param_ops(allowed_param_ops);
        inputs.set_allowed_ops(allowed_ops);
        inputs.set_max_rung(2);
        inputs.set_n_sis_select(5);
        inputs.set_n_rung_store(1);
        inputs.set_n_rung_generate(0);

        // Solver settings
        inputs.set_prop_label("Class");
        inputs.set_prop_unit(Unit());
        inputs.set_n_dim(2);
        inputs.set_n_residual(2);
        inputs.set_n_models_store(3);
    }

    InputParser inputs; //!< Inputs shared by the tests, populated in SetUp()
};
/**
 * @brief Build a SISSOClassifier from the fixture inputs, check that it mirrors them, then fit it.
 *
 * After fit() the test expects two dimensions of models with three stored models in the first,
 * and zero convex-hull overlap / SVM misclassifications for the first model of the last dimension.
 */
TEST_F(SISSOClassifierTests, FixInterceptFalseTest)
{
    auto feat_space = std::make_shared<FeatureSpace>(inputs);
    SISSOClassifier sisso(inputs, feat_space);

    const auto abs_diff = [](double lhs, double rhs){return std::abs(lhs - rhs);};
    const auto above_tol = [](double diff){return diff > 1e-10;};

    // The solver's training property must match the one passed through the inputs
    std::vector<double> prop_comp(80, 0.0);
    std::transform(
        inputs.prop_train().begin(),
        inputs.prop_train().end(),
        sisso.prop_train().begin(),
        prop_comp.begin(),
        abs_diff
    );
    EXPECT_FALSE(std::any_of(prop_comp.begin(), prop_comp.end(), above_tol));

    // NOTE(review): only the first 10 test values are compared although the fixture
    // sets up 20 test samples -- confirm this is intentional.
    std::transform(
        inputs.prop_test().begin(),
        inputs.prop_test().begin() + 10,
        sisso.prop_test().begin(),
        prop_comp.begin(),
        abs_diff
    );
    EXPECT_FALSE(std::any_of(prop_comp.begin(), prop_comp.begin() + 10, above_tol));

    EXPECT_EQ(sisso.n_samp(), 80);
    EXPECT_EQ(sisso.n_dim(), 2);
    EXPECT_EQ(sisso.n_residual(), 2);
    EXPECT_EQ(sisso.n_models_store(), 3);

    sisso.fit();

    // Binding to a const reference keeps the result alive whether models() returns
    // by value or by reference.
    const auto& fitted_models = sisso.models();
    EXPECT_EQ(fitted_models.size(), 2);
    EXPECT_EQ(fitted_models[0].size(), 3);

    const auto& top_model = fitted_models.back()[0];
    EXPECT_EQ(top_model.n_convex_overlap_train(), 0);
    EXPECT_EQ(top_model.n_convex_overlap_test(), 0);
    EXPECT_EQ(top_model.n_svm_misclassified_train(), 0);
    EXPECT_EQ(top_model.n_svm_misclassified_test(), 0);

    // Remove the output directories once the checks are done
    boost::filesystem::remove_all("feature_space/");
    boost::filesystem::remove_all("models/");
}
}
// // Copyright 2021 Thomas A. R. Purcell
// //
// // Licensed under the Apache License, Version 2.0 (the "License");
// // you may not use this file except in compliance with the License.
// // You may obtain a copy of the License at
// //
// // http://www.apache.org/licenses/LICENSE-2.0
// //
// // Unless required by applicable law or agreed to in writing, software
// // distributed under the License is distributed on an "AS IS" BASIS,
// // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// // See the License for the specific language governing permissions and
// // limitations under the License.
// #include "loss_function/LossFunctionConvexHull.hpp"
// #include "mpi_interface/MPI_Interface.hpp"
// #include "gtest/gtest.h"
// #include <random>
// namespace
// {
// class LossFunctionConvexHullTests : public ::testing::Test
// {
// protected:
// void SetUp() override
// {
// mpi_setup::init_mpi_env();
// _task_sizes_train = {80};
// _task_sizes_test = {20};
// node_value_arrs::initialize_values_arr(_task_sizes_train, _task_sizes_test, 2, 2, false);
// node_value_arrs::initialize_d_matrix_arr();
// node_value_arrs::resize_d_matrix_arr(2);
// std::vector<double> value_1(_task_sizes_train[0], 0.0);
// std::vector<double> value_2(_task_sizes_train[0], 0.0);
// std::vector<double> test_value_1(_task_sizes_test[0], 0.0);
// std::vector<double> test_value_2(_task_sizes_test[0], 0.0);
// std::default_random_engine generator;
// std::uniform_real_distribution<double> distribution_12_pos(1.0, 2.0);
// std::uniform_real_distribution<double> distribution_12_neg(-2.0, -1.0);
// for(int ii = 0; ii < 20; ++ii)
// {
// value_1[ii] = distribution_12_neg(generator);
// value_2[ii] = distribution_12_neg(generator);
// }
// value_1[0] = -0.99;
// value_1[1] = -2.01;
// value_2[0] = -0.99;
// value_2[1] = -2.01;
// for(int ii = 20; ii < 40; ++ii)
// {
// value_1[ii] = distribution_12_pos(generator);
// value_2[ii] = distribution_12_pos(generator);
// }
// value_1[20] = 0.99;
// value_1[21] = 2.01;
// value_2[20] = 0.99;
// value_2[21] = 2.01;
// for(int ii = 40; ii < 60; ++ii)
// {
// value_1[ii] = distribution_12_neg(generator);
// value_2[ii] = distribution_12_pos(generator);
// }
// value_1[40] = -0.99;
// value_1[41] = -2.01;
// value_2[40] = 0.99;
// value_2[41] = 2.01;
// for(int ii = 60; ii < 80; ++ii)
// {
// value_1[ii] = distribution_12_pos(generator);
// value_2[ii] = distribution_12_neg(generator);
// }
// value_1[60] = 0.99;
// value_1[61] = 2.01;
// value_2[60] = -0.99;
// value_2[61] = -2.01;
// for(int ii = 0; ii < 5; ++ii)
// {
// test_value_1[ii] = distribution_12_neg(generator);
// test_value_2[ii] = distribution_12_neg(generator);
// }
// for(int ii = 5; ii < 10; ++ii)
// {
// test_value_1[ii] = distribution_12_pos(generator);
// test_value_2[ii] = distribution_12_pos(generator);
// }
// for(int ii = 10; ii < 15; ++ii)
// {
// test_value_1[ii] = distribution_12_neg(generator);
// test_value_2[ii] = distribution_12_pos(generator);
// }
// for(int ii = 15; ii < 20; ++ii)
// {
// test_value_1[ii] = distribution_12_pos(generator);
// test_value_2[ii] = distribution_12_neg(generator);
// }
// _phi.push_back(std::make_shared<FeatureNode>(0, "A", value_1, test_value_1, Unit("m")));
// _phi.push_back(std::make_shared<FeatureNode>(1, "B", value_2, test_value_2, Unit("m")));
// _model_phi.push_back(std::make_shared<ModelNode>(_phi[0]));
// _model_phi.push_back(std::make_shared<ModelNode>(_phi[1]));
// std::copy_n(value_1.data(), _task_sizes_train[0], node_value_arrs::get_d_matrix_ptr(0));
// std::copy_n(value_2.data(), _task_sizes_train[0], node_value_arrs::get_d_matrix_ptr(1));
// _prop_train.resize(_task_sizes_train[0], 0.0);
// std::fill_n(_prop_train.begin() + 20, 20, 1.0);
// std::fill_n(_prop_train.begin() + 40, 20, 2.0);
// std::fill_n(_prop_train.begin() + 60, 20, 3.0);
// _prop_test.resize(_task_sizes_test[0], 0.0);
// std::fill_n(_prop_test.begin() + 5, 5, 1.0);
// std::fill_n(_prop_test.begin() + 10, 5, 2.0);