Commit 04ea60aa authored by Thomas Purcell's avatar Thomas Purcell
Browse files

Bug Fix

Removing duplicates did not remove every duplicate; now it does.
Also standardized the function somewhat.
parent 3b577ab8
......@@ -605,48 +605,28 @@ void FeatureSpace::remove_duplicate_features(std::vector<node_ptr>& feat_set, in
0.0
)
);
if(scores[inds[sc]] > -1e-7)
{
// If score is 0.0 then check against all other 0.0 values
for(int sc2 = 0; sc2 < sc; ++sc2)
{
double comp = 1.0 / static_cast<double>(_n_samp_train) * std::abs(
base_val -
std::abs(
std::inner_product(
val_ptr,
val_ptr + _n_samp_train,
feat_set[start + inds[sc2]]->stand_value_ptr(true),
0.0
)
)
);
if(comp < 1e-10)
{
del_inds.push_back(-1 * (inds[sc] + start));
break;
}
}
}
else if(scores[inds[sc + 1]] - scores[inds[sc]] < 1e-7)
int sc2 = sc + 1;
while((sc2 < scores.size()) && (scores[inds[sc2]] - scores[inds[sc]] < 1e-7))
{
// Otherwise just compare against the closest neighbor
double comp = 1.0 / static_cast<double>(_n_samp_train) * std::abs(
double comp = std::abs(
base_val -
std::abs(
std::inner_product(
val_ptr,
val_ptr + _n_samp_train,
feat_set[start + inds[sc + 1]]->stand_value_ptr(true),
feat_set[start + inds[sc2]]->stand_value_ptr(true),
0.0
)
)
);
if(comp < 1e-10)
if(comp / static_cast<double>(_n_samp_train) < 1e-10)
{
del_inds.push_back(-1 * (inds[sc] + start));
break;
}
++sc2;
}
}
......
......@@ -22,18 +22,18 @@ namespace
protected:
// Test-fixture setup: calls node_value_arrs::initialize_values_arr and fills the
// fixture's _value_N / _test_value_N vectors that the tests in this file read.
// NOTE(review): this span comes from a diff view that lists the removed and the
// added version of each changed line back to back, without +/- markers, so most
// statements below appear twice. Presumably only the second statement of each
// pair (the {8} / eight-element variants) is the current code — confirm against
// the real file before relying on it.
void SetUp() override
{
node_value_arrs::initialize_values_arr({4}, {1}, 4, 2, false);
node_value_arrs::initialize_values_arr({8}, {1}, 4, 2, false);
_value_1 = {1.0, 2.0, 3.0, 4.0};
// Eight values with mean 5 and population standard deviation 2.
_value_1 = {2, 4, 4, 4, 5, 5, 7, 9};
_test_value_1 = {5.0};
_value_2 = {10.0, 10.0, 10.0, 1.0};
_value_2 = {10.0, 10.0, 10.0, 1.0, 10.0, 10.0, 10.0, 1.0};
_test_value_2 = {10.0};
_value_3 = {1.0, 2.0, 3.0, 1.0};
_value_3 = {1.0, 2.0, 3.0, 1.0, 1.0, 4.0, 5.0, 1.0};
_test_value_3 = {5.0};
// _value_4 keeps three elements in both versions and its test vector is empty.
// NOTE(review): presumably a deliberate size mismatch for error-path tests — confirm.
_value_4 = {1.0, 2.0, 3.0};
_value_4 = {1.0, 2.0, 3.0,};
_test_value_4 = {};
}
......@@ -201,4 +201,24 @@ namespace
#endif
}
TEST_F(FeatureNodeTest, StandValTests)
{
    // Reference values the node's standardized training data is compared against.
    // They equal (x - 5) / 2 for x in {2, 4, 4, 4, 5, 5, 7, 9}.
    const std::vector<double> expected_std = {-1.5, -0.5, -0.5, -0.5, 0, 0, 1, 2};

    auto node = std::make_shared<FeatureNode>(0, "A", _value_1, _test_value_1, Unit("m"));

    // Element-wise difference between the reference and the node's standardized
    // training values; every difference must vanish to within 1e-10.
    const double* train_std = node->stand_value_ptr();
    std::vector<double> residuals(expected_std.size());
    std::transform(
        expected_std.begin(),
        expected_std.end(),
        train_std,
        residuals.begin(),
        std::minus<double>()
    );
    const bool train_matches = std::all_of(
        residuals.begin(),
        residuals.end(),
        [](double diff){return std::abs(diff) < 1e-10;}
    );
    EXPECT_TRUE(train_matches);

    // The first standardized test value is expected to be zero.
    const double* test_std = node->stand_test_value_ptr();
    EXPECT_LT(std::abs(*test_std), 1e-10);
}
}
......@@ -244,4 +244,65 @@ namespace
EXPECT_THROW(feat_space.sis(_prop), std::logic_error);
}
// Checks FeatureSpace::remove_duplicate_features on a set of eight primary
// features in which "G" reuses the values of "D" and "H" reuses the values of "A".
TEST_F(FeatSpaceTest, RemoveDuplicatesTest)
{
    // Re-create the shared value storage for this test's own layout.
    node_value_arrs::finalize_values_arr();
    node_value_arrs::initialize_values_arr({10}, {0}, 8, 0, false);

    InputParser inputs;
    inputs.set_task_sizes_train({10});
    inputs.set_allowed_ops({"sq"});
    inputs.set_allowed_param_ops({});
    inputs.set_cross_cor_max(1.0);
    inputs.set_l_bound(1e-50);
    inputs.set_u_bound(1e50);
    inputs.set_n_rung_store(0);
    inputs.set_max_rung(0);
    inputs.set_n_sis_select(10);
    inputs.set_n_rung_generate(0);
    inputs.set_max_param_depth(0);
    inputs.set_reparam_residual(false);
    inputs.set_calc_type("regression");

    // Six distinct ten-sample value vectors; value_4 and value_1 are reused below
    // to create the duplicate features.
    std::vector<double> value_1 = { 1.0, -1.0,  0.0,  0.0,  0.0,  0.0,  0.0,  0.0,  0.0,  0.0};
    std::vector<double> value_2 = { 0.0,  0.0,  1.0, -1.0,  0.0,  0.0,  0.0,  0.0,  0.0,  0.0};
    std::vector<double> value_3 = { 1.0, -1.0,  1.0, -1.0,  0.0,  0.0,  0.0,  0.0,  0.0,  0.0};
    std::vector<double> value_4 = { 0.0,  0.0,  0.0,  0.0, -1.0,  1.0,  0.0,  0.0,  0.0,  0.0};
    std::vector<double> value_5 = { 0.0,  0.0,  0.0,  0.0,  0.0,  0.0, -1.0,  1.0,  0.0,  0.0};
    std::vector<double> value_6 = { 0.0,  0.0,  0.0,  0.0, -1.0,  1.0, -1.0,  1.0,  0.0,  0.0};
    std::vector<double> prop    = { 0.0,  0.0,  0.0,  0.0, -1.0,  1.0, -1.0,  1.0,  0.0,  0.0};
    inputs.set_prop_train(prop);

    FeatureNode feat_1(0, "A", value_1, std::vector<double>(), Unit());
    FeatureNode feat_2(1, "B", value_2, std::vector<double>(), Unit());
    FeatureNode feat_3(2, "C", value_3, std::vector<double>(), Unit());
    FeatureNode feat_4(3, "D", value_4, std::vector<double>(), Unit());
    FeatureNode feat_5(4, "E", value_5, std::vector<double>(), Unit());
    FeatureNode feat_6(5, "F", value_6, std::vector<double>(), Unit());
    FeatureNode feat_7(6, "G", value_4, std::vector<double>(), Unit()); // same values as "D"
    FeatureNode feat_8(7, "H", value_1, std::vector<double>(), Unit()); // same values as "A"

    std::vector<FeatureNode> phi_0 = {
        feat_1,
        feat_2,
        feat_3,
        feat_4,
        feat_5,
        feat_6,
        feat_7,
        feat_8
    };
    inputs.set_phi_0(phi_0);

    // Build the shared-pointer feature list handed to remove_duplicate_features.
    // The lambda takes its argument by const reference so each element is copied
    // only once (into the new shared object).
    std::vector<node_ptr> phi(8, nullptr);
    std::transform(
        phi_0.begin(),
        phi_0.end(),
        phi.begin(),
        [](const FeatureNode& feat){return std::make_shared<FeatureNode>(feat);}
    );

    FeatureSpace feat_space(inputs);

    // NOTE(review): the second argument is presumably the index at which the
    // duplicate check starts. With 2, only the D/G pair would be inside the checked
    // range; with 0, the A/H pair is covered as well — confirm against the
    // function's declaration.
    // Unsigned literals avoid a signed/unsigned comparison against size().
    feat_space.remove_duplicate_features(phi, 2);
    EXPECT_EQ(phi.size(), 7u);

    feat_space.remove_duplicate_features(phi, 0);
    EXPECT_EQ(phi.size(), 6u);
}
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment