Commit a731c202 authored by Thomas Purcell

The use of the standardized values for comparison now compiles

Fails when tested, though
parent a22eb892
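The change routes the cross-correlation checks through the standardized feature values (stand_value_ptr()) instead of the raw ones (value_ptr()): for zero-mean, unit-variance vectors the Pearson correlation is just a scaled inner product, so the screening below can be done with inner products alone. A minimal standalone sketch of that identity (the function name is illustrative, not part of the library):

```cpp
#include <numeric>
#include <vector>

// For standardized u and w (zero mean, unit standard deviation over n
// samples), the Pearson correlation is r(u, w) = <u, w> / n, so comparing
// |r| against a cross-correlation limit only needs inner products.
double pearson_standardized(const std::vector<double>& u, const std::vector<double>& w)
{
    return std::inner_product(u.begin(), u.end(), w.begin(), 0.0)
           / static_cast<double>(u.size());
}
```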
......@@ -1091,7 +1091,7 @@ void FeatureSpace::generate_and_project(std::shared_ptr<LossFunction> loss, std:
{
double cur_score = scores[inds[ii]];
bool valid_feat = _is_valid(
generated_phi[inds[ii]]->value_ptr(0),
generated_phi[inds[ii]]->stand_value_ptr(),
_n_samp_train,
_cross_cor_max,
scores_sel_all,
......@@ -1100,7 +1100,7 @@ void FeatureSpace::generate_and_project(std::shared_ptr<LossFunction> loss, std:
0
);
valid_feat = valid_feat && _is_valid_feat_list(
generated_phi[inds[ii]]->value_ptr(0),
generated_phi[inds[ii]]->stand_value_ptr(),
_n_samp_train,
_cross_cor_max,
phi_sel_private,
......@@ -1112,14 +1112,12 @@ void FeatureSpace::generate_and_project(std::shared_ptr<LossFunction> loss, std:
if(scores_sel_private.size() == _n_sis_select)
{
generated_phi[inds[ii]]->reindex(index_base + worst_score_ind);
generated_phi[inds[ii]]->set_value();
phi_sel_private[worst_score_ind] = generated_phi[inds[ii]];
scores_sel_private[worst_score_ind] = cur_score;
}
else
{
generated_phi[inds[ii]]->reindex(index_base + scores_sel_private.size());
generated_phi[inds[ii]]->set_value();
phi_sel_private.push_back(generated_phi[inds[ii]]);
scores_sel_private.push_back(cur_score);
}
......@@ -1200,6 +1198,7 @@ void FeatureSpace::sis(std::shared_ptr<LossFunction> loss)
_scores.resize(_phi.size());
}
#endif
// Create output directories if needed
boost::filesystem::path p(_feature_space_file.c_str());
boost::filesystem::create_directories(p.remove_filename());
......@@ -1252,7 +1251,17 @@ void FeatureSpace::sis(std::shared_ptr<LossFunction> loss)
start_time = omp_get_wtime();
while((cur_feat_local != _n_sis_select) && (ii < _scores.size()))
{
if(_is_valid(_phi[inds[ii]]->value_ptr(), _n_samp_train, _cross_cor_max, scores_sel_all, _scores[inds[ii]], cur_feat + cur_feat_local, 0))
if(
_is_valid(
_phi[inds[ii]]->stand_value_ptr(),
_n_samp_train,
_cross_cor_max,
scores_sel_all,
_scores[inds[ii]],
cur_feat + cur_feat_local,
0
)
)
{
scores_sel[cur_feat_local] = _scores[inds[ii]];
scores_sel_all[cur_feat + cur_feat_local] = _scores[inds[ii]];
......@@ -1260,6 +1269,7 @@ void FeatureSpace::sis(std::shared_ptr<LossFunction> loss)
phi_sel.back()->set_selected(true);
phi_sel.back()->set_d_mat_ind(cur_feat + cur_feat_local);
phi_sel.back()->set_value();
phi_sel.back()->set_standardized_value();
++cur_feat_local;
}
......@@ -1334,6 +1344,7 @@ void FeatureSpace::sis(std::shared_ptr<LossFunction> loss)
_phi_selected.back()->set_selected(true);
_phi_selected.back()->set_d_mat_ind(cur_feat);
_phi_selected.back()->set_value();
_phi_selected.back()->set_standardized_value();
++cur_feat_local;
++cur_feat;
}
......@@ -1393,6 +1404,7 @@ void FeatureSpace::sis(std::shared_ptr<LossFunction> loss)
_phi_selected.back()->set_selected(true);
_phi_selected.back()->set_d_mat_ind(cur_feat);
_phi_selected.back()->set_value();
_phi_selected.back()->set_standardized_value();
++cur_feat;
++cur_feat_local;
......
......@@ -48,7 +48,7 @@ std::map<std::string, int> Node::primary_feature_decomp() const
}
BOOST_SERIALIZATION_ASSUME_ABSTRACT(Node)
void Node::set_standardized_value(int offset, const bool for_comp) const
void Node::set_standardized_value(const bool for_comp) const
{
double* stand_val_ptr;
if(_selected)
......@@ -57,17 +57,17 @@ void Node::set_standardized_value(int offset, const bool for_comp) const
}
else
{
stand_val_ptr = node_value_arrs::access_temp_stand_storage(_arr_ind);
stand_val_ptr = node_value_arrs::access_temp_stand_storage(_arr_ind, for_comp);
}
util_funcs::standardize(value_ptr(offset, for_comp), _n_samp, stand_val_ptr);
util_funcs::standardize(value_ptr(-1, for_comp), _n_samp, stand_val_ptr);
}
void Node::set_standardized_test_value(int offset, const bool for_comp) const
void Node::set_standardized_test_value(const bool for_comp) const
{
double* val_ptr = value_ptr(offset, for_comp);
double* test_val_ptr = test_value_ptr(offset, for_comp);
double* stand_val_ptr = node_value_arrs::access_temp_stand_storage_test(_arr_ind);
double* val_ptr = value_ptr(-1, for_comp);
double* test_val_ptr = test_value_ptr(-1, for_comp);
double* stand_val_ptr = node_value_arrs::access_temp_stand_storage_test(_arr_ind, for_comp);
double mean = util_funcs::mean(val_ptr, _n_samp);
double stand_dev = util_funcs::stand_dev(val_ptr, _n_samp, mean);
......@@ -79,65 +79,18 @@ void Node::set_standardized_test_value(int offset, const bool for_comp) const
);
}
void Node::set_standardized_value(const double* params, int offset, const bool for_comp, const int depth) const
{
double* stand_val_ptr;
if(_selected)
{
stand_val_ptr = node_value_arrs::get_stand_d_matrix_ptr(_d_mat_ind);
}
else
{
stand_val_ptr = node_value_arrs::access_temp_stand_storage(_arr_ind);
}
util_funcs::standardize(value_ptr(params, offset, for_comp, depth), _n_samp, stand_val_ptr);
}
void Node::set_standardized_test_value(const double* params, int offset, const bool for_comp, const int depth) const
{
double* val_ptr = value_ptr(params, offset, for_comp, depth);
double* test_val_ptr = test_value_ptr(params, offset, for_comp, depth);
double* stand_val_ptr = node_value_arrs::access_temp_stand_storage_test(_arr_ind);
double mean = util_funcs::mean(val_ptr, _n_samp);
double stand_dev = util_funcs::stand_dev(val_ptr, _n_samp, mean);
std::transform(
test_val_ptr,
test_val_ptr + _n_samp_test,
stand_val_ptr,
[&](double val){return (val - mean) / stand_dev;}
);
}
double* Node::stand_value_ptr(int offset, const bool for_comp) const
{
if(_selected)
{
return node_value_arrs::get_stand_d_matrix_ptr(_d_mat_ind);
}
set_standardized_value(offset, for_comp);
return node_value_arrs::access_temp_stand_storage(_arr_ind);
}
double* Node::stand_test_value_ptr(int offset, const bool for_comp) const
{
set_standardized_test_value(offset, for_comp);
return node_value_arrs::access_temp_stand_storage_test(_arr_ind);
}
double* Node::stand_value_ptr(const double* params, int offset, const bool for_comp, const int depth) const
double* Node::stand_value_ptr(const bool for_comp) const
{
if(_selected)
{
return node_value_arrs::get_stand_d_matrix_ptr(_d_mat_ind);
}
set_standardized_value(params, offset, for_comp, depth);
return node_value_arrs::access_temp_stand_storage(_arr_ind);
set_standardized_value(for_comp);
return node_value_arrs::access_temp_stand_storage(_arr_ind, for_comp);
}
double* Node::stand_test_value_ptr(const double* params, int offset, const bool for_comp, const int depth) const
double* Node::stand_test_value_ptr(const bool for_comp) const
{
set_standardized_test_value(params, offset, for_comp, depth);
return node_value_arrs::access_temp_stand_storage_test(_arr_ind);
set_standardized_test_value(for_comp);
return node_value_arrs::access_temp_stand_storage_test(_arr_ind, for_comp);
}
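The test-value path above standardizes the test samples with the training mean and standard deviation (computed from value_ptr()), not with the test set's own statistics. A standalone sketch of that behavior, assuming a population standard deviation as in the transform above:

```cpp
#include <algorithm>
#include <cmath>
#include <numeric>
#include <vector>

// Shift and scale the test values with the *training* mean and standard
// deviation, mirroring set_standardized_test_value (illustrative only).
std::vector<double> standardize_test_with_train_stats(
    const std::vector<double>& train, const std::vector<double>& test)
{
    const double n = static_cast<double>(train.size());
    const double mean = std::accumulate(train.begin(), train.end(), 0.0) / n;
    double var = 0.0;
    for (double v : train) { var += (v - mean) * (v - mean); }
    const double stand_dev = std::sqrt(var / n);

    std::vector<double> out(test.size());
    std::transform(
        test.begin(), test.end(), out.begin(),
        [&](double val){ return (val - mean) / stand_dev; }
    );
    return out;
}
```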
......@@ -286,7 +286,7 @@ public:
* @param offset (int) Where the current node is in the binary expression tree relative to other nodes at the same depth
* @param for_comp (bool) If true then the evaluation is used for comparing features
*/
void set_standardized_value(int offset=-1, const bool for_comp=false) const;
void set_standardized_value(const bool for_comp=false) const;
/**
* @brief The pointer to where the feature's training data is stored
......@@ -306,7 +306,7 @@ public:
*
* @return pointer to the feature's training value
*/
double* stand_value_ptr(int offset=-1, const bool for_comp=false) const;
double* stand_value_ptr(const bool for_comp=false) const;
// DocString: node_set_test_value
/**
......@@ -324,7 +324,7 @@ public:
* @param offset (int) Where the current node is in the binary expression tree relative to other nodes at the same depth
* @param for_comp (bool) If true then the evaluation is used for comparing features
*/
void set_standardized_test_value(int offset=-1, const bool for_comp=false) const;
void set_standardized_test_value(const bool for_comp=false) const;
/**
* @brief The pointer to where the feature's test data is stored
......@@ -344,7 +344,7 @@ public:
*
* @return pointer to the feature's test values
*/
double* stand_test_value_ptr(int offset=-1, const bool for_comp=false) const;
double* stand_test_value_ptr(const bool for_comp=false) const;
// DocString: node_is_nan
/**
......@@ -508,16 +508,6 @@ public:
*/
virtual void set_value(const double* params, int offset=-1, const bool for_comp=false, const int depth=1) const = 0;
/**
* @brief Set the value of all training samples to the standardized for the feature inside the central data storage array
*
* @param offset (int) Where the current node is in the binary expression tree relative to other nodes at the same depth
* @param params A pointer to the bias and scale terms for this Node and its children
* @param for_comp (bool) If true then the evaluation is used for comparing features
* @param depth (int) How far down a given Node is from the root OperatorNode
*/
void set_standardized_value(const double* params, int offset=-1, const bool for_comp=false, const int depth=1) const;
/**
* @brief The pointer to where the feature's training data is stored
*
......@@ -529,17 +519,6 @@ public:
*/
virtual double* value_ptr(const double* params, int offset=-1, const bool for_comp=false, const int depth=1) const = 0;
/**
* @brief The pointer to where the feature's standardized training data is stored
*
* @param offset (int) Where the current node is in the binary expression tree relative to other nodes at the same depth
* @param params A pointer to the bias and scale terms for this Node and its children
* @param for_comp (bool) If true then the evaluation is used for comparing features
* @param depth (int) How far down a given Node is from the root OperatorNode
* @returns The pointer to the feature's data
*/
double* stand_value_ptr(const double* params, int offset=-1, const bool for_comp=false, const int depth=1) const;
/**
* @brief Set the value of all test samples for the feature inside the central data storage array
*
......@@ -550,16 +529,6 @@ public:
*/
virtual void set_test_value(const double* params, int offset=-1, const bool for_comp=false, const int depth=1) const = 0;
/**
* @brief Set the value of all test samples to the standardized for the feature inside the central data storage array
*
* @param offset (int) Where the current node is in the binary expression tree relative to other nodes at the same depth
* @param params A pointer to the bias and scale terms for this Node and its children
* @param for_comp (bool) If true then the evaluation is used for comparing features
* @param depth How far down a given Node is from the root OperatorNode
*/
void set_standardized_test_value(const double* params, int offset=-1, const bool for_comp=false, const int depth=1) const;
/**
* @brief The pointer to where the feature's test data is stored
*
......@@ -571,17 +540,6 @@ public:
*/
virtual double* test_value_ptr(const double* params, int offset=-1, const bool for_comp=false, const int depth=1) const = 0;
/**
* @brief The pointer to where the feature's standardized test data is stored
*
* @param offset (int) Where the current node is in the binary expression tree relative to other nodes at the same depth
* @param params A pointer to the bias and scale terms for this Node and its children
* @param for_comp (bool) If true then the evaluation is used for comparing features
* @param depth How far down a given Node is from the root OperatorNode
* @returns The pointer to the feature's data
*/
double* stand_test_value_ptr(const double* params, int offset=-1, const bool for_comp=false, const int depth=1) const;
/**
* @brief The expression of the feature
*
......
......@@ -31,7 +31,6 @@ int node_value_arrs::MAX_N_THREADS = omp_get_max_threads();
int node_value_arrs::N_OP_SLOTS = 0;
int node_value_arrs::N_PARAM_OP_SLOTS = 0;
int node_value_arrs::MAX_RUNG = 0;
int node_value_arrs::SZ_STAND_FEAT = 0;
std::vector<int> node_value_arrs::TEMP_STORAGE_REG;
std::vector<int> node_value_arrs::TEMP_STORAGE_TEST_REG;
......@@ -63,13 +62,10 @@ void node_value_arrs::initialize_values_arr(
N_RUNGS_STORED = 0;
N_STORE_FEATURES = n_primary_feat;
N_PRIMARY_FEATURES = n_primary_feat;
SZ_STAND_FEAT = std::max(N_SAMPLES, N_SELECTED);
VALUES_ARR = std::vector<double>(N_STORE_FEATURES * N_SAMPLES);
TEST_VALUES_ARR = std::vector<double>(N_STORE_FEATURES * N_SAMPLES_TEST);
STANDARDIZED_STORAGE_ARR = std::vector<double>((N_STORE_FEATURES + 1) * MAX_N_THREADS * SZ_STAND_FEAT);
STANDARDIZED_TEST_STORAGE_ARR = std::vector<double>((N_STORE_FEATURES + 1) * MAX_N_THREADS * N_SAMPLES_TEST);
STANDARDIZED_TEST_STORAGE_ARR = std::vector<double>(2 * (N_PRIMARY_FEATURES + 1) * MAX_N_THREADS * N_SAMPLES_TEST);
}
void node_value_arrs::initialize_values_arr(
......@@ -256,9 +252,6 @@ void node_value_arrs::resize_d_matrix_arr(const int n_select)
STANDARDIZED_D_MATRIX.resize(N_SELECTED * N_SAMPLES, 0.0);
STANDARDIZED_D_MATRIX.shrink_to_fit();
SZ_STAND_FEAT = std::max(N_SAMPLES, N_SELECTED);
STANDARDIZED_STORAGE_ARR = std::vector<double>((N_STORE_FEATURES + 1) * MAX_N_THREADS * SZ_STAND_FEAT);
}
void node_value_arrs::finalize_values_arr()
......
......@@ -63,7 +63,6 @@ namespace node_value_arrs
extern std::vector<double> STANDARDIZED_TEST_STORAGE_ARR; //!< The vector used to temporarily store the standardized feature test values
extern int N_SELECTED; //!< Number of selected features
extern int SZ_STAND_FEAT; //!< std::max(N_SELECTED, N_SAMPLES)
extern int N_SAMPLES; //!< Number of training samples for each feature (Sum of all elements in TASK_SZ_TRAIN)
extern int N_SAMPLES_TEST; //!< Number of test samples for each feature (Sum of all elements in TASK_SZ_TEST)
......@@ -298,20 +297,28 @@ namespace node_value_arrs
/**
* @brief Access element of temporary standardized storage array for the training data
*
* @param slot The slot of the temporary storage array
* @param arr_ind The array index of the feature
* @param for_comp True if used for a comparison
*
* @return pointer to the data stored in the specified slot
*/
inline double* access_temp_stand_storage(const unsigned long int slot){return &STANDARDIZED_STORAGE_ARR[slot*SZ_STAND_FEAT];}
inline double* access_temp_stand_storage(const unsigned long int arr_ind, const bool for_comp)
{
return &STANDARDIZED_STORAGE_ARR[((arr_ind % N_PRIMARY_FEATURES) + for_comp * N_PRIMARY_FEATURES) * N_PRIMARY_FEATURES];
}
/**
* @brief Access element of temporary standardized storage array for the test data
*
* @param slot The slot of the temporary storage array
* @param arr_ind The array index of the feature
* @param for_comp True if used for a comparison
*
* @return pointer to the data stored in the specified slot
*/
inline double* access_temp_stand_storage_test(const unsigned long int slot){return &STANDARDIZED_TEST_STORAGE_ARR[slot*N_SAMPLES_TEST];}
inline double* access_temp_stand_storage_test(const unsigned long int arr_ind, const bool for_comp)
{
return &STANDARDIZED_TEST_STORAGE_ARR[((arr_ind % N_PRIMARY_FEATURES) + for_comp * N_PRIMARY_FEATURES) * N_SAMPLES_TEST];
}
/**
* @brief Access the param storage array
......
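The accessors above now key the temporary standardized storage on the feature's arr_ind and a for_comp flag instead of an explicit slot: each primary-feature index gets one slot for normal evaluation and a second one for comparisons. A standalone sketch of the offset arithmetic (stride stands in for the per-slot length, which differs between the training and test variants):

```cpp
#include <cstddef>

// Offset arithmetic used by the access_temp_stand_storage accessors: the
// first n_primary_feat slots hold values for normal evaluation, the next
// n_primary_feat hold values used for comparisons (for_comp == true).
std::size_t stand_slot_offset(std::size_t arr_ind, bool for_comp,
                              std::size_t n_primary_feat, std::size_t stride)
{
    return ((arr_ind % n_primary_feat) + (for_comp ? n_primary_feat : 0)) * stride;
}

// Example: with 4 primary features and a stride of 100 doubles, a feature
// with arr_ind == 6 used for a comparison starts at (2 + 4) * 100 = 600.
```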
......@@ -21,6 +21,7 @@
#include "utils/compare_features.hpp"
#include <iomanip>
std::vector<double> comp_feats::DGEMV_OUT;
std::vector<double> comp_feats::RANK;
std::vector<int> comp_feats::INDEX;
......@@ -36,6 +37,7 @@ void comp_feats::set_is_valid_fxn(
{
if(max_corr < 0.99999)
{
DGEMV_OUT.resize(n_samp);
is_valid = valid_feature_against_selected_pearson;
is_valid_feat_list = valid_feature_against_selected_pearson_feat_list;
}
......@@ -79,9 +81,7 @@ bool comp_feats::valid_feature_against_selected_pearson_max_corr_1(
const int start_sel
)
{
double mean = util_funcs::mean<double>(val_ptr, n_samp);
double stand_dev = util_funcs::stand_dev(val_ptr, n_samp, mean);
double base_val = util_funcs::r(val_ptr, val_ptr, n_samp, mean, stand_dev, mean, stand_dev);
double base_val = std::inner_product(val_ptr, val_ptr + n_samp, val_ptr, 0.0);
for(int dd = start_sel; dd < end_sel; ++dd)
{
......@@ -90,8 +90,11 @@ bool comp_feats::valid_feature_against_selected_pearson_max_corr_1(
continue;
}
double comp_value = (
base_val - std::abs(util_funcs::r(val_ptr, node_value_arrs::get_d_matrix_ptr(dd), n_samp, mean, stand_dev))
double comp_value = std::inner_product(
val_ptr,
val_ptr + n_samp,
node_value_arrs::get_stand_d_matrix_ptr(dd),
-1.0 * base_val
);
if(std::abs(comp_value) < 5.0e-9)
{
......@@ -110,9 +113,7 @@ bool comp_feats::valid_feature_against_selected_pearson_max_corr_1_feat_list(
const double cur_score
)
{
double mean = util_funcs::mean<double>(val_ptr, n_samp);
double stand_dev = util_funcs::stand_dev(val_ptr, n_samp, mean);
double base_val = util_funcs::r(val_ptr, val_ptr, n_samp, mean, stand_dev, mean, stand_dev);
double base_val = std::inner_product(val_ptr, val_ptr + n_samp, val_ptr, 0.0);
for(int ff = 0; ff < selected.size(); ++ff)
{
......@@ -121,9 +122,13 @@ bool comp_feats::valid_feature_against_selected_pearson_max_corr_1_feat_list(
continue;
}
double comp_value = (
base_val - std::abs(util_funcs::r(val_ptr, selected[ff]->value_ptr(-1, true), n_samp, mean, stand_dev))
double comp_value = std::inner_product(
val_ptr,
val_ptr + n_samp,
selected[ff]->stand_value_ptr(true),
-1.0 * base_val
);
if(std::abs(comp_value) < 5.0e-9)
{
return false;
......@@ -140,9 +145,7 @@ bool comp_feats::valid_feature_against_selected_pearson_max_corr_1_mpi_op(
const double cur_score
)
{
double mean = util_funcs::mean<double>(val_ptr, n_samp);
double stand_dev = util_funcs::stand_dev(val_ptr, n_samp, mean);
double base_val = util_funcs::r(val_ptr, val_ptr, n_samp, mean, stand_dev, mean, stand_dev);
double base_val = std::inner_product(val_ptr, val_ptr + n_samp, val_ptr, 0.0);
for(auto& feat_sc : out_vec)
{
......@@ -151,9 +154,13 @@ bool comp_feats::valid_feature_against_selected_pearson_max_corr_1_mpi_op(
continue;
}
double comp_value = (
base_val - std::abs(util_funcs::r(val_ptr, feat_sc._feat->value_ptr(-1, true), n_samp, mean, stand_dev))
double comp_value = std::inner_product(
val_ptr,
val_ptr + n_samp,
feat_sc._feat->stand_value_ptr(true),
-1.0 * base_val
);
if(std::abs(comp_value) < 5.0e-9)
{
return false;
......@@ -173,29 +180,30 @@ bool comp_feats::valid_feature_against_selected_pearson(
const int start_sel
)
{
double mean = util_funcs::mean<double>(val_ptr, n_samp);
double stand_dev = util_funcs::stand_dev(val_ptr, n_samp, mean);
double base_val = util_funcs::r(val_ptr, val_ptr, n_samp, mean, stand_dev, mean, stand_dev);
DGEMV_OUT.resize(end_sel - start_sel);
double base_val = std::inner_product(val_ptr, val_ptr + n_samp, val_ptr, 0.0);
volatile bool is_valid = true;
#pragma omp parallel for schedule(dynamic)
for(int dd = start_sel; dd < end_sel; ++dd)
{
if(!is_valid)
{
continue;
}
dgemv_(
'N',
DGEMV_OUT.size(),
n_samp,
1.0,
node_value_arrs::get_stand_d_matrix_ptr(start_sel),
DGEMV_OUT.size(),
val_ptr,
1,
0.0,
DGEMV_OUT.data(),
1
);
double comp_value = (
base_val - std::abs(util_funcs::r(val_ptr, node_value_arrs::get_d_matrix_ptr(dd), n_samp, mean, stand_dev))
);
if(std::abs(comp_value) < (1.0 - cross_cor_max + 5.0e-9))
{
is_valid = false;
}
}
return is_valid;
double comp_value = (
base_val - std::abs(DGEMV_OUT[idamax_(DGEMV_OUT.size(), DGEMV_OUT.data(), 1)])
);
return comp_value < (1.0 - cross_cor_max + 5.0e-9);
}
bool comp_feats::valid_feature_against_selected_pearson_feat_list(
......@@ -207,15 +215,14 @@ bool comp_feats::valid_feature_against_selected_pearson_feat_list(
const double cur_score
)
{
double mean = util_funcs::mean<double>(val_ptr, n_samp);
double stand_dev = util_funcs::stand_dev(val_ptr, n_samp, mean);
double base_val = util_funcs::r(val_ptr, val_ptr, n_samp, mean, stand_dev, mean, stand_dev);
double base_val = std::inner_product(val_ptr, val_ptr + n_samp, val_ptr, 0.0);
for(auto& feat : selected)
{
double comp_value = (
base_val - std::abs(util_funcs::r(val_ptr, feat->value_ptr(-1, true), n_samp, mean, stand_dev))
double comp_value = std::abs(
std::inner_product(val_ptr, val_ptr + n_samp, feat->stand_value_ptr(true), -base_val)
);
if(std::abs(comp_value) < (1.0 - cross_cor_max + 5.0e-9))
{
return false;
......@@ -232,15 +239,14 @@ bool comp_feats::valid_feature_against_selected_pearson_mpi_op(
const double cur_score
)
{
double mean = util_funcs::mean<double>(val_ptr, n_samp);
double stand_dev = util_funcs::stand_dev(val_ptr, n_samp, mean);
double base_val = util_funcs::r(val_ptr, val_ptr, n_samp, mean, stand_dev, mean, stand_dev);
double base_val = std::inner_product(val_ptr, val_ptr + n_samp, val_ptr, 0.0);
for(auto& feat_sc : out_vec)
{
double comp_value = (
base_val - std::abs(util_funcs::r(val_ptr, feat_sc._feat->value_ptr(-1, true), n_samp, mean, stand_dev))
double comp_value = std::abs(
std::inner_product(val_ptr, val_ptr + n_samp, feat_sc._feat->stand_value_ptr(true), -base_val)
);
if(std::abs(comp_value) < (1.0 - cross_cor_max + 5.0e-9))
{
return false;
......
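The validity checks above all follow the same pattern: base_val = <v, v> for the candidate, then one inner product per already-selected feature against the standardized descriptor matrix (or, in the batched variant, a single dgemv_ plus idamax_). A standalone sketch of that screening, here normalizing by base_val so the rejection test reads directly as |r| > cross_cor_max (the code in the hunk keeps the unnormalized inner products):

```cpp
#include <cmath>
#include <numeric>
#include <vector>

// Reject a standardized candidate that is too correlated with any already
// selected (standardized) feature. Illustrative only; the library works on
// raw pointers into its standardized descriptor matrix.
bool passes_cross_cor_check(
    const std::vector<double>& cand,
    const std::vector<std::vector<double>>& selected,
    const double cross_cor_max)
{
    const double base_val =
        std::inner_product(cand.begin(), cand.end(), cand.begin(), 0.0);

    for (const auto& sel : selected)
    {
        const double r =
            std::inner_product(cand.begin(), cand.end(), sel.begin(), 0.0) / base_val;
        if (std::abs(r) > cross_cor_max)
        {
            return false; // too correlated with a selected feature
        }
    }
    return true;
}
```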
......@@ -121,6 +121,7 @@ struct node_sc_pair
namespace comp_feats
{
extern std::vector<double> DGEMV_OUT; //!< Global variable used to store the output of DGEMV calls
extern std::vector<double> RANK; //!< Global variable used to store the rank variables for Spearman correlation
extern std::vector<int> INDEX; //!< Global variable used to store the sorting indexes for Spearman correlation
......