Commit 7786a5a3 authored by Thomas Purcell's avatar Thomas Purcell
Browse files

Merge branch 'data_overwrite_error' into 'joss'

Use inner_product of Standardized Values instead of r for calculating overlap

See merge request tpurcell/cpp_sisso!37
parents 34492b28 6f8b8658
...@@ -63,14 +63,15 @@ if(EXTERNAL_BOOST) ...@@ -63,14 +63,15 @@ if(EXTERNAL_BOOST)
message(STATUS "Using external boost") message(STATUS "Using external boost")
set(EXTERNAL_BOOST TRUE) set(EXTERNAL_BOOST TRUE)
else(EXTERNAL_BOOST) else(EXTERNAL_BOOST)
if(NOT DEFINED EXTERNAL_BUILD_N_PROCS)
set(EXTERNAL_BUILD_N_PROCS 1 CACHE STRING "Number of processes to use when building Boost")
endif()
message(STATUS "Building boost wth ${EXTERNAL_BUILD_N_PROCS} process(es)") message(STATUS "Building boost wth ${EXTERNAL_BUILD_N_PROCS} process(es)")
include( ExternalProject ) include( ExternalProject )
set(EXTERNAL_BOOST FALSE) set(EXTERNAL_BOOST FALSE)
endif() endif()
if(NOT DEFINED EXTERNAL_BUILD_N_PROCS)
set(EXTERNAL_BUILD_N_PROCS 1 CACHE STRING "Number of processes to use when building Boost")
endif()
# Check for FindOpenMP # Check for FindOpenMP
find_package(OpenMP REQUIRED) find_package(OpenMP REQUIRED)
if (OPENMP_FOUND) if (OPENMP_FOUND)
......
...@@ -79,7 +79,7 @@ class FeatureSpace ...@@ -79,7 +79,7 @@ class FeatureSpace
const std::string _phi_out_file; //!< Filename of the file to output the feature set to const std::string _phi_out_file; //!< Filename of the file to output the feature set to
std::function<bool(const double*, const int, const double, const std::vector<double>&, const double, const int, const int)> _is_valid; //!< Function used to determine of a feature is too correlated to previously selected features std::function<bool(const double*, const int, const double, const std::vector<double>&, const double, const int, const int)> _is_valid; //!< Function used to determine of a feature is too correlated to previously selected features
std::function<bool(const double*, const int, const double, const std::vector<node_ptr>&, const std::vector<double>&, const double)> _is_valid_feat_list; //!< Function used to determine of a feature is too correlated to previously selected features within a given list std::function<int(const double*, const int, const double, const std::vector<node_ptr>&, const std::vector<double>&, const double)> _is_valid_feat_list; //!< Function used to determine of a feature is too correlated to previously selected features within a given list
std::shared_ptr<MPI_Interface> _mpi_comm; //!< the MPI communicator for the calculation std::shared_ptr<MPI_Interface> _mpi_comm; //!< the MPI communicator for the calculation
...@@ -146,6 +146,24 @@ public: ...@@ -146,6 +146,24 @@ public:
*/ */
void initialize_fs_output_files() const; void initialize_fs_output_files() const;
/**
* @brief Remove duplicate features from the feature space
*
* @param feat_set Feature space to remove the duplicates from
* @param start The index to start the removal from
*/
void remove_duplicate_features(std::vector<node_ptr>& feat_set, int start);
#ifdef PARAMETERIZE
/**
 * @brief Reorder features based on the number of parameters they have (smallest to largest)
 *
 * @param feat_set Feature space to reorder
 * @param start The index to start the reordering from
 * @return NOTE(review): the meaning of the returned int is not visible in this header -- confirm against the definition
 */
int reorder_by_n_params(std::vector<node_ptr>& feat_set, int start);
#endif
/** /**
* @brief Populate _phi using _phi_0 and the allowed operators up to (_max_rung - _n_rung_generate)^th rung * @brief Populate _phi using _phi_0 and the allowed operators up to (_max_rung - _n_rung_generate)^th rung
*/ */
......
...@@ -48,3 +48,49 @@ std::map<std::string, int> Node::primary_feature_decomp() const ...@@ -48,3 +48,49 @@ std::map<std::string, int> Node::primary_feature_decomp() const
} }
BOOST_SERIALIZATION_ASSUME_ABSTRACT(Node) BOOST_SERIALIZATION_ASSUME_ABSTRACT(Node)
/**
 * @brief Standardize the feature's training values and store the result
 *
 * @details The destination is the feature's column of the standardized descriptor matrix when the
 *          feature is selected, otherwise its slot in the temporary standardized storage array.
 *
 * @param for_comp If true then the evaluation is used for comparing features
 */
void Node::set_standardized_value(const bool for_comp) const
{
    // Resolve the destination before evaluating the feature, matching the original call order
    double* const dest = _selected
        ? node_value_arrs::get_stand_d_matrix_ptr(_d_mat_ind)
        : node_value_arrs::access_temp_stand_storage(_arr_ind, for_comp);

    util_funcs::standardize(value_ptr(-1, for_comp), _n_samp, dest);
}
void Node::set_standardized_test_value(const bool for_comp) const
{
double* val_ptr = value_ptr(-1, for_comp);
double* test_val_ptr = test_value_ptr(-1, for_comp);
double* stand_val_ptr = node_value_arrs::access_temp_stand_storage_test(_arr_ind, for_comp);
double mean = util_funcs::mean(val_ptr, _n_samp);
double stand_dev = util_funcs::stand_dev(val_ptr, _n_samp, mean);
std::transform(
test_val_ptr,
test_val_ptr + _n_samp_test,
stand_val_ptr,
[&](double val){return (val - mean) / stand_dev;}
);
}
/**
 * @brief Get the pointer to the feature's standardized training values
 *
 * @details A selected feature returns its column of the standardized descriptor matrix without
 *          recomputing anything; otherwise the values are standardized into the temporary
 *          standardized storage first and that slot is returned.
 *
 * @param for_comp If true then the evaluation is used for comparing features
 * @return Pointer to the standardized training values
 */
double* Node::stand_value_ptr(const bool for_comp) const
{
    if(!_selected)
    {
        set_standardized_value(for_comp);
        return node_value_arrs::access_temp_stand_storage(_arr_ind, for_comp);
    }

    return node_value_arrs::get_stand_d_matrix_ptr(_d_mat_ind);
}
/**
 * @brief Get the pointer to the feature's standardized test values
 *
 * @param for_comp If true then the evaluation is used for comparing features
 * @return Pointer to the slot of the temporary standardized test storage holding the values
 */
double* Node::stand_test_value_ptr(const bool for_comp) const
{
    // Recompute the standardized test values on every call before handing out the pointer
    set_standardized_test_value(for_comp);

    double* const stand_test_vals = node_value_arrs::access_temp_stand_storage_test(_arr_ind, for_comp);
    return stand_test_vals;
}
...@@ -279,6 +279,15 @@ public: ...@@ -279,6 +279,15 @@ public:
*/ */
virtual void set_value(int offset=-1, const bool for_comp=false) const = 0; virtual void set_value(int offset=-1, const bool for_comp=false) const = 0;
// DocString: node_set_stand_value
/**
 * @brief Set the value of all training samples to the standardized values for the feature inside the central data storage arrays
 *
 * @param for_comp (bool) If true then the evaluation is used for comparing features
 */
void set_standardized_value(const bool for_comp=false) const;
/** /**
* @brief The pointer to where the feature's training data is stored * @brief The pointer to where the feature's training data is stored
* *
...@@ -289,6 +298,16 @@ public: ...@@ -289,6 +298,16 @@ public:
*/ */
virtual double* value_ptr(int offset=-1, const bool for_comp=false) const = 0; virtual double* value_ptr(int offset=-1, const bool for_comp=false) const = 0;
/**
 * @brief The pointer to where the feature's standardized training data is stored
 *
 * @param for_comp (bool) If true then the evaluation is used for comparing features
 *
 * @return pointer to the feature's standardized training values
 */
double* stand_value_ptr(const bool for_comp=false) const;
// DocString: node_set_test_value // DocString: node_set_test_value
/** /**
* @brief Set the value of all test samples for the feature inside the central data storage array * @brief Set the value of all test samples for the feature inside the central data storage array
...@@ -298,6 +317,15 @@ public: ...@@ -298,6 +317,15 @@ public:
*/ */
virtual void set_test_value(int offset=-1, const bool for_comp=false) const = 0; virtual void set_test_value(int offset=-1, const bool for_comp=false) const = 0;
// DocString: node_set_stand_test_value
/**
 * @brief Set the value of all test samples to the standardized values for the feature inside the central data storage array
 *
 * @param for_comp (bool) If true then the evaluation is used for comparing features
 */
void set_standardized_test_value(const bool for_comp=false) const;
/** /**
* @brief The pointer to where the feature's test data is stored * @brief The pointer to where the feature's test data is stored
* *
...@@ -308,6 +336,16 @@ public: ...@@ -308,6 +336,16 @@ public:
*/ */
virtual double* test_value_ptr(int offset=-1, const bool for_comp=false) const = 0; virtual double* test_value_ptr(int offset=-1, const bool for_comp=false) const = 0;
/**
 * @brief The pointer to where the feature's standardized test data is stored
 *
 * @param for_comp (bool) If true then the evaluation is used for comparing features
 *
 * @return pointer to the feature's standardized test values
 */
double* stand_test_value_ptr(const bool for_comp=false) const;
// DocString: node_is_nan // DocString: node_is_nan
/** /**
* @brief Check if the feature has a NaN value in it * @brief Check if the feature has a NaN value in it
......
...@@ -47,6 +47,10 @@ std::vector<double> node_value_arrs::TEST_VALUES_ARR; ...@@ -47,6 +47,10 @@ std::vector<double> node_value_arrs::TEST_VALUES_ARR;
std::vector<double> node_value_arrs::TEMP_STORAGE_ARR; std::vector<double> node_value_arrs::TEMP_STORAGE_ARR;
std::vector<double> node_value_arrs::TEMP_STORAGE_TEST_ARR; std::vector<double> node_value_arrs::TEMP_STORAGE_TEST_ARR;
// Descriptor matrix filled with standardized feature values for the selected features
std::vector<double> node_value_arrs::STANDARDIZED_D_MATRIX;
// Temporary storage for standardized training values
std::vector<double> node_value_arrs::STANDARDIZED_STORAGE_ARR;
// Temporary storage for standardized test values
std::vector<double> node_value_arrs::STANDARDIZED_TEST_STORAGE_ARR;
void node_value_arrs::initialize_values_arr( void node_value_arrs::initialize_values_arr(
const int n_samples, const int n_samples,
const int n_samples_test, const int n_samples_test,
...@@ -61,6 +65,8 @@ void node_value_arrs::initialize_values_arr( ...@@ -61,6 +65,8 @@ void node_value_arrs::initialize_values_arr(
VALUES_ARR = std::vector<double>(N_STORE_FEATURES * N_SAMPLES); VALUES_ARR = std::vector<double>(N_STORE_FEATURES * N_SAMPLES);
TEST_VALUES_ARR = std::vector<double>(N_STORE_FEATURES * N_SAMPLES_TEST); TEST_VALUES_ARR = std::vector<double>(N_STORE_FEATURES * N_SAMPLES_TEST);
STANDARDIZED_STORAGE_ARR = std::vector<double>(2 * (N_PRIMARY_FEATURES + 1) * N_SAMPLES * MAX_N_THREADS);
STANDARDIZED_TEST_STORAGE_ARR = std::vector<double>(2 * (N_PRIMARY_FEATURES + 1) * N_SAMPLES_TEST * MAX_N_THREADS);
} }
void node_value_arrs::initialize_values_arr( void node_value_arrs::initialize_values_arr(
...@@ -174,6 +180,9 @@ void node_value_arrs::resize_values_arr(const int n_dims, const int n_feat) ...@@ -174,6 +180,9 @@ void node_value_arrs::resize_values_arr(const int n_dims, const int n_feat)
{ {
N_PRIMARY_FEATURES = N_STORE_FEATURES; N_PRIMARY_FEATURES = N_STORE_FEATURES;
STANDARDIZED_STORAGE_ARR = std::vector<double>(2 * (N_PRIMARY_FEATURES + 1) * N_SAMPLES * MAX_N_THREADS);
STANDARDIZED_TEST_STORAGE_ARR = std::vector<double>(2 * (N_PRIMARY_FEATURES + 1) * N_SAMPLES_TEST * MAX_N_THREADS);
TEMP_STORAGE_ARR.resize(MAX_N_THREADS * (N_OP_SLOTS * N_PRIMARY_FEATURES + 1) * N_SAMPLES); TEMP_STORAGE_ARR.resize(MAX_N_THREADS * (N_OP_SLOTS * N_PRIMARY_FEATURES + 1) * N_SAMPLES);
TEMP_STORAGE_ARR.shrink_to_fit(); TEMP_STORAGE_ARR.shrink_to_fit();
...@@ -236,6 +245,7 @@ void node_value_arrs::initialize_d_matrix_arr() ...@@ -236,6 +245,7 @@ void node_value_arrs::initialize_d_matrix_arr()
{ {
N_SELECTED = 0; N_SELECTED = 0;
D_MATRIX = std::vector<double>(0); D_MATRIX = std::vector<double>(0);
STANDARDIZED_D_MATRIX = std::vector<double>(0);
} }
void node_value_arrs::resize_d_matrix_arr(const int n_select) void node_value_arrs::resize_d_matrix_arr(const int n_select)
...@@ -243,6 +253,9 @@ void node_value_arrs::resize_d_matrix_arr(const int n_select) ...@@ -243,6 +253,9 @@ void node_value_arrs::resize_d_matrix_arr(const int n_select)
N_SELECTED += n_select; N_SELECTED += n_select;
D_MATRIX.resize(N_SELECTED * N_SAMPLES, 0.0); D_MATRIX.resize(N_SELECTED * N_SAMPLES, 0.0);
D_MATRIX.shrink_to_fit(); D_MATRIX.shrink_to_fit();
STANDARDIZED_D_MATRIX.resize(N_SELECTED * N_SAMPLES, 0.0);
STANDARDIZED_D_MATRIX.shrink_to_fit();
} }
void node_value_arrs::finalize_values_arr() void node_value_arrs::finalize_values_arr()
...@@ -265,11 +278,18 @@ void node_value_arrs::finalize_values_arr() ...@@ -265,11 +278,18 @@ void node_value_arrs::finalize_values_arr()
TASK_START_TRAIN.resize(0); TASK_START_TRAIN.resize(0);
TASK_SZ_TEST.resize(0); TASK_SZ_TEST.resize(0);
PARAM_STORAGE_ARR.resize(0);
PARAM_STORAGE_TEST_ARR.resize(0);
D_MATRIX.resize(0); D_MATRIX.resize(0);
VALUES_ARR.resize(0); VALUES_ARR.resize(0);
TEST_VALUES_ARR.resize(0); TEST_VALUES_ARR.resize(0);
TEMP_STORAGE_ARR.resize(0); TEMP_STORAGE_ARR.resize(0);
TEMP_STORAGE_TEST_ARR.resize(0); TEMP_STORAGE_TEST_ARR.resize(0);
PARAM_STORAGE_ARR.resize(0);
PARAM_STORAGE_TEST_ARR.resize(0);
STANDARDIZED_D_MATRIX.resize(0);
STANDARDIZED_STORAGE_ARR.resize(0);
STANDARDIZED_TEST_STORAGE_ARR.resize(0);
} }
...@@ -58,6 +58,10 @@ namespace node_value_arrs ...@@ -58,6 +58,10 @@ namespace node_value_arrs
extern std::vector<int> TASK_START_TRAIN; //!< The starting point for each task in the training data extern std::vector<int> TASK_START_TRAIN; //!< The starting point for each task in the training data
extern std::vector<int> TASK_SZ_TEST; //!< Number of test sample per task extern std::vector<int> TASK_SZ_TEST; //!< Number of test sample per task
extern std::vector<double> STANDARDIZED_D_MATRIX; //!< The descriptor matrix filled with standardized feature values (Central storage for the selected feature space)
extern std::vector<double> STANDARDIZED_STORAGE_ARR; //!< The vector used to temporarily store the standardized feature training values
extern std::vector<double> STANDARDIZED_TEST_STORAGE_ARR; //!< The vector used to temporarily store the standardized feature test values
extern int N_SELECTED; //!< Number of selected features extern int N_SELECTED; //!< Number of selected features
extern int N_SAMPLES; //!< Number of training samples for each feature (Sum of all elements in TASK_SZ_TRAIN) extern int N_SAMPLES; //!< Number of training samples for each feature (Sum of all elements in TASK_SZ_TRAIN)
...@@ -290,6 +294,38 @@ namespace node_value_arrs ...@@ -290,6 +294,38 @@ namespace node_value_arrs
*/ */
inline double* access_temp_storage_test(const unsigned long int slot){return &TEMP_STORAGE_TEST_ARR[slot*N_SAMPLES_TEST];} inline double* access_temp_storage_test(const unsigned long int slot){return &TEMP_STORAGE_TEST_ARR[slot*N_SAMPLES_TEST];}
/**
 * @brief Access element of temporary standardized storage array for the training data
 *
 * @details Every thread index returned by omp_get_thread_num() addresses its own region of
 *          2 * (N_PRIMARY_FEATURES + 1) * N_SAMPLES doubles. Inside a region the slot is
 *          arr_ind % N_PRIMARY_FEATURES, shifted by N_PRIMARY_FEATURES when for_comp is true.
 *
 * @param arr_ind The array index of the feature
 * @param for_comp True if used for a comparison
 *
 * @return pointer to the data stored in the specified slot
 */
inline double* access_temp_stand_storage(const unsigned long int arr_ind, const bool for_comp)
{
    // N_SAMPLES is an int and omp_get_thread_num() returns an int, so the per-thread offset was
    // previously multiplied in int arithmetic and could overflow for large
    // thread * feature * sample counts. Widen every factor before multiplying.
    const unsigned long int n_feat = static_cast<unsigned long int>(N_PRIMARY_FEATURES);
    const unsigned long int n_samp = static_cast<unsigned long int>(N_SAMPLES);
    const unsigned long int thread_ind = static_cast<unsigned long int>(omp_get_thread_num());

    const unsigned long int slot = (arr_ind % n_feat) + (for_comp ? n_feat : 0UL);
    const unsigned long int thread_offset = thread_ind * 2UL * (n_feat + 1UL) * n_samp;

    return &STANDARDIZED_STORAGE_ARR[slot * n_samp + thread_offset];
}
/**
 * @brief Access element of temporary standardized storage array for the test data
 *
 * @details Every thread index returned by omp_get_thread_num() addresses its own region of
 *          2 * (N_PRIMARY_FEATURES + 1) * N_SAMPLES_TEST doubles. Inside a region the slot is
 *          arr_ind % N_PRIMARY_FEATURES, shifted by N_PRIMARY_FEATURES when for_comp is true.
 *
 * @param arr_ind The array index of the feature
 * @param for_comp True if used for a comparison
 *
 * @return pointer to the data stored in the specified slot
 */
inline double* access_temp_stand_storage_test(const unsigned long int arr_ind, const bool for_comp)
{
    // omp_get_thread_num() returns an int (and N_SAMPLES_TEST is presumably an int like
    // N_SAMPLES -- TODO confirm), so widen every factor before multiplying to keep the
    // per-thread offset from overflowing int arithmetic.
    const unsigned long int n_feat = static_cast<unsigned long int>(N_PRIMARY_FEATURES);
    const unsigned long int n_samp_test = static_cast<unsigned long int>(N_SAMPLES_TEST);
    const unsigned long int thread_ind = static_cast<unsigned long int>(omp_get_thread_num());

    const unsigned long int slot = (arr_ind % n_feat) + (for_comp ? n_feat : 0UL);
    const unsigned long int thread_offset = thread_ind * 2UL * (n_feat + 1UL) * n_samp_test;

    return &STANDARDIZED_TEST_STORAGE_ARR[slot * n_samp_test + thread_offset];
}
/** /**
* @brief Access the param storage array * @brief Access the param storage array
* *
...@@ -367,7 +403,7 @@ namespace node_value_arrs ...@@ -367,7 +403,7 @@ namespace node_value_arrs
); );
/** /**
* @brief Get the pointer to a particular selected Node from sis * @brief Get the pointer to a particular selected Node's data from sis
* *
* @param ind Index of the data in the descriptor matrix * @param ind Index of the data in the descriptor matrix
* @return The pointer to the descriptor matrix's data * @return The pointer to the descriptor matrix's data
...@@ -375,7 +411,7 @@ namespace node_value_arrs ...@@ -375,7 +411,7 @@ namespace node_value_arrs
inline double* get_d_matrix_ptr(const int ind){return &D_MATRIX[ind * N_SAMPLES];} inline double* get_d_matrix_ptr(const int ind){return &D_MATRIX[ind * N_SAMPLES];}
/** /**
* @brief Get the pointer to a particular selected Node from sis * @brief Get the pointer to a particular selected Node's data from sis
* *
* @param ind Index of the data in the descriptor matrix * @param ind Index of the data in the descriptor matrix
* @param taskind The index for the given task * @param taskind The index for the given task
...@@ -383,6 +419,23 @@ namespace node_value_arrs ...@@ -383,6 +419,23 @@ namespace node_value_arrs
*/ */
inline double* get_d_matrix_ptr(const int ind, const int taskind){return &D_MATRIX[ind * N_SAMPLES + TASK_START_TRAIN[taskind]];} inline double* get_d_matrix_ptr(const int ind, const int taskind){return &D_MATRIX[ind * N_SAMPLES + TASK_START_TRAIN[taskind]];}
/**
 * @brief Get the pointer to a particular selected Node's standardized data from sis
 *
 * @param ind Index of the data in the descriptor matrix
 * @return The pointer to the descriptor matrix's standardized data
 */
inline double* get_stand_d_matrix_ptr(const int ind)
{
    // Each selected feature occupies N_SAMPLES consecutive entries
    const auto col_start = ind * N_SAMPLES;
    return &STANDARDIZED_D_MATRIX[col_start];
}
/**
 * @brief Get the pointer to a particular selected Node's standardized data from sis, starting at a given task
 *
 * @param ind Index of the data in the descriptor matrix
 * @param taskind The index for the given task
 * @return The pointer to the descriptor matrix's standardized data
 */
inline double* get_stand_d_matrix_ptr(const int ind, const int taskind)
{
    // Start of the feature's column plus the first training sample of the requested task
    const auto task_start = ind * N_SAMPLES + TASK_START_TRAIN[taskind];
    return &STANDARDIZED_D_MATRIX[task_start];
}
/** /**
* @brief Flush the temporary storage register (training data) * @brief Flush the temporary storage register (training data)
* @details Reset all slots in the register to -1 * @details Reset all slots in the register to -1
......
...@@ -65,20 +65,20 @@ std::vector<node_sc_pair> mpi_reduce_op::select_top_feats(std::vector<node_sc_pa ...@@ -65,20 +65,20 @@ std::vector<node_sc_pair> mpi_reduce_op::select_top_feats(std::vector<node_sc_pa
// Merge input vectors and sort // Merge input vectors and sort
in_vec_2.insert(in_vec_2.end(), in_vec_1.begin(), in_vec_1.end()); in_vec_2.insert(in_vec_2.end(), in_vec_1.begin(), in_vec_1.end());
std::sort(in_vec_2.begin(), in_vec_2.end(), my_sorter); std::sort(in_vec_2.begin(), in_vec_2.end());
// Populate the output vector // Populate the output vector
int ff = 0; int ff = 0;
int out_ind = 0; int out_ind = 0;
while((out_ind < N_SIS_SELECT) && (ff < in_vec_2.size())) while((out_ind < N_SIS_SELECT) && (ff < in_vec_2.size()))
{ {
const node_ptr cur_node = std::get<0>(in_vec_2[ff]); if(in_vec_2[ff]._feat && IS_VALID(in_vec_2[ff]._feat->stand_value_ptr(), in_vec_2[ff]._feat->n_samp(), CROSS_COR_MAX, out_vec, in_vec_2[ff]._score))
if(cur_node && IS_VALID(cur_node->value_ptr(), cur_node->n_samp(), CROSS_COR_MAX, out_vec, std::get<1>(in_vec_2[ff])))
{ {
out_vec.push_back(in_vec_2[ff]); out_vec.push_back(in_vec_2[ff]);
++out_ind; ++out_ind;
} }
++ff; ++ff;
} }
return out_vec; return out_vec;
} }
...@@ -35,26 +35,6 @@ namespace mpi_reduce_op ...@@ -35,26 +35,6 @@ namespace mpi_reduce_op
extern double CROSS_COR_MAX; //!< The maximum cross correlation between features extern double CROSS_COR_MAX; //!< The maximum cross correlation between features
extern int N_SIS_SELECT; //!< The number of features to select extern int N_SIS_SELECT; //!< The number of features to select
/**
* @brief Create a node_sc pair from a node_ptr and a score value
*
* @param feat the node_ptr for the pair
* @param sc the score for the pair
*
* @return The resulting pair
*/
inline node_sc_pair make_node_sc_pair(node_ptr feat, double sc){return std::make_tuple(feat, sc);}
/**
* @brief The function for sorting different node_sc pointers
*
* @param node_1 first node to compare
* @param node_2 second node to compare
*
* @return True if the score of node_1 is less than the score of node_2
*/
inline bool my_sorter(node_sc_pair node_1, node_sc_pair node_2){ return (std::get<1>(node_1) < std::get<1>(node_2)); }
/** /**
* @brief Get the top features of the combined input vectors * @brief Get the top features of the combined input vectors
* *
......
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
#include "utils/compare_features.hpp" #include "utils/compare_features.hpp"
#include <iomanip> #include <iomanip>
std::vector<double> comp_feats::DGEMV_OUT;
std::vector<double> comp_feats::RANK; std::vector<double> comp_feats::RANK;
std::vector<int> comp_feats::INDEX; std::vector<int> comp_feats::INDEX;
...@@ -29,13 +30,14 @@ void comp_feats::set_is_valid_fxn( ...@@ -29,13 +30,14 @@ void comp_feats::set_is_valid_fxn(
const double max_corr, const double max_corr,
const int n_samp, const int n_samp,
std::function<bool(const double*, const int, const double, const std::vector<double>&, const double, const int, const int)>& is_valid, std::function<bool(const double*, const int, const double, const std::vector<double>&, const double, const int, const int)>& is_valid,
std::function<bool(const double*, const int, const double, const std::vector<node_ptr>&, const std::vector<double>&, const double)>& is_valid_feat_list std::function<int(const double*, const int, const double, const std::vector<node_ptr>&, const std::vector<double>&, const double)>& is_valid_feat_list
) )
{ {
if(project_type.compare("classification") != 0) if(project_type.compare("classification") != 0)
{ {
if(max_corr < 0.99999) if(max_corr < 0.99999)
{ {
DGEMV_OUT.resize(n_samp);
is_valid = valid_feature_against_selected_pearson; is_valid = valid_feature_against_selected_pearson;
is_valid_feat_list = valid_feature_against_selected_pearson_feat_list; is_valid_feat_list = valid_feature_against_selected_pearson_feat_list;
} }
...@@ -79,9 +81,7 @@ bool comp_feats::valid_feature_against_selected_pearson_max_corr_1( ...@@ -79,9 +81,7 @@ bool comp_feats::valid_feature_against_selected_pearson_max_corr_1(
const int start_sel const int start_sel
) )
{ {
double mean = util_funcs::mean<double>(val_ptr, n_samp); double base_val = std::inner_product(val_ptr, val_ptr + n_samp, val_ptr, 0.0);
double stand_dev = util_funcs::stand_dev(val_ptr, n_samp, mean);
double base_val = util_funcs::r(val_ptr, val_ptr, n_samp, mean, stand_dev, mean, stand_dev);
for(int dd = start_sel; dd < end_sel; ++dd) for(int dd = start_sel; dd < end_sel; ++dd)
{ {
...@@ -90,9 +90,18 @@ bool comp_feats::valid_feature_against_selected_pearson_max_corr_1( ...@@ -90,9 +90,18 @@ bool comp_feats::valid_feature_against_selected_pearson_max_corr_1(
continue; continue;
} }
double comp_value = ( double comp_value = 1.0 / static_cast<double>(n_samp) * (
base_val - std::abs(util_funcs::r(val_ptr, node_value_arrs::get_d_matrix_ptr(dd), n_samp, mean, stand_dev)) base_val -
std::abs(
std::inner_product(
val_ptr,
val_ptr + n_samp,
node_value_arrs::get_stand_d_matrix_ptr(dd),
0.0
)
)
); );
if(std::abs(comp_value) < 5.0e-9) if(std::abs(comp_value) < 5.0e-9)
{ {
return false; return false;
...@@ -101,7 +110,7 @@ bool comp_feats::valid_feature_against_selected_pearson_max_corr_1( ...@@ -101,7 +110,7 @@ bool comp_feats::valid_feature_against_selected_pearson_max_corr_1(
return true; return true;
} }
bool comp_feats::valid_feature_against_selected_pearson_max_corr_1_feat_list( int comp_feats::valid_feature_against_selected_pearson_max_corr_1_feat_list(
const double* val_ptr, const double* val_ptr,
const int n_samp, const int n_samp,
const double cross_cor_max, const double cross_cor_max,
...@@ -110,9 +119,7 @@ bool comp_feats::valid_feature_against_selected_pearson_max_corr_1_feat_list( ...@@ -110,9 +119,7 @@ bool comp_feats::valid_feature_against_selected_pearson_max_corr_1_feat_list(
const double cur_score const double cur_score
) )
{ {
double mean = util_funcs::mean<double>(val_ptr, n_samp); double base_val = std::inner_product(val_ptr, val_ptr + n_samp, val_ptr, 0.0);