Commit 9e96d36c authored by Thomas Purcell

Update value array accessing for new set up

op_slots are now used throughout
parent 76c06f01
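The key change is that intermediate feature values are now addressed by an operator slot derived from the feature's rung and an overwrite-protection offset, rather than by a bare offset. Below is a minimal sketch (not part of the commit) that mirrors the `get_op_slot` expression added in the header diff; `MAX_RUNG = 2` is an arbitrary choice and `N_OP_SLOTS = 2**MAX_RUNG - 1` is an assumption inferred from the formula, not a value taken from the code.

```python
# Minimal sketch of the new op_slot indexing (not library code).
# MAX_RUNG and N_OP_SLOTS are illustrative; N_OP_SLOTS = 2**MAX_RUNG - 1 is an
# assumption inferred from the formula below.
MAX_RUNG = 2
N_OP_SLOTS = 2**MAX_RUNG - 1


def get_op_slot(rung, offset):
    """Mirror of node_value_arrs::get_op_slot from the header diff."""
    return abs(N_OP_SLOTS - 2 ** (MAX_RUNG - rung) - offset)


# Every non-primary (rung >= 1) feature lands in its own slot, so children of
# an operator node no longer overwrite each other's intermediate values.
for rung in range(1, MAX_RUNG + 1):
    for offset in range(2 ** (MAX_RUNG - rung)):  # assumed offset range per rung
        print(f"rung {rung}, offset {offset} -> op_slot {get_op_slot(rung, offset)}")
```

Under these assumptions every (rung, offset) pair for rungs 1 through MAX_RUNG maps to a distinct slot in 0 .. N_OP_SLOTS - 1.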
......@@ -54,19 +54,6 @@ namespace node_value_arrs
*/
void initialize_values_arr(int n_samples, int n_samples_test, int n_primary_feat, int max_rung, bool et_task_sz);
/**
* @brief Initialize the node value arrays
* @details Using the size of the initial feature space, construct the storage arrays
*
* @param n_samples Number of training samples for each feature
* @param n_samples_test Number of test samples for each feature
* @param n_primary_feat Number of primary features
*/
inline void initialize_values_arr(int n_samples, int n_samples_test, int n_primary_feat)
{
initialize_values_arr(n_samples, n_samples_test, n_primary_feat, 0, true);
}
/**
* @brief Initialize the node value arrays
* @details Using the size of the initial feature space, construct the storage arrays
......@@ -90,7 +77,7 @@ namespace node_value_arrs
* @param n_primary_feat Number of primary features
* @param max_rung Largest rung of a feature
*/
void initialize_values_arr(std::vector<int> task_sz_train, std::vector<int> task_sz_test, int n_primary_feat, int max_rung=0);
void initialize_values_arr(std::vector<int> task_sz_train, std::vector<int> task_sz_test, int n_primary_feat, int max_rung);
/**
* @brief Resize the node value arrays
......@@ -131,25 +118,57 @@ namespace node_value_arrs
*/
void set_task_sz_test(std::vector<int> task_sz_test);
/**
* @brief Get the operator slot associated with a given rung/offset
*
* @param rung Rung of the feature
* @param offset Offset used to prevent overwrites
*
* @return The operator slot to use
*/
inline int get_op_slot(int rung, int offset){return std::abs(N_OP_SLOTS - static_cast<int>(std::pow(2, MAX_RUNG - rung)) - offset);}
/**
* @brief Get a reference to the slot/feature register element for the training data
*
* @param ind The Node's arr_ind
* @param op_slot(int) Operator slot for TEMP_STORE_ARRAY
*
* @return The register element for a given feature index and op_slot
*/
inline int& temp_storage_reg(unsigned long int ind, int op_slot = 0){return TEMP_STORAGE_REG[(ind % N_STORE_FEATURES) + (op_slot % N_OP_SLOTS) * N_STORE_FEATURES + omp_get_thread_num() * (N_STORE_FEATURES * N_OP_SLOTS + 1)];}
/**
* @brief Get a reference to the slot/feature register element for the test data
*
* @param ind The Node's arr_ind
* @param op_slot(int) Operator slot for TEMP_STORE_TEST_ARRAY
*
* @return The register element for a given feature index and op_slot
*/
inline int& temp_storage_test_reg(unsigned long int ind, int op_slot = 0){return TEMP_STORAGE_TEST_REG[(ind % N_STORE_FEATURES) + (op_slot % N_OP_SLOTS) * N_STORE_FEATURES + omp_get_thread_num() * (N_STORE_FEATURES * N_OP_SLOTS + 1)];}
/**
* @brief Get a reference to the slot/feature register element for the training data
*
* @param ind The Node's arr_ind
* @param offset(int) Offset integer for TEMP_STORE_ARRAY
* @param rung Rung of the feature
* @param offset Offset used to prevent overwrites
*
* @return The register element for a given feature index and offset
*/
inline int& temp_storage_reg(unsigned long int ind, int offset = 0){return TEMP_STORAGE_REG[(ind % N_STORE_FEATURES) + (offset % N_OP_SLOTS) * N_STORE_FEATURES + omp_get_thread_num() * (N_STORE_FEATURES * N_OP_SLOTS + 1)];}
inline int& temp_storage_reg(unsigned long int ind, int rung, int offset){return TEMP_STORAGE_REG[(ind % N_STORE_FEATURES) + (get_op_slot(rung, offset) % N_OP_SLOTS) * N_STORE_FEATURES + omp_get_thread_num() * (N_STORE_FEATURES * N_OP_SLOTS + 1)];}
/**
* @brief Get a reference to the slot/feature register element for the test data
*
* @param ind The Node's arr_ind
* @param offset(int) Offset integer for TEMP_STORE_TEST_ARRAY
* @param rung Rung of the feature
* @param offset Offset used to prevent overwrites
*
* @return The register element for a given feature index and offset
*/
inline int& temp_storage_test_reg(unsigned long int ind, int offset = 0){return TEMP_STORAGE_TEST_REG[(ind % N_STORE_FEATURES) + (offset % N_OP_SLOTS) * N_STORE_FEATURES + omp_get_thread_num() * (N_STORE_FEATURES * N_OP_SLOTS + 1)];}
inline int& temp_storage_test_reg(unsigned long int ind, int rung, int offset){return TEMP_STORAGE_TEST_REG[(ind % N_STORE_FEATURES) + (get_op_slot(rung, offset) % N_OP_SLOTS) * N_STORE_FEATURES + omp_get_thread_num() * (N_STORE_FEATURES * N_OP_SLOTS + 1)];}
/**
* @brief Access element of the permanent training data storage array
......@@ -196,7 +215,7 @@ namespace node_value_arrs
*
* @return The value pointer
*/
double* get_value_ptr(unsigned long int arr_ind, unsigned long int feat_ind, int offset = 0);
double* get_value_ptr(unsigned long int arr_ind, unsigned long int feat_ind, int rung = 0, int offset = 0);
/**
* @brief Get a Node's test_value_ptr
......@@ -207,7 +226,7 @@ namespace node_value_arrs
*
* @return The value pointer
*/
double* get_test_value_ptr(unsigned long int arr_ind, unsigned long int feat_ind, int offset = 0);
double* get_test_value_ptr(unsigned long int arr_ind, unsigned long int feat_ind, int rung = 0, int offset = 0);
/**
* @brief Get the pointer to a particular selected Node from SIS
......@@ -230,7 +249,7 @@ namespace node_value_arrs
* @brief Flush the temporary storage register (training data)
* @details Reset all slots in the register to -1
*/
inline void clear_temp_reg_thread(){std::fill_n(TEMP_STORAGE_REG.data() + N_STORE_FEATURES * 3 * omp_get_thread_num(), N_STORE_FEATURES * 3, -1);}
inline void clear_temp_reg_thread(){std::fill_n(TEMP_STORAGE_REG.data() + N_STORE_FEATURES * N_OP_SLOTS * omp_get_thread_num(), N_STORE_FEATURES * N_OP_SLOTS, -1);}
/**
* @brief Flush the temporary storage register (test data)
......
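For reference, the per-thread layout implied by the updated `temp_storage_reg` indexing can be unpacked as below; this is a hedged sketch with illustrative sizes, not library code. Each thread addresses its own contiguous block of register entries, initialized to -1 (meaning no feature is cached in that slot yet).

```python
# Sketch (not from the commit) of the register indexing used by temp_storage_reg.
# The sizes here are illustrative placeholders.
N_STORE_FEATURES = 4   # assumed number of stored primary features
N_OP_SLOTS = 3         # assumed number of operator slots
N_THREADS = 2          # assumed OpenMP thread count

# One block per thread; -1 marks a slot with no cached feature.
TEMP_STORAGE_REG = [-1] * (N_THREADS * (N_STORE_FEATURES * N_OP_SLOTS + 1))


def temp_storage_reg_index(ind, op_slot, thread):
    """Mirror of the indexing expression used by temp_storage_reg in the diff."""
    return (
        ind % N_STORE_FEATURES
        + (op_slot % N_OP_SLOTS) * N_STORE_FEATURES
        + thread * (N_STORE_FEATURES * N_OP_SLOTS + 1)
    )


# Example: feature with arr_ind 5 cached in op_slot 2 on thread 1.
idx = temp_storage_reg_index(5, 2, 1)
TEMP_STORAGE_REG[idx] = 5
print(idx, TEMP_STORAGE_REG[idx])
```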
......@@ -53,7 +53,13 @@ def get_unit(header):
def generate_phi_0_from_csv(
df, prop_key, cols="all", task_key=None, leave_out_frac=0.0, leave_out_inds=None
df,
prop_key,
cols="all",
task_key=None,
leave_out_frac=0.0,
leave_out_inds=None,
max_rung=None,
):
"""Create initial feature set from csv file
......@@ -63,6 +69,7 @@ def generate_phi_0_from_csv(
cols (list or str): The columns to include in the initial feature set
task_key (str): The key corresponding to the csv column where the task differentiation is stored
leave_out_frac (float): Fraction of samples to randomly leave out of the training data to act as a test set
max_rung (int): Maximum rung of a feature
Returns:
phi_0 (list of FeatureNodes): The list of primary features
......@@ -73,6 +80,8 @@ def generate_phi_0_from_csv(
leave_out_frac (float): Fraction of samples to leave out
leave_out_inds (list): Indices to use as the test set
"""
if max_rung is None:
raise ValueError("Maximum rung for the calculation is not defined.")
# Load csv file
if isinstance(df, str):
......@@ -166,7 +175,7 @@ def generate_phi_0_from_csv(
exprs = [col.split("(")[0] for col in columns]
units = [get_unit(col) for col in columns]
initialize_values_arr(len(train_inds), len(leave_out_inds), len(columns))
initialize_values_arr(len(train_inds), len(leave_out_inds), len(columns), max_rung)
test_values = df.to_numpy().T[:, leave_out_inds]
values = df.to_numpy().T[:, train_inds]
......@@ -295,6 +304,7 @@ def generate_fs_sr_from_csv(
task_key=task_key,
leave_out_frac=leave_out_frac,
leave_out_inds=leave_out_inds,
max_rung=max_phi,
)
fs = generate_fs(
......
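On the python side, `generate_phi_0_from_csv` now requires `max_rung` so that `initialize_values_arr` can size the op_slot storage up front. A hypothetical call is sketched below; the module name, csv file name, and column keys are assumptions for illustration and are not taken from the diff.

```python
# Hypothetical usage of the updated helper; the import path, csv file, and
# column keys below are placeholders, not values from this repository.
from cpp_sisso import generate_phi_0_from_csv  # assumed module name

result = generate_phi_0_from_csv(
    "data.csv",          # csv file with one column per primary feature (assumed)
    "E_form (eV)",       # property column key (assumed)
    cols="all",
    task_key=None,
    leave_out_frac=0.1,
    leave_out_inds=None,
    max_rung=2,          # now required: largest rung the calculation will build
)
```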
......@@ -37,7 +37,7 @@ void sisso::register_all()
sisso::feature_creation::node::registerSqrtNode();
sisso::feature_creation::node::registerSixPowNode();
void (*init_val_ar)(int, int, int) = &node_value_arrs::initialize_values_arr;
void (*init_val_ar)(int, int, int, int) = &node_value_arrs::initialize_values_arr;
def("phi_selected_from_file", &str2node::phi_selected_from_file_py);
def("initialize_values_arr", init_val_ar);
......
......@@ -42,10 +42,10 @@ namespace sisso
inline Unit unit(){return this->get_override("unit")();}
inline std::vector<double> value(){return this->get_override("value")();}
inline std::vector<double> test_value(){return this->get_override("test_value")();}
inline void set_value(int offset = -1){this->get_override("set_value")();}
inline double* value_ptr(int offset = -1){return this->get_override("value_ptr")();}
inline void set_test_value(int offset = -1){this->get_override("set_test_value")();}
inline double* test_value_ptr(int offset = -1){return this->get_override("test_value_ptr")();}
inline void set_value(int offset=0){this->get_override("set_value")();}
inline double* value_ptr(int offset=0){return this->get_override("value_ptr")();}
inline void set_test_value(int offset=0){this->get_override("set_test_value")();}
inline double* test_value_ptr(int offset=0){return this->get_override("test_value_ptr")();}
inline bool is_nan(){return this->get_override("is_nan")();}
inline bool is_const(){return this->get_override("is_const")();}
inline NODE_TYPE type(){return this->get_override("type")();}
......@@ -65,8 +65,8 @@ namespace sisso
template<int N>
struct OperatorNodeWrap : OperatorNode<N>, py::wrapper<OperatorNode<N>>
{
inline void set_value(int offset = -1){this->get_override("set_value")();}
inline void set_test_value(int offset = -1){this->get_override("set_test_value")();}
inline void set_value(int offset=0){this->get_override("set_value")();}
inline void set_test_value(int offset=0){this->get_override("set_test_value")();}
inline NODE_TYPE type(){return this->get_override("type")();}
inline int rung(int cur_rung = 0){return this->get_override("rung")();}
inline std::string expr(){return this->get_override("expr")();}
......
......@@ -9,7 +9,7 @@ FeatureNode::FeatureNode(unsigned long int feat_ind, std::string expr, np::ndarr
{
// Automatically resize the storage arrays
if(node_value_arrs::N_STORE_FEATURES == 0)
node_value_arrs::initialize_values_arr(_n_samp, _n_test_samp, 1);
node_value_arrs::initialize_values_arr(_n_samp, _n_test_samp, 1, 0);
else if((_n_samp != node_value_arrs::N_SAMPLES) || (_n_test_samp != node_value_arrs::N_SAMPLES_TEST))
throw std::logic_error("Number of samples in current feature is not the same as the others, (" + std::to_string(_n_samp) + " and " + std::to_string(_n_test_samp) + " vs. " + std::to_string(node_value_arrs::N_SAMPLES) + " and " + std::to_string(node_value_arrs::N_SAMPLES_TEST) + ")");
else if(feat_ind >= node_value_arrs::N_STORE_FEATURES)
......@@ -29,7 +29,7 @@ FeatureNode::FeatureNode(unsigned long int feat_ind, std::string expr, py::list
// Automatically resize the storage arrays
if(node_value_arrs::N_STORE_FEATURES == 0)
node_value_arrs::initialize_values_arr(_n_samp, _n_test_samp, 1);
node_value_arrs::initialize_values_arr(_n_samp, _n_test_samp, 1, 0);
else if((_n_samp != node_value_arrs::N_SAMPLES) || (_n_test_samp != node_value_arrs::N_SAMPLES_TEST))
throw std::logic_error("Number of samples in current feature is not the same as the others, (" + std::to_string(_n_samp) + " and " + std::to_string(_n_test_samp) + " vs. " + std::to_string(node_value_arrs::N_SAMPLES) + " and " + std::to_string(node_value_arrs::N_SAMPLES_TEST) + ")");
else if(feat_ind >= node_value_arrs::N_STORE_FEATURES)
......
......@@ -121,6 +121,7 @@ FeatureSpace::FeatureSpace(
std::vector<int> rung_inds = util_funcs::argsort(rungs);
_max_phi = *std::max_element(rungs.begin(), rungs.end());
node_value_arrs::MAX_RUNG = _max_phi;
_phi[0] = phi_temp[rung_inds[0]];
for(int ff = 1; ff < _n_feat; ++ff)
{
......