// Copyright 2021 Thomas A. R. Purcell // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. /** @file feature_creation/node/value_storage/node_value_containers.hpp * @brief Creates a central storage area for the feature's input data and the descriptor matrix * * @author Thomas A. R. Purcell (tpurcell90) * @bug No known bugs. */ #ifndef NODE_VALUE_ARR #define NODE_VALUE_ARR #include #include #include #include #include #include #include #include"utils/enum.hpp" #ifdef PY_BINDINGS #include "python/py_binding_cpp_def/conversion_utils.hpp" namespace np = boost::python::numpy; namespace py = boost::python; #endif namespace node_value_arrs { extern std::vector VALUES_ARR; //!< The central storage location for the values of the training data for each feature with a rung <= N_RUNGS_STORED extern std::vector TEST_VALUES_ARR; //!< The central storage location for the values of the test data for each feature with a rung <= N_RUNGS_STORED extern std::vector TEMP_STORAGE_ARR; //!< The vector used to temporarily store the values of each feature with a rung > N_RUNGS_STORED (These are calculated on the fly when the values are required) extern std::vector TEMP_STORAGE_TEST_ARR; //!< The vector used to temporarily store the values of each feature with a rung > N_RUNGS_STORED (These are calculated on the fly when the values are required) extern std::vector TEMP_STORAGE_REG; //!< Register that maps the slots in TEMP_STORAGE_ARR to the index of the feature whose data is currently stored there (reset by setting all elements of this vector to -1) extern std::vector TEMP_STORAGE_TEST_REG; //!< Register that maps the slots in TEMP_STORAGE_TEST_ARR to the index of the feature whose data is currently stored there (reset by setting all elements of this vector to -1) extern std::vector PARAM_STORAGE_ARR; //!< The vector used to temporarily store the values of each feature that has free-parameters (These are calculated on the fly when the values are required) extern std::vector PARAM_STORAGE_TEST_ARR; //!< The vector used to temporarily store the values of each feature that has free-parameters (These are calculated on the fly when the values are required) extern std::vector D_MATRIX; //!< The descriptor matrix (Central storage for the selected feature space) extern std::vector TASK_SZ_TRAIN; //!< Number of training samples per task extern std::vector TASK_START_TRAIN; //!< The starting point for each task in the training data extern std::vector TASK_SZ_TEST; //!< Number of test sample per task extern std::vector STANDARDIZED_D_MATRIX; //!< The descriptor matrix filled with standardized feature values (Central storage for the selected feature space) extern std::vector STANDARDIZED_STORAGE_ARR; //!< //!< The vector used to temporarily store the values of the standardized feature training values extern std::vector STANDARDIZED_TEST_STORAGE_ARR; //!< //!< The vector used to temporarily store the values of the standardized feature test values extern int N_SELECTED; //!< Number of selected features extern int N_SAMPLES; //!< Number of training samples for each feature (Sum of all elements in TASK_SZ_TRAIN) extern int N_SAMPLES_TEST; //!< Number of test samples for each feature (Sum of all elements in TASK_SZ_TEST) extern int N_PRIMARY_FEATURES; //!< Number of primary features extern int N_STORE_FEATURES; //!< Number of features with stored values extern int N_RUNGS_STORED; //!< Maximum rung for permanently storing a features value extern int MAX_RUNG; //!< The maximum rung for all features extern int MAX_N_THREADS; //!< Get the maximum number of threads possible for a calculation extern int N_OP_SLOTS; //!< The number of possible nodes of the binary expression tree that maybe calculated on the fly extern int N_PARAM_OP_SLOTS; //!< The number of possible non-leaf nodes of the binary expression tree // DocString: node_vals_finalize /** * @brief Resize all storage arrays to be empty */ void finalize_values_arr(); /** * @brief Initialize all central storage vectors/descriptive variables without changing MAX_RUNG * * @param n_samples The number of training samples for each feature (Sum of all elements in TASK_SZ_TRAIN) * @param n_samples_test The number of test samples for each feature (Sum of all elements in TASK_SZ_TEST) * @param n_primary_feat The number of primary features */ void initialize_values_arr( const int n_samples, const int n_samples_test, const int n_primary_feat ); /** * @brief Initialize all central storage vectors/descriptive variables * * @param task_sz_train The number of training samples per task * @param task_sz_test The number of test sample per task * @param n_primary_feat The number of primary features * @param max_rung The maximum rung for all features * @param use_params If True set up parameterized feature storage as well */ void initialize_values_arr( const std::vector task_sz_train, const std::vector task_sz_test, const int n_primary_feat, const int max_rung, const bool use_params ); /** * @brief Resize the central storage array given a new number of features and the current rung of the features * * @param n_rung The current rung of the generated features * @param n_feat The new number of features to store */ void resize_values_arr(const int n_dims, const int n_feat); // DocString: node_vals_init_param /** * @brief Initialize the parameter storage array */ void initialize_param_storage(); // DocString: node_vals_init_d_mat /** * @brief Initialize the descriptor matrix * */ void initialize_d_matrix_arr(); /** * @brief Resize the descriptor matrix for the new number of selected features * * @param n_select Number of features to select */ void resize_d_matrix_arr(const int n_select); /** * @brief Reset the global TASK_SZ_TRAIN vector * * @param task_sz_train the new task_sz train */ void set_task_sz_train(const std::vector task_sz_train); /** * @brief Reset the global TASK_SZ_TEST vector * * @param task_sz_train the new test_sz train */ void set_task_sz_test(const std::vector task_sz_test); /** * @brief Set max_rung and initialize the temporary storage arrays * * @param max_rung The maximum rung for the calculation */ void set_max_rung(const int max_rung, bool use_params=false); /** * @brief Get the operator slot associated with a given rung/offset * * @param rung Rung of the feature * @param offset Offset used to prevent overwrites (determines where in the binary expression tree this operator is) * @param for_comp If true get a slot dedicated to comparing features * * @return The operator slot to use */ inline int get_op_slot(const int rung, const int offset, const bool for_comp) { return std::abs(N_OP_SLOTS / (1 + !for_comp) - static_cast(std::pow(2, MAX_RUNG - rung)) - offset); } /** * @brief Get the parameter operator slot associated with a given rung/offset * * @param rung Rung of the feature * @param offset Offset used to prevent overwrites (determines where in the binary expression tree this operator is) * @param for_comp If true get a slot dedicated to comparing features * * @return The operator slot to use */ inline int get_param_op_slot(const int rung, const int offset, const bool for_comp) { return std::abs(N_PARAM_OP_SLOTS / (1 + !for_comp) - static_cast(std::pow(2, MAX_RUNG - rung)) - offset); } /** * @brief Get a reference slot/feature register of the training data * * @param ind The Node's arr_ind * @param op_slot(int) Offset integer for TEMP_STORE_ARRAY * * @return The register element for a given feature index and op_slot */ inline int& temp_storage_reg(const unsigned long int ind, const int op_slot=0) { return TEMP_STORAGE_REG[ (ind % N_PRIMARY_FEATURES) + (op_slot % N_OP_SLOTS) * N_PRIMARY_FEATURES + omp_get_thread_num() * (N_PRIMARY_FEATURES * N_OP_SLOTS + 1) ]; } /** * @brief Get a reference slot/feature register of the test data * * @param ind The Node's arr_ind * @param op_slot(int) Offset integer for TEMP_STORE_TEST_ARRAY * * @return The register element for a given feature index and op_slot */ inline int& temp_storage_test_reg(const unsigned long int ind, const int op_slot=0) { return TEMP_STORAGE_TEST_REG[ (ind % N_PRIMARY_FEATURES) + (op_slot % N_OP_SLOTS) * N_PRIMARY_FEATURES + omp_get_thread_num() * (N_PRIMARY_FEATURES * N_OP_SLOTS + 1) ]; } /** * @brief Get a reference slot/feature register of the training data * * @param ind The Node's arr_ind * @param rung Rung of the feature * @param offset Offset used to prevent overwrites (determines where in the binary expression tree this operator is) * @param for_comp If true get a slot dedicated to comparing features * * @return The register element for a given feature index and offset */ inline int& temp_storage_reg(const unsigned long int ind, const int rung, const int offset, const bool for_comp) { return TEMP_STORAGE_REG[ (ind % N_PRIMARY_FEATURES) + (get_op_slot(rung, offset, for_comp) % N_OP_SLOTS) * N_PRIMARY_FEATURES + omp_get_thread_num() * (N_PRIMARY_FEATURES * N_OP_SLOTS + 1) ]; } /** * @brief Get a reference slot/feature register of the test data * * @param ind The Node's arr_ind * @param rung Rung of the feature * @param offset Offset used to prevent overwrites (determines where in the binary expression tree this operator is) * @param for_comp If true get a slot dedicated to comparing features * * @return The register element for a given feature index and offset */ inline int& temp_storage_test_reg(const unsigned long int ind, const int rung, const int offset, const bool for_comp) { return TEMP_STORAGE_TEST_REG[ (ind % N_PRIMARY_FEATURES) + (get_op_slot(rung, offset, for_comp) % N_OP_SLOTS) * N_PRIMARY_FEATURES + omp_get_thread_num() * (N_PRIMARY_FEATURES * N_OP_SLOTS + 1) ]; } /** * @brief Access element of the permanent training data storage array * * @param feature_ind The _feat_ind of Node to get the training data of * * @return pointer to the Node's training data */ inline double* access_value_arr(const unsigned long int feature_ind){return &VALUES_ARR[feature_ind*N_SAMPLES];} /** * @brief Access element of the permanent test data storage array * * @param feature_ind The _feat_ind of Node to get the test data of * * @return pointer to the Node's test data */ inline double* access_test_value_arr(const unsigned long int feature_ind){return &TEST_VALUES_ARR[feature_ind*N_SAMPLES_TEST];} /** * @brief Access element of temporary storage array for the training data * * @param slot The slot of the temporary storage array * * @return pointer to the data stored in the specified slot */ inline double* access_temp_storage(const unsigned long int slot){return &TEMP_STORAGE_ARR[slot*N_SAMPLES];} /** * @brief Access element of temporary storage array for the test data * * @param slot The slot of the temporary storage array * * @return pointer to the data stored in the specified slot */ inline double* access_temp_storage_test(const unsigned long int slot){return &TEMP_STORAGE_TEST_ARR[slot*N_SAMPLES_TEST];} /** * @brief Access element of temporary standardized storage array for the training data * * @param arr_ind The array index of the feature * @param for_comp True if used for a comparison * * @return pointer to the data stored in the specified slot */ inline double* access_temp_stand_storage(const unsigned long int arr_ind, const bool for_comp) { return &STANDARDIZED_STORAGE_ARR[ ((arr_ind % N_PRIMARY_FEATURES) + for_comp * N_PRIMARY_FEATURES) * N_SAMPLES + omp_get_thread_num() * 2 * (N_PRIMARY_FEATURES + 1) * N_SAMPLES ]; } /** * @brief Access element of temporary standardized storage array for the test data * * @param arr_ind The array index of the feature * @param for_comp True if used for a comparison * * @return pointer to the data stored in the specified slot */ inline double* access_temp_stand_storage_test(const unsigned long int arr_ind, const bool for_comp) { return &STANDARDIZED_TEST_STORAGE_ARR[ ((arr_ind % N_PRIMARY_FEATURES) + for_comp * N_PRIMARY_FEATURES) * N_SAMPLES_TEST + omp_get_thread_num() * 2 * (N_PRIMARY_FEATURES + 1) * N_SAMPLES_TEST ]; } /** * @brief Access the param storage array * * @param rung Rung of the feature * @param offset Offset used to prevent overwrites (determines where in the binary expression tree this operator is) * @param for_comp If true get a slot dedicated to comparing features * @return pointer to the correct element of the PARAM_STORAGE_ARR */ inline double* access_param_storage(const int rung=0, const int offset=0, const bool for_comp=false) { return &PARAM_STORAGE_ARR[ N_SAMPLES * ( (get_param_op_slot(rung, offset, for_comp) % N_PARAM_OP_SLOTS) + omp_get_thread_num() * (N_PARAM_OP_SLOTS + 1) ) ]; } /** * @brief Access the param storage array for the test set * * @param rung Rung of the feature * @param offset Offset used to prevent overwrites (determines where in the binary expression tree this operator is) * @param for_comp If true get a slot dedicated to comparing features * @return pointer to the correct element of the PARAM_STORAGE_ARR */ inline double* access_param_storage_test(const int rung=0, const int offset=0, const bool for_comp=false) { return &PARAM_STORAGE_TEST_ARR[ N_SAMPLES_TEST * ( (get_param_op_slot(rung, offset, for_comp) % N_PARAM_OP_SLOTS) + omp_get_thread_num() * (N_PARAM_OP_SLOTS + 1) ) ]; } /** * @brief Get a Node's value_ptr * * @param arr_ind Nodes _arr_ind * @param feat_ind Nodes _feat_ind * @param rung Rung of the feature * @param offset Offset used to prevent overwrites (determines where in the binary expression tree this operator is) * @param for_comp If true get a slot dedicated to comparing features * * @return The value pointer */ double* get_value_ptr( const unsigned long int arr_ind, const unsigned long int feat_ind, const int rung=0, const int offset=0, const bool for_comp=false ); /** * @brief Get a Node's test_value_ptr * * @param arr_ind Nodes _arr_ind * @param feat_ind Nodes _feat_ind * @param rung Rung of the feature * @param offset Offset used to prevent overwrites (determines where in the binary expression tree this operator is) * @param for_comp If true get a slot dedicated to comparing features * * @return The value pointer */ double* get_test_value_ptr( const unsigned long int arr_ind, const unsigned long int feat_ind, const int rung=0, const int offset=0, const bool for_comp=false ); /** * @brief Get the pointer to a particular selected Node's data from sis * * @param ind Index of the data in the descriptor matrix * @return The pointer to the descriptor matrix's data */ inline double* get_d_matrix_ptr(const int ind){return &D_MATRIX[ind * N_SAMPLES];} /** * @brief Get the pointer to a particular selected Node's data from sis * * @param ind Index of the data in the descriptor matrix * @param taskind The index for the given task * @return The pointer to the descriptor matrix's data */ inline double* get_d_matrix_ptr(const int ind, const int taskind){return &D_MATRIX[ind * N_SAMPLES + TASK_START_TRAIN[taskind]];} /** * @brief Get the pointer to a particular selected Node's standardized from sis * * @param ind Index of the data in the descriptor matrix * @return The pointer to the descriptor matrix's standardized data */ inline double* get_stand_d_matrix_ptr(const int ind){return &STANDARDIZED_D_MATRIX[ind * N_SAMPLES];} /** * @brief Get the pointer to a particular selected Node's standardized from sis * * @param ind Index of the data in the descriptor matrix * @param taskind The index for the given task * @return The pointer to the descriptor matrix's standardized data */ inline double* get_stand_d_matrix_ptr(const int ind, const int taskind){return &STANDARDIZED_D_MATRIX[ind * N_SAMPLES + TASK_START_TRAIN[taskind]];} /** * @brief Flush the temporary storage register (training data) * @details Reset all slots in the register to -1 */ inline void clear_temp_reg(){std::fill_n(TEMP_STORAGE_REG.begin(), TEMP_STORAGE_REG.size(), -1);} /** * @brief Flush the temporary storage register (training data) * @details Reset all slots in the register to -1 */ inline void clear_temp_reg_thread() { std::fill_n(TEMP_STORAGE_REG.begin() + (N_PRIMARY_FEATURES * N_OP_SLOTS + 1) * omp_get_thread_num(), N_PRIMARY_FEATURES * N_OP_SLOTS + 1, -1); } /** * @brief Flush the temporary storage register (test data) * @details Reset all slots in the register to -1 */ inline void clear_temp_test_reg(){std::fill_n(TEMP_STORAGE_TEST_REG.begin(), TEMP_STORAGE_TEST_REG.size(), -1);} #ifdef PY_BINDINGS // DocString: node_vals_ts_list_no_params /** * @brief Initialize the node value arrays * @details Using the size of the initial feature space constructor the storage arrays * * @param task_sz_train (list) The number of training samples per task * @param task_sz_test (list) The number of test sample per task * @param n_primary_feat (int) The number of primary features * @param max_rung (int) The maximum rung for all features */ inline void initialize_values_arr( py::list task_sz_train, py::list task_sz_test, int n_primary_feat, int max_rung ) { initialize_values_arr( python_conv_utils::from_list(task_sz_train), python_conv_utils::from_list(task_sz_test), n_primary_feat, max_rung, false ); } // DocString: node_vals_ts_arr_no_params /** * @brief Initialize the node value arrays * @details Using the size of the initial feature space constructor the storage arrays * * @param task_sz_train (np.ndarray) The number of training samples per task * @param task_sz_test (np.ndarray) The number of test sample per task * @param n_primary_feat (int) The number of primary features * @param max_rung (int) The maximum rung for all features */ inline void initialize_values_arr( np::ndarray task_sz_train, np::ndarray task_sz_test, int n_primary_feat, int max_rung ) { initialize_values_arr( python_conv_utils::from_ndarray(task_sz_train), python_conv_utils::from_ndarray(task_sz_test), n_primary_feat, max_rung, false ); } // DocString: node_vals_ts_list /** * @brief Initialize the node value arrays * @details Using the size of the initial feature space constructor the storage arrays * * @param task_sz_train (list): The number of training samples per task * @param task_sz_test (list): The number of test sample per task * @param n_primary_feat (int): The number of primary features * @param max_rung (int): The maximum rung for all features * @param use_params (bool): If true also initialize parameterized storage */ inline void initialize_values_arr( py::list task_sz_train, py::list task_sz_test, int n_primary_feat, int max_rung, bool use_params ) { initialize_values_arr( python_conv_utils::from_list(task_sz_train), python_conv_utils::from_list(task_sz_test), n_primary_feat, max_rung, use_params ); } // DocString: node_vals_ts_arr /** * @brief Initialize the node value arrays * @details Using the size of the initial feature space constructor the storage arrays * * @param task_sz_train (np.ndarray): The number of training samples per task * @param task_sz_test (np.ndarray): The number of test sample per task * @param n_primary_feat (int): The number of primary features * @param max_rung (int): The maximum rung for all features * @param use_params (bool): If true also initialize parameterized storage */ inline void initialize_values_arr( np::ndarray task_sz_train, np::ndarray task_sz_test, int n_primary_feat, int max_rung, bool use_params ) { initialize_values_arr( python_conv_utils::from_ndarray(task_sz_train), python_conv_utils::from_ndarray(task_sz_test), n_primary_feat, max_rung, use_params ); } #endif } #endif