// Copyright 2021 Thomas A. R. Purcell
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

15
/** @file feature_creation/node/value_storage/nodes_value_containers.hpp
 *  @brief Creates a central storage area for the feature's input data and the descriptor matrix
 *
 *  @author Thomas A. R. Purcell (tpurcell90)
 *  @bug No known bugs.
 */
21

22
23
#ifndef NODE_VALUE_ARR
#define NODE_VALUE_ARR
Thomas Purcell's avatar
Thomas Purcell committed
24

Thomas Purcell's avatar
Thomas Purcell committed
25
#include <algorithm>
26
#include <cmath>
Thomas Purcell's avatar
Thomas Purcell committed
27
#include <memory>
Thomas Purcell's avatar
Thomas Purcell committed
28
#include <numeric>
Thomas Purcell's avatar
Thomas Purcell committed
29
#include <vector>
30
#include <iostream>
Thomas Purcell's avatar
Thomas Purcell committed
31

32
33
#include <omp.h>

Thomas Purcell's avatar
Thomas Purcell committed
34
#include"utils/enum.hpp"
Thomas Purcell's avatar
Thomas Purcell committed
35

36
#ifdef PY_BINDINGS
37
#include "python/py_binding_cpp_def/conversion_utils.hpp"
38
39
40
41
namespace np = boost::python::numpy;
namespace py = boost::python;
#endif

Thomas Purcell's avatar
Thomas Purcell committed
42
43
namespace node_value_arrs
{
Thomas Purcell's avatar
Thomas Purcell committed
44
45
46
47
48
49
50
51
52
53
54
55
56
    // Permanent value storage (features with a rung <= N_RUNGS_STORED)
    extern std::vector<double> VALUES_ARR; //!< The central storage location for the values of the training data for each feature with a rung <= N_RUNGS_STORED
    extern std::vector<double> TEST_VALUES_ARR; //!< The central storage location for the values of the test data for each feature with a rung <= N_RUNGS_STORED

    // Temporary value storage and the registers describing what each slot currently holds
    extern std::vector<double> TEMP_STORAGE_ARR; //!< The vector used to temporarily store the training values of each feature with a rung > N_RUNGS_STORED (These are calculated on the fly when the values are required)
    extern std::vector<double> TEMP_STORAGE_TEST_ARR; //!< The vector used to temporarily store the test values of each feature with a rung > N_RUNGS_STORED (These are calculated on the fly when the values are required)
    extern std::vector<int> TEMP_STORAGE_REG; //!< Register that maps the slots in TEMP_STORAGE_ARR to the index of the feature whose data is currently stored there (reset by setting all elements of this vector to -1)
    extern std::vector<int> TEMP_STORAGE_TEST_REG; //!< Register that maps the slots in TEMP_STORAGE_TEST_ARR to the index of the feature whose data is currently stored there (reset by setting all elements of this vector to -1)

    // Storage for parameterized features
    extern std::vector<double> PARAM_STORAGE_ARR; //!< The vector used to temporarily store the training values of each feature that has free-parameters (These are calculated on the fly when the values are required)
    extern std::vector<double> PARAM_STORAGE_TEST_ARR; //!< The vector used to temporarily store the test values of each feature that has free-parameters (These are calculated on the fly when the values are required)

    extern std::vector<double> D_MATRIX; //!< The descriptor matrix (Central storage for the selected feature space)

    extern std::vector<int> TASK_SZ_TRAIN; //!< Number of training samples per task
    extern std::vector<int> TASK_SZ_TEST; //!< Number of test samples per task

    extern int N_SELECTED; //!< Number of selected features

    extern int N_SAMPLES; //!< Number of training samples for each feature (Sum of all elements in TASK_SZ_TRAIN)
    extern int N_SAMPLES_TEST; //!< Number of test samples for each feature (Sum of all elements in TASK_SZ_TEST)

    extern int N_PRIMARY_FEATURES; //!< Number of primary features
    extern int N_STORE_FEATURES; //!< Number of features with stored values
    extern int N_RUNGS_STORED; //!< Maximum rung for permanently storing a feature's value
    extern int MAX_RUNG; //!< The maximum rung for all features

    extern int MAX_N_THREADS; //!< The maximum number of threads possible for a calculation
    extern int N_OP_SLOTS; //!< The number of possible nodes of the binary expression tree that may be calculated on the fly
    extern int N_PARAM_OP_SLOTS; //!< The number of possible non-leaf nodes of the binary expression tree

Thomas Purcell's avatar
Thomas Purcell committed
74
    /**
     * @brief Initialize all central storage vectors/descriptive variables
     *
     * @param n_samples The number of training samples for each feature (Sum of all elements in TASK_SZ_TRAIN)
     * @param n_samples_test The number of test samples for each feature (Sum of all elements in TASK_SZ_TEST)
     * @param n_primary_feat The number of primary features
     * @param max_rung The maximum rung for all features
     * @param set_task_sz If True reset the task_sz vectors
     * @param use_params If True set up parameterized feature storage as well
     */
    void initialize_values_arr(
        const int n_samples,
        const int n_samples_test,
        const int n_primary_feat,
        const int max_rung,
        const bool set_task_sz,
        const bool use_params
    );
Thomas Purcell's avatar
Thomas Purcell committed
92

93
    // DocString: node_vals_init_no_ts
Thomas Purcell's avatar
Thomas Purcell committed
94
    /**
Thomas Purcell's avatar
Thomas Purcell committed
95
     * @brief Initialize all central storage vectors/descriptive variables
Thomas Purcell's avatar
Thomas Purcell committed
96
     *
Thomas Purcell's avatar
Thomas Purcell committed
97
98
99
100
     * @param n_samples The number of training samples for each feature (Sum of all elements in TASK_SZ_TRAIN)
     * @param n_samples_test The number of test samples for each feature (Sum of all elements in TASK_SZ_TEST)
     * @param n_primary_feat The number of primary features
     * @param max_rung The maximum rung for all features
Thomas Purcell's avatar
Thomas Purcell committed
101
     */
102
103
104
105
106
107
    inline void initialize_values_arr(
        const int n_samples,
        const int n_samples_test,
        const int n_primary_feat,
        const int max_rung
    )
Thomas Purcell's avatar
Thomas Purcell committed
108
    {
109
        initialize_values_arr(n_samples, n_samples_test, n_primary_feat, max_rung, true, false);
Thomas Purcell's avatar
Thomas Purcell committed
110
111
112
    }

    /**
     * @brief Initialize all central storage vectors/descriptive variables
     *
     * @param task_sz_train The number of training samples per task
     * @param task_sz_test The number of test samples per task
     * @param n_primary_feat The number of primary features
     * @param max_rung The maximum rung for all features
     * @param use_params If True set up parameterized feature storage as well
     */
    void initialize_values_arr(
        const std::vector<int> task_sz_train,
        const std::vector<int> task_sz_test,
        const int n_primary_feat,
        const int max_rung,
        const bool use_params
    );
Thomas Purcell's avatar
Thomas Purcell committed
128
129

    /**
     * @brief Resize the central storage array given a new number of features and the current rung of the features
     *
     * @param n_dims The current rung of the generated features
     * @param n_feat The new number of features to store
     */
    void resize_values_arr(const int n_dims, const int n_feat);
Thomas Purcell's avatar
Thomas Purcell committed
136

Thomas Purcell's avatar
Thomas Purcell committed
137
    // DocString: node_vals_init_param
138
139
140
    /**
     * @brief Initialize the parameter storage array
     */
    void initialize_param_storage();

Thomas Purcell's avatar
Thomas Purcell committed
144
    // DocString: node_vals_init_d_mat
145
    /**
     * @brief Initialize the descriptor matrix
     * @details Declaration only; the definition is not visible in this header.
     *          Presumably this sets up D_MATRIX -- confirm against the definition.
     */
    void initialize_d_matrix_arr();

    /**
     * @brief Resize the descriptor matrix for the new number of selected features
     *
     * @param n_select Number of features to select
     */
    void resize_d_matrix_arr(const int n_select);
157

Thomas Purcell's avatar
Thomas Purcell committed
158
159
160
161
162
    /**
     * @brief Reset the global TASK_SZ_TRAIN vector
     *
     * @param task_sz_train the new task_sz train (number of training samples per task)
     */
    void set_task_sz_train(const std::vector<int> task_sz_train);
Thomas Purcell's avatar
Thomas Purcell committed
164
165
166
167
168
169

    /**
     * @brief Reset the global TASK_SZ_TEST vector
     *
     * @param task_sz_test the new task_sz test (number of test samples per task)
     */
    void set_task_sz_test(const std::vector<int> task_sz_test);
Thomas Purcell's avatar
Thomas Purcell committed
171

172
173
174
175
    /**
     * @brief Get the operator slot associated with a given rung/offset
     *
     * @param rung Rung of the feature
Thomas Purcell's avatar
Thomas Purcell committed
176
     * @param offset Offset used to prevent overwrites (determines where in the binary expression tree this operator is)
177
     * @param for_comp If true get a slot dedicated to comparing features
178
179
180
     *
     * @return The operator slot to use
     */
181
    inline int get_op_slot(const int rung, const int offset, const bool for_comp)
182
183
184
    {
        return std::abs(N_OP_SLOTS / (1 + !for_comp) - static_cast<int>(std::pow(2, MAX_RUNG - rung)) - offset);
    }
185

186
187
188
189
    /**
     * @brief Get the parameter operator slot associated with a given rung/offset
     * @details Computes |N_PARAM_OP_SLOTS / d - 2^(MAX_RUNG - rung) - offset|, where d is 1 when
     *          for_comp is true and 2 otherwise.
     *
     * @param rung Rung of the feature
     * @param offset Offset used to prevent overwrites (determines where in the binary expression tree this operator is)
     * @param for_comp If true get a slot dedicated to comparing features
     *
     * @return The operator slot to use
     */
    inline int get_param_op_slot(const int rung, const int offset, const bool for_comp)
    {
        // 2^(MAX_RUNG - rung) via an integer shift instead of floating-point std::pow.
        // A negative exponent yields 0, which is what truncating std::pow's result gave.
        const int depth = MAX_RUNG - rung;
        const int two_pow_depth = (depth >= 0) ? (1 << depth) : 0;

        // Only half of the slots are addressed unless the slot is requested for comparisons
        const int slot_range = N_PARAM_OP_SLOTS / (for_comp ? 1 : 2);
        return std::abs(slot_range - two_pow_depth - offset);
    }

Thomas Purcell's avatar
Thomas Purcell committed
200
    /**
201
     * @brief Get a reference slot/feature register of the training data
Thomas Purcell's avatar
Thomas Purcell committed
202
     *
203
     * @param ind The Node's arr_ind
204
205
206
207
     * @param op_slot(int) Offset integer for TEMP_STORE_ARRAY
     *
     * @return The register element for a given feature index and op_slot
     */
208
    inline int& temp_storage_reg(const unsigned long int ind, const int op_slot=0)
209
210
    {
        return TEMP_STORAGE_REG[
Thomas Purcell's avatar
Thomas Purcell committed
211
            (ind % N_PRIMARY_FEATURES) + (op_slot % N_OP_SLOTS) * N_PRIMARY_FEATURES + omp_get_thread_num() * (N_PRIMARY_FEATURES * N_OP_SLOTS + 1)
212
213
        ];
    }
214
215
216
217
218
219
220
221
222

    /**
     * @brief Get a reference slot/feature register of the test data
     *
     * @param ind The Node's arr_ind
     * @param op_slot(int) Offset integer for TEMP_STORE_TEST_ARRAY
     *
     * @return The register element for a given feature index and op_slot
     */
223
    inline int& temp_storage_test_reg(const unsigned long int ind, const int op_slot=0)
224
225
    {
        return TEMP_STORAGE_TEST_REG[
Thomas Purcell's avatar
Thomas Purcell committed
226
            (ind % N_PRIMARY_FEATURES) + (op_slot % N_OP_SLOTS) * N_PRIMARY_FEATURES + omp_get_thread_num() * (N_PRIMARY_FEATURES * N_OP_SLOTS + 1)
227
228
        ];
    }
229

Thomas Purcell's avatar
Thomas Purcell committed
230
    /**
231
     * @brief Get a reference slot/feature register of the training data
Thomas Purcell's avatar
Thomas Purcell committed
232
     *
233
     * @param ind The Node's arr_ind
234
     * @param rung Rung of the feature
Thomas Purcell's avatar
Thomas Purcell committed
235
     * @param offset Offset used to prevent overwrites (determines where in the binary expression tree this operator is)
236
     * @param for_comp If true get a slot dedicated to comparing features
Thomas Purcell's avatar
Thomas Purcell committed
237
238
239
     *
     * @return The register element for a given feature index and offset
     */
240
    inline int& temp_storage_reg(const unsigned long int ind, const int rung, const int offset, const bool for_comp)
241
242
    {
        return TEMP_STORAGE_REG[
Thomas Purcell's avatar
Thomas Purcell committed
243
244
245
            (ind % N_PRIMARY_FEATURES) +
            (get_op_slot(rung, offset, for_comp) % N_OP_SLOTS) * N_PRIMARY_FEATURES +
            omp_get_thread_num() * (N_PRIMARY_FEATURES * N_OP_SLOTS + 1)
246
247
        ];
    }
Thomas Purcell's avatar
Thomas Purcell committed
248

249
    /**
250
     * @brief Get a reference slot/feature register of the test data
251
     *
252
     * @param ind The Node's arr_ind
253
     * @param rung Rung of the feature
Thomas Purcell's avatar
Thomas Purcell committed
254
     * @param offset Offset used to prevent overwrites (determines where in the binary expression tree this operator is)
255
     * @param for_comp If true get a slot dedicated to comparing features
256
257
258
     *
     * @return The register element for a given feature index and offset
     */
259
    inline int& temp_storage_test_reg(const unsigned long int ind, const int rung, const int offset, const bool for_comp)
260
261
    {
        return TEMP_STORAGE_TEST_REG[
Thomas Purcell's avatar
Thomas Purcell committed
262
            (ind % N_PRIMARY_FEATURES) +
Thomas Purcell's avatar
Thomas Purcell committed
263
264
            (get_op_slot(rung, offset, for_comp) % N_OP_SLOTS) * N_PRIMARY_FEATURES +
            omp_get_thread_num() * (N_PRIMARY_FEATURES * N_OP_SLOTS + 1)
265
266
        ];
    }
267

Thomas Purcell's avatar
Thomas Purcell committed
268
    /**
269
     * @brief Access element of the permanent training data storage array
Thomas Purcell's avatar
Thomas Purcell committed
270
     *
Thomas Purcell's avatar
Thomas Purcell committed
271
     * @param feature_ind The _feat_ind of Node to get the training data of
Thomas Purcell's avatar
Thomas Purcell committed
272
     *
273
     * @return pointer to the Node's training data
Thomas Purcell's avatar
Thomas Purcell committed
274
     */
275
    inline double* access_value_arr(const unsigned long int feature_ind){return &VALUES_ARR[feature_ind*N_SAMPLES];}
276

277
    /**
278
     * @brief Access element of the permanent test data storage array
279
     *
Thomas Purcell's avatar
Thomas Purcell committed
280
     * @param feature_ind The _feat_ind of Node to get the test data of
281
     *
282
     * @return pointer to the Node's test data
283
     */
284
    inline double* access_test_value_arr(const unsigned long int feature_ind){return &TEST_VALUES_ARR[feature_ind*N_SAMPLES_TEST];}
285

Thomas Purcell's avatar
Thomas Purcell committed
286
    /**
287
     * @brief Access element of temporary storage array for the training data
Thomas Purcell's avatar
Thomas Purcell committed
288
     *
289
     * @param slot The slot of the temporary storage array
Thomas Purcell's avatar
Thomas Purcell committed
290
     *
291
     * @return pointer to the data stored in the specified slot
Thomas Purcell's avatar
Thomas Purcell committed
292
     */
293
    inline double* access_temp_storage(const unsigned long int slot){return &TEMP_STORAGE_ARR[slot*N_SAMPLES];}
294

295
    /**
296
     * @brief Access element of temporary storage array for the test data
297
     *
298
     * @param slot The slot of the temporary storage array
299
     *
300
     * @return pointer to the data stored in the specified slot
301
     */
302
    inline double* access_temp_storage_test(const unsigned long int slot){return &TEMP_STORAGE_TEST_ARR[slot*N_SAMPLES_TEST];}
303

304
305
306
307
    /**
     * @brief Access the param storage array
     * @details The slot from get_param_op_slot is wrapped with % N_PARAM_OP_SLOTS and combined with
     *          the calling OpenMP thread number, which is offset by (N_PARAM_OP_SLOTS + 1) slots.
     *
     * @param rung Rung of the feature
     * @param offset Offset used to prevent overwrites (determines where in the binary expression tree this operator is)
     * @param for_comp If true get a slot dedicated to comparing features
     *
     * @return pointer to the correct element of the PARAM_STORAGE_ARR
     */
    inline double* access_param_storage(const int rung=0, const int offset=0, const bool for_comp=false)
    {
        // Slot index relative to the start of the calling thread's section
        const int slot_in_thread = get_param_op_slot(rung, offset, for_comp) % N_PARAM_OP_SLOTS;
        // First slot of the calling thread's section
        const int thread_first_slot = omp_get_thread_num() * (N_PARAM_OP_SLOTS + 1);

        return &PARAM_STORAGE_ARR[N_SAMPLES * (slot_in_thread + thread_first_slot)];
    }

    /**
     * @brief Access the param storage array for the test set
     * @details The slot from get_param_op_slot is wrapped with % N_PARAM_OP_SLOTS and combined with
     *          the calling OpenMP thread number, which is offset by (N_PARAM_OP_SLOTS + 1) slots.
     *
     * @param rung Rung of the feature
     * @param offset Offset used to prevent overwrites (determines where in the binary expression tree this operator is)
     * @param for_comp If true get a slot dedicated to comparing features
     *
     * @return pointer to the correct element of the PARAM_STORAGE_TEST_ARR
     */
    inline double* access_param_storage_test(const int rung=0, const int offset=0, const bool for_comp=false)
    {
        // Slot index relative to the start of the calling thread's section
        const int slot_in_thread = get_param_op_slot(rung, offset, for_comp) % N_PARAM_OP_SLOTS;
        // First slot of the calling thread's section
        const int thread_first_slot = omp_get_thread_num() * (N_PARAM_OP_SLOTS + 1);

        return &PARAM_STORAGE_TEST_ARR[N_SAMPLES_TEST * (slot_in_thread + thread_first_slot)];
    }

Thomas Purcell's avatar
Thomas Purcell committed
342
    /**
     * @brief Get a Node's value_ptr
     *
     * @param arr_ind Node's _arr_ind
     * @param feat_ind Node's _feat_ind
     * @param rung Rung of the feature
     * @param offset Offset used to prevent overwrites (determines where in the binary expression tree this operator is)
     * @param for_comp If true get a slot dedicated to comparing features
     *
     * @return The value pointer
     */
    double* get_value_ptr(
        const unsigned long int arr_ind,
        const unsigned long int feat_ind,
        const int rung=0,
        const int offset=0,
        const bool for_comp=false
    );
Thomas Purcell's avatar
Thomas Purcell committed
360

361
    /**
     * @brief Get a Node's test_value_ptr
     *
     * @param arr_ind Node's _arr_ind
     * @param feat_ind Node's _feat_ind
     * @param rung Rung of the feature
     * @param offset Offset used to prevent overwrites (determines where in the binary expression tree this operator is)
     * @param for_comp If true get a slot dedicated to comparing features
     *
     * @return The value pointer
     */
    double* get_test_value_ptr(
        const unsigned long int arr_ind,
        const unsigned long int feat_ind,
        const int rung=0,
        const int offset=0,
        const bool for_comp=false
    );
379

380
    /**
381
     * @brief Get the pointer to a particular selected Node from sis
382
     *
383
     * @param ind Index of the data in the descriptor matrix
384
     * @return The pointer to the descriptor matrix's data
385
     */
386
    inline double* get_d_matrix_ptr(const int ind){return &D_MATRIX[ind * N_SAMPLES];}
Thomas Purcell's avatar
Bug Fix    
Thomas Purcell committed
387

388
389
390
391
    /**
     * @brief Flush the temporary storage register (training data)
     * @details Reset all slots in the register to -1
     */
392
    inline void clear_temp_reg(){std::fill_n(TEMP_STORAGE_REG.begin(), TEMP_STORAGE_REG.size(), -1);}
393

394
395
396
397
    /**
     * @brief Flush the temporary storage register (training data)
     * @details Reset all slots in the register to -1
     */
398
399
    inline void clear_temp_reg_thread()
    {
Thomas Purcell's avatar
Thomas Purcell committed
400
        std::fill_n(TEMP_STORAGE_REG.begin() + (N_PRIMARY_FEATURES * N_OP_SLOTS + 1) * omp_get_thread_num(), N_PRIMARY_FEATURES * N_OP_SLOTS + 1, -1);
401
    }
402

403
404
405
406
    /**
     * @brief Flush the temporary storage register (test data)
     * @details Reset all slots in the register to -1
     */
407
    inline void clear_temp_test_reg(){std::fill_n(TEMP_STORAGE_TEST_REG.begin(), TEMP_STORAGE_TEST_REG.size(), -1);}
408

409
410
411
412
413
414
415
#ifdef PY_BINDINGS

    // DocString: node_vals_ts_list
    /**
     * @brief Initialize the node value arrays
     * @details Using the size of the initial feature space construct the storage arrays.
     *          The lists are converted with python_conv_utils::from_list<int> and forwarded to the
     *          task-size overload with use_params = false.
     *
     * @param task_sz_train (list) The number of training samples per task
     * @param task_sz_test (list) The number of test samples per task
     * @param n_primary_feat (int) The number of primary features
     * @param max_rung (int) The maximum rung for all features
     */
    inline void initialize_values_arr(
        py::list task_sz_train,
        py::list task_sz_test,
        int n_primary_feat,
        int max_rung
    )
    {
        // This binding never sets up the parameterized feature storage
        const bool use_params = false;

        initialize_values_arr(
            python_conv_utils::from_list<int>(task_sz_train),
            python_conv_utils::from_list<int>(task_sz_test),
            n_primary_feat,
            max_rung,
            use_params
        );
    }

    // DocString: node_vals_ts_arr
    /**
     * @brief Initialize the node value arrays
     * @details Using the size of the initial feature space construct the storage arrays.
     *          The arrays are converted with python_conv_utils::from_ndarray<int> and forwarded to the
     *          task-size overload with use_params = false.
     *
     * @param task_sz_train (np.ndarray) The number of training samples per task
     * @param task_sz_test (np.ndarray) The number of test samples per task
     * @param n_primary_feat (int) The number of primary features
     * @param max_rung (int) The maximum rung for all features
     */
    inline void initialize_values_arr(
        np::ndarray task_sz_train,
        np::ndarray task_sz_test,
        int n_primary_feat,
        int max_rung
    )
    {
        // This binding never sets up the parameterized feature storage
        const bool use_params = false;

        initialize_values_arr(
            python_conv_utils::from_ndarray<int>(task_sz_train),
            python_conv_utils::from_ndarray<int>(task_sz_test),
            n_primary_feat,
            max_rung,
            use_params
        );
    }
#endif
Thomas Purcell's avatar
Thomas Purcell committed
463
464
}

465
#endif