diff --git a/src/descriptor_identifier/solver/SISSOSolver.cpp b/src/descriptor_identifier/solver/SISSOSolver.cpp index bbec3969af87005df3bf61a7f1963fd6abf41c02..c44d7d9ae5757cb73a6a2985f673bd2fd587dd90 100644 --- a/src/descriptor_identifier/solver/SISSOSolver.cpp +++ b/src/descriptor_identifier/solver/SISSOSolver.cpp @@ -196,7 +196,7 @@ void SISSOSolver::l0_regularization_gpu(const int n_dim) std::vector<std::vector<int>> feature_indices; while (!feature_combinations.is_finished()) { - const size_t MAX_BATCH_SIZE = 65536; + const size_t MAX_BATCH_SIZE = 262144; feature_indices.clear(); for (auto counter = 0; counter < MAX_BATCH_SIZE; ++counter) { diff --git a/src/loss_function/LossFunctionPearsonRMSEGPU.cpp b/src/loss_function/LossFunctionPearsonRMSEGPU.cpp index 8f5d2b364d04700581b3ed81ed67e3b893d489ee..0b5759aaf358588a92d904b8f60e0e996a400dce 100644 --- a/src/loss_function/LossFunctionPearsonRMSEGPU.cpp +++ b/src/loss_function/LossFunctionPearsonRMSEGPU.cpp @@ -362,14 +362,14 @@ Kokkos::View<double*> LossFunctionPearsonRMSEGPU::operator()( int start = 0; for (int task_idx = 0; task_idx < _n_task; ++task_idx) { - set_a(_models, task_idx, start); - set_b(task_idx, start); + set_a(_models, task_idx, start, batch_size); + set_b(task_idx, start, batch_size); Kokkos::fence(); - least_squares(task_idx, start); + least_squares(task_idx, start, batch_size); - set_a(_models, task_idx, start); + set_a(_models, task_idx, start, batch_size); Kokkos::fence(); - set_prop_train_est(_estimated_training_properties, task_idx, start); + set_prop_train_est(_estimated_training_properties, task_idx, start, batch_size); start += _task_sizes_train[task_idx]; } @@ -434,7 +434,8 @@ void LossFunctionPearsonRMSEGPU::set_a(const std::vector<int>& inds, int taskind void LossFunctionPearsonRMSEGPU::set_a(Kokkos::View<int**, Kokkos::LayoutLeft> models, int taskind, - int start) + int start, + int batch_size) { assert(_descriptor_matrix.extent(0) >= _task_sizes_train[taskind] + start); Kokkos::deep_copy(_a, 1.0); @@ -443,7 +444,7 @@ void LossFunctionPearsonRMSEGPU::set_a(Kokkos::View<int**, Kokkos::LayoutLeft> m auto policy = Kokkos::MDRangePolicy<Kokkos::Rank<3>>( {0, 0, 0}, - {static_cast<size_t>(_task_sizes_train[taskind]), models.extent(0), models.extent(1)}); + {_task_sizes_train[taskind], static_cast<int>(models.extent(0)), batch_size}); auto kernel = KOKKOS_LAMBDA(const int sample_idx, const int feature_idx, const int model_idx) { a(sample_idx, feature_idx, model_idx) = descriptor_matrix(sample_idx + start, @@ -462,12 +463,13 @@ void LossFunctionPearsonRMSEGPU::set_a(const std::vector<model_node_ptr>& feats, } } -void LossFunctionPearsonRMSEGPU::set_b(int taskind, int start) +void LossFunctionPearsonRMSEGPU::set_b(int taskind, int start, + int batch_size) { auto b = _b; auto training_properties = _training_properties; auto policy = Kokkos::MDRangePolicy<Kokkos::Rank<2>>({0, 0}, - {_task_sizes_train[taskind], MAX_BATCHES}); + {_task_sizes_train[taskind], batch_size}); auto kernel = KOKKOS_LAMBDA(const int material_idx, const int batch_idx) { b(material_idx, batch_idx) = training_properties(start + material_idx); @@ -475,7 +477,7 @@ void LossFunctionPearsonRMSEGPU::set_b(int taskind, int start) Kokkos::parallel_for("LossFunctionPearsonRMSE::set_b", policy, kernel); } -int LossFunctionPearsonRMSEGPU::least_squares(int taskind, int start) +int LossFunctionPearsonRMSEGPU::least_squares(int taskind, int start, int batch_size) { int info; @@ -490,7 +492,7 @@ int LossFunctionPearsonRMSEGPU::least_squares(int taskind, int start) _task_sizes_train[taskind], &info, nullptr, - MAX_BATCHES); + batch_size); cudaDeviceSynchronize(); return info; @@ -518,7 +520,8 @@ void LossFunctionPearsonRMSEGPU::set_prop_train_est(const std::vector<int>& inds void LossFunctionPearsonRMSEGPU::set_prop_train_est( Kokkos::View<double* [MAX_BATCHES], Kokkos::LayoutLeft> estimated_training_properties, int taskind, - int start) + int start, + int batch_size) { assert(estimated_training_properties.extent(0) >= start + _task_sizes_train[taskind]); assert(estimated_training_properties.extent(1) <= MAX_BATCHES); @@ -531,7 +534,7 @@ void LossFunctionPearsonRMSEGPU::set_prop_train_est( auto policy = Kokkos::MDRangePolicy<Kokkos::Rank<2>>( {0, 0}, - {static_cast<size_t>(_task_sizes_train[taskind]), estimated_training_properties.extent(1)}); + {_task_sizes_train[taskind], batch_size}); auto kernel = KOKKOS_LAMBDA(const int material_idx, const int model_idx) { for (size_t feature_idx = 0; feature_idx < n_dim; ++feature_idx) diff --git a/src/loss_function/LossFunctionPearsonRMSEGPU.hpp b/src/loss_function/LossFunctionPearsonRMSEGPU.hpp index 2fabfcc14e29bdd11ee4b6c0ccaffa8f5ec44982..e80e4a284cb63143e405b42f80ff286e708bf518 100644 --- a/src/loss_function/LossFunctionPearsonRMSEGPU.hpp +++ b/src/loss_function/LossFunctionPearsonRMSEGPU.hpp @@ -42,7 +42,7 @@ public: using PropertyView = Kokkos::View<double**, Kokkos::LayoutLeft>; protected: - static constexpr int MAX_BATCHES = 65536; + static constexpr int MAX_BATCHES = 262144; /// dim 0: material samples /// dim 1: features @@ -176,7 +176,10 @@ public: */ virtual void set_a(const std::vector<int>& inds, int taskind, int start); - virtual void set_a(Kokkos::View<int**, Kokkos::LayoutLeft> models, int taskind, int start); + virtual void set_a(Kokkos::View<int**, Kokkos::LayoutLeft> models, + int taskind, + int start, + int batch_size = MAX_BATCHES); /** * @brief Set the A matrix used for solving the least squares regression @@ -187,7 +190,7 @@ public: */ virtual void set_a(const std::vector<model_node_ptr>& feats, int taskind, int start); - void set_b(int taskind, int start); + void set_b(int taskind, int start, int batch_size = MAX_BATCHES); /** * @brief Set the error vector @@ -201,7 +204,8 @@ public: void set_prop_train_est( Kokkos::View<double* [MAX_BATCHES], Kokkos::LayoutLeft> estimated_training_properties, int taskind, - int start); + int start, + int batch_size = MAX_BATCHES); /** * @brief Set the error @@ -232,7 +236,7 @@ public: * @param start The offset needed from the head of the feature's test data to where the task starts * @return info The final info value from dgels */ - int least_squares(int taskind, int start); + int least_squares(int taskind, int start, int batch_size = MAX_BATCHES); /** * @brief Reset the the property used for projection