Skip to content
Snippets Groups Projects
Commit 72df70ae authored by Sebastian Eibl
Browse files

even larger batch size

parent 36add9ca
No related branches found
No related tags found
No related merge requests found
...@@ -196,7 +196,7 @@ void SISSOSolver::l0_regularization_gpu(const int n_dim) ...@@ -196,7 +196,7 @@ void SISSOSolver::l0_regularization_gpu(const int n_dim)
std::vector<std::vector<int>> feature_indices; std::vector<std::vector<int>> feature_indices;
while (!feature_combinations.is_finished()) while (!feature_combinations.is_finished())
{ {
const size_t MAX_BATCH_SIZE = 65536; const size_t MAX_BATCH_SIZE = 262144;
feature_indices.clear(); feature_indices.clear();
for (auto counter = 0; counter < MAX_BATCH_SIZE; ++counter) for (auto counter = 0; counter < MAX_BATCH_SIZE; ++counter)
{ {
... ...
......
...@@ -362,14 +362,14 @@ Kokkos::View<double*> LossFunctionPearsonRMSEGPU::operator()( ...@@ -362,14 +362,14 @@ Kokkos::View<double*> LossFunctionPearsonRMSEGPU::operator()(
int start = 0; int start = 0;
for (int task_idx = 0; task_idx < _n_task; ++task_idx) for (int task_idx = 0; task_idx < _n_task; ++task_idx)
{ {
set_a(_models, task_idx, start); set_a(_models, task_idx, start, batch_size);
set_b(task_idx, start); set_b(task_idx, start, batch_size);
Kokkos::fence(); Kokkos::fence();
least_squares(task_idx, start); least_squares(task_idx, start, batch_size);
set_a(_models, task_idx, start); set_a(_models, task_idx, start, batch_size);
Kokkos::fence(); Kokkos::fence();
set_prop_train_est(_estimated_training_properties, task_idx, start); set_prop_train_est(_estimated_training_properties, task_idx, start, batch_size);
start += _task_sizes_train[task_idx]; start += _task_sizes_train[task_idx];
} }
...@@ -434,7 +434,8 @@ void LossFunctionPearsonRMSEGPU::set_a(const std::vector<int>& inds, int taskind ...@@ -434,7 +434,8 @@ void LossFunctionPearsonRMSEGPU::set_a(const std::vector<int>& inds, int taskind
void LossFunctionPearsonRMSEGPU::set_a(Kokkos::View<int**, Kokkos::LayoutLeft> models, void LossFunctionPearsonRMSEGPU::set_a(Kokkos::View<int**, Kokkos::LayoutLeft> models,
int taskind, int taskind,
int start) int start,
int batch_size)
{ {
assert(_descriptor_matrix.extent(0) >= _task_sizes_train[taskind] + start); assert(_descriptor_matrix.extent(0) >= _task_sizes_train[taskind] + start);
Kokkos::deep_copy(_a, 1.0); Kokkos::deep_copy(_a, 1.0);
...@@ -443,7 +444,7 @@ void LossFunctionPearsonRMSEGPU::set_a(Kokkos::View<int**, Kokkos::LayoutLeft> m ...@@ -443,7 +444,7 @@ void LossFunctionPearsonRMSEGPU::set_a(Kokkos::View<int**, Kokkos::LayoutLeft> m
auto policy = Kokkos::MDRangePolicy<Kokkos::Rank<3>>( auto policy = Kokkos::MDRangePolicy<Kokkos::Rank<3>>(
{0, 0, 0}, {0, 0, 0},
{static_cast<size_t>(_task_sizes_train[taskind]), models.extent(0), models.extent(1)}); {_task_sizes_train[taskind], static_cast<int>(models.extent(0)), batch_size});
auto kernel = KOKKOS_LAMBDA(const int sample_idx, const int feature_idx, const int model_idx) auto kernel = KOKKOS_LAMBDA(const int sample_idx, const int feature_idx, const int model_idx)
{ {
a(sample_idx, feature_idx, model_idx) = descriptor_matrix(sample_idx + start, a(sample_idx, feature_idx, model_idx) = descriptor_matrix(sample_idx + start,
...@@ -462,12 +463,13 @@ void LossFunctionPearsonRMSEGPU::set_a(const std::vector<model_node_ptr>& feats, ...@@ -462,12 +463,13 @@ void LossFunctionPearsonRMSEGPU::set_a(const std::vector<model_node_ptr>& feats,
} }
} }
void LossFunctionPearsonRMSEGPU::set_b(int taskind, int start) void LossFunctionPearsonRMSEGPU::set_b(int taskind, int start,
int batch_size)
{ {
auto b = _b; auto b = _b;
auto training_properties = _training_properties; auto training_properties = _training_properties;
auto policy = Kokkos::MDRangePolicy<Kokkos::Rank<2>>({0, 0}, auto policy = Kokkos::MDRangePolicy<Kokkos::Rank<2>>({0, 0},
{_task_sizes_train[taskind], MAX_BATCHES}); {_task_sizes_train[taskind], batch_size});
auto kernel = KOKKOS_LAMBDA(const int material_idx, const int batch_idx) auto kernel = KOKKOS_LAMBDA(const int material_idx, const int batch_idx)
{ {
b(material_idx, batch_idx) = training_properties(start + material_idx); b(material_idx, batch_idx) = training_properties(start + material_idx);
...@@ -475,7 +477,7 @@ void LossFunctionPearsonRMSEGPU::set_b(int taskind, int start) ...@@ -475,7 +477,7 @@ void LossFunctionPearsonRMSEGPU::set_b(int taskind, int start)
Kokkos::parallel_for("LossFunctionPearsonRMSE::set_b", policy, kernel); Kokkos::parallel_for("LossFunctionPearsonRMSE::set_b", policy, kernel);
} }
int LossFunctionPearsonRMSEGPU::least_squares(int taskind, int start) int LossFunctionPearsonRMSEGPU::least_squares(int taskind, int start, int batch_size)
{ {
int info; int info;
...@@ -490,7 +492,7 @@ int LossFunctionPearsonRMSEGPU::least_squares(int taskind, int start) ...@@ -490,7 +492,7 @@ int LossFunctionPearsonRMSEGPU::least_squares(int taskind, int start)
_task_sizes_train[taskind], _task_sizes_train[taskind],
&info, &info,
nullptr, nullptr,
MAX_BATCHES); batch_size);
cudaDeviceSynchronize(); cudaDeviceSynchronize();
return info; return info;
...@@ -518,7 +520,8 @@ void LossFunctionPearsonRMSEGPU::set_prop_train_est(const std::vector<int>& inds ...@@ -518,7 +520,8 @@ void LossFunctionPearsonRMSEGPU::set_prop_train_est(const std::vector<int>& inds
void LossFunctionPearsonRMSEGPU::set_prop_train_est( void LossFunctionPearsonRMSEGPU::set_prop_train_est(
Kokkos::View<double* [MAX_BATCHES], Kokkos::LayoutLeft> estimated_training_properties, Kokkos::View<double* [MAX_BATCHES], Kokkos::LayoutLeft> estimated_training_properties,
int taskind, int taskind,
int start) int start,
int batch_size)
{ {
assert(estimated_training_properties.extent(0) >= start + _task_sizes_train[taskind]); assert(estimated_training_properties.extent(0) >= start + _task_sizes_train[taskind]);
assert(estimated_training_properties.extent(1) <= MAX_BATCHES); assert(estimated_training_properties.extent(1) <= MAX_BATCHES);
...@@ -531,7 +534,7 @@ void LossFunctionPearsonRMSEGPU::set_prop_train_est( ...@@ -531,7 +534,7 @@ void LossFunctionPearsonRMSEGPU::set_prop_train_est(
auto policy = Kokkos::MDRangePolicy<Kokkos::Rank<2>>( auto policy = Kokkos::MDRangePolicy<Kokkos::Rank<2>>(
{0, 0}, {0, 0},
{static_cast<size_t>(_task_sizes_train[taskind]), estimated_training_properties.extent(1)}); {_task_sizes_train[taskind], batch_size});
auto kernel = KOKKOS_LAMBDA(const int material_idx, const int model_idx) auto kernel = KOKKOS_LAMBDA(const int material_idx, const int model_idx)
{ {
for (size_t feature_idx = 0; feature_idx < n_dim; ++feature_idx) for (size_t feature_idx = 0; feature_idx < n_dim; ++feature_idx)
... ...
......
...@@ -42,7 +42,7 @@ public: ...@@ -42,7 +42,7 @@ public:
using PropertyView = Kokkos::View<double**, Kokkos::LayoutLeft>; using PropertyView = Kokkos::View<double**, Kokkos::LayoutLeft>;
protected: protected:
static constexpr int MAX_BATCHES = 65536; static constexpr int MAX_BATCHES = 262144;
/// dim 0: material samples /// dim 0: material samples
/// dim 1: features /// dim 1: features
...@@ -176,7 +176,10 @@ public: ...@@ -176,7 +176,10 @@ public:
*/ */
virtual void set_a(const std::vector<int>& inds, int taskind, int start); virtual void set_a(const std::vector<int>& inds, int taskind, int start);
virtual void set_a(Kokkos::View<int**, Kokkos::LayoutLeft> models, int taskind, int start); virtual void set_a(Kokkos::View<int**, Kokkos::LayoutLeft> models,
int taskind,
int start,
int batch_size = MAX_BATCHES);
/** /**
* @brief Set the A matrix used for solving the least squares regression * @brief Set the A matrix used for solving the least squares regression
...@@ -187,7 +190,7 @@ public: ...@@ -187,7 +190,7 @@ public:
*/ */
virtual void set_a(const std::vector<model_node_ptr>& feats, int taskind, int start); virtual void set_a(const std::vector<model_node_ptr>& feats, int taskind, int start);
void set_b(int taskind, int start); void set_b(int taskind, int start, int batch_size = MAX_BATCHES);
/** /**
* @brief Set the error vector * @brief Set the error vector
...@@ -201,7 +204,8 @@ public: ...@@ -201,7 +204,8 @@ public:
void set_prop_train_est( void set_prop_train_est(
Kokkos::View<double* [MAX_BATCHES], Kokkos::LayoutLeft> estimated_training_properties, Kokkos::View<double* [MAX_BATCHES], Kokkos::LayoutLeft> estimated_training_properties,
int taskind, int taskind,
int start); int start,
int batch_size = MAX_BATCHES);
/** /**
* @brief Set the error * @brief Set the error
...@@ -232,7 +236,7 @@ public: ...@@ -232,7 +236,7 @@ public:
* @param start The offset needed from the head of the feature's test data to where the task starts * @param start The offset needed from the head of the feature's test data to where the task starts
* @return info The final info value from dgels * @return info The final info value from dgels
*/ */
int least_squares(int taskind, int start); int least_squares(int taskind, int start, int batch_size = MAX_BATCHES);
/** /**
* @brief Reset the property used for projection * @brief Reset the property used for projection
... ...
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment