Skip to content
Snippets Groups Projects
Commit 72df70ae authored by Sebastian Eibl
Browse files

even larger batch size

parent 36add9ca
No related branches found
No related tags found
No related merge requests found
......@@ -196,7 +196,7 @@ void SISSOSolver::l0_regularization_gpu(const int n_dim)
std::vector<std::vector<int>> feature_indices;
while (!feature_combinations.is_finished())
{
const size_t MAX_BATCH_SIZE = 65536;
const size_t MAX_BATCH_SIZE = 262144;
feature_indices.clear();
for (auto counter = 0; counter < MAX_BATCH_SIZE; ++counter)
{
......
......@@ -362,14 +362,14 @@ Kokkos::View<double*> LossFunctionPearsonRMSEGPU::operator()(
int start = 0;
for (int task_idx = 0; task_idx < _n_task; ++task_idx)
{
set_a(_models, task_idx, start);
set_b(task_idx, start);
set_a(_models, task_idx, start, batch_size);
set_b(task_idx, start, batch_size);
Kokkos::fence();
least_squares(task_idx, start);
least_squares(task_idx, start, batch_size);
set_a(_models, task_idx, start);
set_a(_models, task_idx, start, batch_size);
Kokkos::fence();
set_prop_train_est(_estimated_training_properties, task_idx, start);
set_prop_train_est(_estimated_training_properties, task_idx, start, batch_size);
start += _task_sizes_train[task_idx];
}
......@@ -434,7 +434,8 @@ void LossFunctionPearsonRMSEGPU::set_a(const std::vector<int>& inds, int taskind
void LossFunctionPearsonRMSEGPU::set_a(Kokkos::View<int**, Kokkos::LayoutLeft> models,
int taskind,
int start)
int start,
int batch_size)
{
assert(_descriptor_matrix.extent(0) >= _task_sizes_train[taskind] + start);
Kokkos::deep_copy(_a, 1.0);
......@@ -443,7 +444,7 @@ void LossFunctionPearsonRMSEGPU::set_a(Kokkos::View<int**, Kokkos::LayoutLeft> m
auto policy = Kokkos::MDRangePolicy<Kokkos::Rank<3>>(
{0, 0, 0},
{static_cast<size_t>(_task_sizes_train[taskind]), models.extent(0), models.extent(1)});
{_task_sizes_train[taskind], static_cast<int>(models.extent(0)), batch_size});
auto kernel = KOKKOS_LAMBDA(const int sample_idx, const int feature_idx, const int model_idx)
{
a(sample_idx, feature_idx, model_idx) = descriptor_matrix(sample_idx + start,
......@@ -462,12 +463,13 @@ void LossFunctionPearsonRMSEGPU::set_a(const std::vector<model_node_ptr>& feats,
}
}
void LossFunctionPearsonRMSEGPU::set_b(int taskind, int start)
void LossFunctionPearsonRMSEGPU::set_b(int taskind, int start,
int batch_size)
{
auto b = _b;
auto training_properties = _training_properties;
auto policy = Kokkos::MDRangePolicy<Kokkos::Rank<2>>({0, 0},
{_task_sizes_train[taskind], MAX_BATCHES});
{_task_sizes_train[taskind], batch_size});
auto kernel = KOKKOS_LAMBDA(const int material_idx, const int batch_idx)
{
b(material_idx, batch_idx) = training_properties(start + material_idx);
......@@ -475,7 +477,7 @@ void LossFunctionPearsonRMSEGPU::set_b(int taskind, int start)
Kokkos::parallel_for("LossFunctionPearsonRMSE::set_b", policy, kernel);
}
int LossFunctionPearsonRMSEGPU::least_squares(int taskind, int start)
int LossFunctionPearsonRMSEGPU::least_squares(int taskind, int start, int batch_size)
{
int info;
......@@ -490,7 +492,7 @@ int LossFunctionPearsonRMSEGPU::least_squares(int taskind, int start)
_task_sizes_train[taskind],
&info,
nullptr,
MAX_BATCHES);
batch_size);
cudaDeviceSynchronize();
return info;
......@@ -518,7 +520,8 @@ void LossFunctionPearsonRMSEGPU::set_prop_train_est(const std::vector<int>& inds
void LossFunctionPearsonRMSEGPU::set_prop_train_est(
Kokkos::View<double* [MAX_BATCHES], Kokkos::LayoutLeft> estimated_training_properties,
int taskind,
int start)
int start,
int batch_size)
{
assert(estimated_training_properties.extent(0) >= start + _task_sizes_train[taskind]);
assert(estimated_training_properties.extent(1) <= MAX_BATCHES);
......@@ -531,7 +534,7 @@ void LossFunctionPearsonRMSEGPU::set_prop_train_est(
auto policy = Kokkos::MDRangePolicy<Kokkos::Rank<2>>(
{0, 0},
{static_cast<size_t>(_task_sizes_train[taskind]), estimated_training_properties.extent(1)});
{_task_sizes_train[taskind], batch_size});
auto kernel = KOKKOS_LAMBDA(const int material_idx, const int model_idx)
{
for (size_t feature_idx = 0; feature_idx < n_dim; ++feature_idx)
......
......@@ -42,7 +42,7 @@ public:
using PropertyView = Kokkos::View<double**, Kokkos::LayoutLeft>;
protected:
static constexpr int MAX_BATCHES = 65536;
static constexpr int MAX_BATCHES = 262144;
/// dim 0: material samples
/// dim 1: features
......@@ -176,7 +176,10 @@ public:
*/
virtual void set_a(const std::vector<int>& inds, int taskind, int start);
virtual void set_a(Kokkos::View<int**, Kokkos::LayoutLeft> models, int taskind, int start);
virtual void set_a(Kokkos::View<int**, Kokkos::LayoutLeft> models,
int taskind,
int start,
int batch_size = MAX_BATCHES);
/**
* @brief Set the A matrix used for solving the least squares regression
......@@ -187,7 +190,7 @@ public:
*/
virtual void set_a(const std::vector<model_node_ptr>& feats, int taskind, int start);
void set_b(int taskind, int start);
void set_b(int taskind, int start, int batch_size = MAX_BATCHES);
/**
* @brief Set the error vector
......@@ -201,7 +204,8 @@ public:
void set_prop_train_est(
Kokkos::View<double* [MAX_BATCHES], Kokkos::LayoutLeft> estimated_training_properties,
int taskind,
int start);
int start,
int batch_size = MAX_BATCHES);
/**
* @brief Set the error
......@@ -232,7 +236,7 @@ public:
* @param start The offset needed from the head of the feature's test data to where the task starts
* @return info The final info value from dgels
*/
int least_squares(int taskind, int start);
int least_squares(int taskind, int start, int batch_size = MAX_BATCHES);
/**
* @brief Reset the property used for projection
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment