even larger batch size

72df70ae · Sebastian Eibl · 36add9ca · 72df70ae · 72df70ae · 72df70ae
Commit 72df70ae authored Apr 8, 2022 by Sebastian Eibl
--- a/src/descriptor_identifier/solver/SISSOSolver.cpp
+++ b/src/descriptor_identifier/solver/SISSOSolver.cpp
@@ -196,7 +196,7 @@ void SISSOSolver::l0_regularization_gpu(const int n_dim)
    std::vector<std::vector<int>> feature_indices;
    while (!feature_combinations.is_finished())
    {
-        const size_t MAX_BATCH_SIZE = 65536;
+        const size_t MAX_BATCH_SIZE = 262144;
        feature_indices.clear();
        for (auto counter = 0; counter < MAX_BATCH_SIZE; ++counter)
        {


--- a/src/loss_function/LossFunctionPearsonRMSEGPU.cpp
+++ b/src/loss_function/LossFunctionPearsonRMSEGPU.cpp
@@ -362,14 +362,14 @@ Kokkos::View<double*> LossFunctionPearsonRMSEGPU::operator()(
    int start = 0;
    for (int task_idx = 0; task_idx < _n_task; ++task_idx)
    {
-        set_a(_models, task_idx, start);
+        set_a(_models, task_idx, start, batch_size);
-        set_b(task_idx, start);
+        set_b(task_idx, start, batch_size);
        Kokkos::fence();
-        least_squares(task_idx, start);
+        least_squares(task_idx, start, batch_size);
-        set_a(_models, task_idx, start);
+        set_a(_models, task_idx, start, batch_size);
        Kokkos::fence();
-        set_prop_train_est(_estimated_training_properties, task_idx, start);
+        set_prop_train_est(_estimated_training_properties, task_idx, start, batch_size);
        start += _task_sizes_train[task_idx];
    }
@@ -434,7 +434,8 @@ void LossFunctionPearsonRMSEGPU::set_a(const std::vector<int>& inds, int taskind
 void LossFunctionPearsonRMSEGPU::set_a(Kokkos::View<int**, Kokkos::LayoutLeft> models,
                                       int taskind,
-                                       int start)
+                                       int start,
+                                       int batch_size)
 {
    assert(_descriptor_matrix.extent(0) >= _task_sizes_train[taskind] + start);
    Kokkos::deep_copy(_a, 1.0);
@@ -443,7 +444,7 @@ void LossFunctionPearsonRMSEGPU::set_a(Kokkos::View<int**, Kokkos::LayoutLeft> m
    auto policy = Kokkos::MDRangePolicy<Kokkos::Rank<3>>(
        {0, 0, 0},
-        {static_cast<size_t>(_task_sizes_train[taskind]), models.extent(0), models.extent(1)});
+        {_task_sizes_train[taskind], static_cast<int>(models.extent(0)), batch_size});
    auto kernel = KOKKOS_LAMBDA(const int sample_idx, const int feature_idx, const int model_idx)
    {
        a(sample_idx, feature_idx, model_idx) = descriptor_matrix(sample_idx + start,
@@ -462,12 +463,13 @@ void LossFunctionPearsonRMSEGPU::set_a(const std::vector<model_node_ptr>& feats,
    }
 }
-void LossFunctionPearsonRMSEGPU::set_b(int taskind, int start)
+void LossFunctionPearsonRMSEGPU::set_b(int taskind, int start,
+                                       int batch_size)
 {
    auto b = _b;
    auto training_properties = _training_properties;
    auto policy = Kokkos::MDRangePolicy<Kokkos::Rank<2>>({0, 0},
-                                                         {_task_sizes_train[taskind], MAX_BATCHES});
+                                                         {_task_sizes_train[taskind], batch_size});
    auto kernel = KOKKOS_LAMBDA(const int material_idx, const int batch_idx)
    {
        b(material_idx, batch_idx) = training_properties(start + material_idx);
@@ -475,7 +477,7 @@ void LossFunctionPearsonRMSEGPU::set_b(int taskind, int start)
    Kokkos::parallel_for("LossFunctionPearsonRMSE::set_b", policy, kernel);
 }
-int LossFunctionPearsonRMSEGPU::least_squares(int taskind, int start)
+int LossFunctionPearsonRMSEGPU::least_squares(int taskind, int start, int batch_size)
 {
    int info;
@@ -490,7 +492,7 @@ int LossFunctionPearsonRMSEGPU::least_squares(int taskind, int start)
                       _task_sizes_train[taskind],
                       &info,
                       nullptr,
-                       MAX_BATCHES);
+                       batch_size);
    cudaDeviceSynchronize();
    return info;
@@ -518,7 +520,8 @@ void LossFunctionPearsonRMSEGPU::set_prop_train_est(const std::vector<int>& inds
 void LossFunctionPearsonRMSEGPU::set_prop_train_est(
    Kokkos::View<double* [MAX_BATCHES], Kokkos::LayoutLeft> estimated_training_properties,
    int taskind,
-    int start)
+    int start,
+    int batch_size)
 {
    assert(estimated_training_properties.extent(0) >= start + _task_sizes_train[taskind]);
    assert(estimated_training_properties.extent(1) <= MAX_BATCHES);
@@ -531,7 +534,7 @@ void LossFunctionPearsonRMSEGPU::set_prop_train_est(
    auto policy = Kokkos::MDRangePolicy<Kokkos::Rank<2>>(
        {0, 0},
-        {static_cast<size_t>(_task_sizes_train[taskind]), estimated_training_properties.extent(1)});
+        {_task_sizes_train[taskind], batch_size});
    auto kernel = KOKKOS_LAMBDA(const int material_idx, const int model_idx)
    {
        for (size_t feature_idx = 0; feature_idx < n_dim; ++feature_idx)


--- a/src/loss_function/LossFunctionPearsonRMSEGPU.hpp
+++ b/src/loss_function/LossFunctionPearsonRMSEGPU.hpp
@@ -42,7 +42,7 @@ public:
    using PropertyView = Kokkos::View<double**, Kokkos::LayoutLeft>;
 protected:
-    static constexpr int MAX_BATCHES = 65536;
+    static constexpr int MAX_BATCHES = 262144;
    /// dim 0: material samples
    /// dim 1: features
@@ -176,7 +176,10 @@ public:
     */
    virtual void set_a(const std::vector<int>& inds, int taskind, int start);
-    virtual void set_a(Kokkos::View<int**, Kokkos::LayoutLeft> models, int taskind, int start);
+    virtual void set_a(Kokkos::View<int**, Kokkos::LayoutLeft> models,
+                       int taskind,
+                       int start,
+                       int batch_size = MAX_BATCHES);
    /**
     * @brief Set the A matrix used for solving the least squares regression
@@ -187,7 +190,7 @@ public:
     */
    virtual void set_a(const std::vector<model_node_ptr>& feats, int taskind, int start);
-    void set_b(int taskind, int start);
+    void set_b(int taskind, int start, int batch_size = MAX_BATCHES);
    /**
     * @brief Set the error vector
@@ -201,7 +204,8 @@ public:
    void set_prop_train_est(
        Kokkos::View<double* [MAX_BATCHES], Kokkos::LayoutLeft> estimated_training_properties,
        int taskind,
-        int start);
+        int start,
+        int batch_size = MAX_BATCHES);
    /**
     * @brief Set the error
@@ -232,7 +236,7 @@ public:
     * @param start The offset needed from the head of the feature's test data to where the task starts
     * @return info The final info value from dgels
     */
-    int least_squares(int taskind, int start);
+    int least_squares(int taskind, int start, int batch_size = MAX_BATCHES);
    /**
     * @brief Reset the the property used for projection