diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 7b7ac391c0de7aa4e1fc021406bf06e2f3c50db1..fa910d1a6acaf7af96f3725b34d2a2dfd4339c23 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -23,7 +23,7 @@ build-intel-base:
     - export MKLROOT=/home/runner/intel/oneapi/mkl/latest/
     - export LD_LIBRARY_PATH=$I_MPI_ROOT/lib/:$I_MPI_ROOT/lib/release:$MKLROOT/lib/intel64:$INTEL_COMP_ROOT/lib/:$INTEL_COMP_ROOT/compiler/lib/intel64/:$LD_LIBRARY_PATH:$HOME/intel/oneapi/intelpython/latest/lib/:$HOME/intel/oneapi/intelpython/latest/lib/python3.7
     - export PATH=$INTEL_COMP_ROOT/bin/:$INTEL_COMP_ROOT/bin/intel64:$I_MPI_ROOT/bin:$PATH
-    - cmake -DKokkos_CXX_STANDARD=17 -DKokkos_ARCH_AMPERE80=ON -DKokkos_ENABLE_SERIAL=ON -DKokkos_ENABLE_OPENMP=ON -DKokkos_ENABLE_CUDA=ON -DKokkos_ENABLE_CUDA_CONSTEXPR=ON -DKokkos_ENABLE_CUDA_LAMBDA=ON -DKokkos_ENABLE_CUDA_UVM=ON -DCMAKE_CXX_COMPILER=icpc -DCMAKE_C_COMPILER=icc -DCMAKE_CXX_FLAGS="-O3" -DEXTERNAL_BUILD_N_PROCS=4 -DBUILD_TESTS=ON -DBUILD_PARAMS=OFF -DBUILD_PYTHON=OFF -DCMAKE_INSTALL_PREFIX=../intel_base/ ../
+    - cmake -DSISSO_ENABLE_CUDA=ON -DKokkos_ARCH_AMPERE80=ON -DCMAKE_CXX_COMPILER=icpc -DCMAKE_C_COMPILER=icc -DCMAKE_CXX_FLAGS="-O3" -DEXTERNAL_BUILD_N_PROCS=4 -DBUILD_TESTS=ON -DBUILD_PARAMS=OFF -DBUILD_PYTHON=OFF -DCMAKE_INSTALL_PREFIX=../intel_base/ ../
     - make -j4
     - make install
     - cd ../
@@ -49,7 +49,7 @@ build-intel-py:
     - export LD_LIBRARY_PATH=$I_MPI_ROOT/lib/:$I_MPI_ROOT/lib/release:$MKLROOT/lib/intel64:$INTEL_COMP_ROOT/lib/:$INTEL_COMP_ROOT/compiler/lib/intel64/:$LD_LIBRARY_PATH:$HOME/intel/oneapi/intelpython/latest/lib/:$HOME/intel/oneapi/intelpython/latest/lib/python3.7
     - export PYTHONPATH=$HOME/intel/oneapi/intelpython/latest/lib/python3.7/site-packages/:cpp_sisso_env_intel_py/lib/python3.7/site-packages/
     - export PATH=$INTEL_COMP_ROOT/bin/:$INTEL_COMP_ROOT/bin/intel64:$I_MPI_ROOT/bin:$PATH
-    - cmake -DKokkos_CXX_STANDARD=17 -DKokkos_ARCH_AMPERE80=ON -DKokkos_ENABLE_SERIAL=ON -DKokkos_ENABLE_OPENMP=ON -DKokkos_ENABLE_CUDA=ON -DKokkos_ENABLE_CUDA_CONSTEXPR=ON -DKokkos_ENABLE_CUDA_LAMBDA=ON -DKokkos_ENABLE_CUDA_UVM=ON -DCMAKE_CXX_COMPILER=icpc -DCMAKE_C_COMPILER=icc -DCMAKE_CXX_FLAGS="-O3" -DEXTERNAL_BUILD_N_PROCS=4 -DBUILD_TESTS=OFF -DBUILD_PARAMS=OFF -DBUILD_PYTHON=ON -DCMAKE_INSTALL_PREFIX=../intel_py/ ../
+    - cmake -DCMAKE_CXX_COMPILER=icpc -DCMAKE_C_COMPILER=icc -DCMAKE_CXX_FLAGS="-O3" -DEXTERNAL_BUILD_N_PROCS=4 -DBUILD_TESTS=OFF -DBUILD_PARAMS=OFF -DBUILD_PYTHON=ON -DCMAKE_INSTALL_PREFIX=../intel_py/ ../
     - make -j4
     - make install
     - cd ../
@@ -73,7 +73,7 @@ build-intel-param:
     - export MKLROOT=/home/runner/intel/oneapi/mkl/latest/
     - export LD_LIBRARY_PATH=$I_MPI_ROOT/lib/:$I_MPI_ROOT/lib/release:$MKLROOT/lib/intel64:$INTEL_COMP_ROOT/lib/:$INTEL_COMP_ROOT/compiler/lib/intel64/:$LD_LIBRARY_PATH:$HOME/intel/oneapi/intelpython/latest/lib/:$HOME/intel/oneapi/intelpython/latest/lib/python3.7
     - export PATH=$INTEL_COMP_ROOT/bin/:$INTEL_COMP_ROOT/bin/intel64:$I_MPI_ROOT/bin:$PATH
-    - cmake -DKokkos_CXX_STANDARD=17 -DKokkos_ARCH_AMPERE80=ON -DKokkos_ENABLE_SERIAL=ON -DKokkos_ENABLE_OPENMP=ON -DKokkos_ENABLE_CUDA=ON -DKokkos_ENABLE_CUDA_CONSTEXPR=ON -DKokkos_ENABLE_CUDA_LAMBDA=ON -DKokkos_ENABLE_CUDA_UVM=ON -DCMAKE_CXX_COMPILER=icpc -DCMAKE_C_COMPILER=icc -DCMAKE_CXX_FLAGS="-O3" -DEXTERNAL_BUILD_N_PROCS=4 -DBUILD_TESTS=ON -DBUILD_PARAMS=ON -DBUILD_PYTHON=OFF -DCMAKE_INSTALL_PREFIX=../intel_param/ ../
+    - cmake -DSISSO_ENABLE_CUDA=ON -DKokkos_ARCH_AMPERE80=ON -DCMAKE_CXX_COMPILER=icpc -DCMAKE_C_COMPILER=icc -DCMAKE_CXX_FLAGS="-O3" -DEXTERNAL_BUILD_N_PROCS=4 -DBUILD_TESTS=ON -DBUILD_PARAMS=ON -DBUILD_PYTHON=OFF -DCMAKE_INSTALL_PREFIX=../intel_param/ ../
     - make -j4
     - make install
     - cd ../
@@ -99,7 +99,7 @@ build-intel-param-py:
     - export LD_LIBRARY_PATH=$I_MPI_ROOT/lib/:$I_MPI_ROOT/lib/release:$MKLROOT/lib/intel64:$INTEL_COMP_ROOT/lib/:$INTEL_COMP_ROOT/compiler/lib/intel64/:$LD_LIBRARY_PATH:$HOME/intel/oneapi/intelpython/latest/lib/:$HOME/intel/oneapi/intelpython/latest/lib/python3.7
     - export PYTHONPATH=$HOME/intel/oneapi/intelpython/latest/lib/python3.7/site-packages/:cpp_sisso_env_intel_param_py/lib/python3.7/site-packages/
     - export PATH=$INTEL_COMP_ROOT/bin/:$INTEL_COMP_ROOT/bin/intel64:$I_MPI_ROOT/bin:$PATH
-    - cmake -DKokkos_CXX_STANDARD=17 -DKokkos_ARCH_AMPERE80=ON -DKokkos_ENABLE_SERIAL=ON -DKokkos_ENABLE_OPENMP=ON -DKokkos_ENABLE_CUDA=ON -DKokkos_ENABLE_CUDA_CONSTEXPR=ON -DKokkos_ENABLE_CUDA_LAMBDA=ON -DKokkos_ENABLE_CUDA_UVM=ON -DCMAKE_CXX_COMPILER=icpc -DCMAKE_C_COMPILER=icc -DCMAKE_CXX_FLAGS="-O3" -DEXTERNAL_BUILD_N_PROCS=4 -DBUILD_TESTS=OFF -DBUILD_PARAMS=ON -DBUILD_PYTHON=ON -DCMAKE_INSTALL_PREFIX=../intel_param_py/ ../
+    - cmake -DCMAKE_CXX_COMPILER=icpc -DCMAKE_C_COMPILER=icc -DCMAKE_CXX_FLAGS="-O3" -DEXTERNAL_BUILD_N_PROCS=4 -DBUILD_TESTS=OFF -DBUILD_PARAMS=ON -DBUILD_PYTHON=ON -DCMAKE_INSTALL_PREFIX=../intel_param_py/ ../
     - make -j4
     - make install
     - cd ../
@@ -243,7 +243,7 @@ build-gnu-base:
     - export LD_LIBRARY_PATH=$HOME/intel/oneapi/intelpython/latest/lib/:$HOME/intel/oneapi/intelpython/latest/lib/python3.7:$LD_LIBRARY_PATH
     - mkdir build_gnu_base/
     - cd build_gnu_base/
-    - cmake -DKokkos_CXX_STANDARD=17 -DKokkos_ARCH_AMPERE80=ON -DKokkos_ENABLE_SERIAL=ON -DKokkos_ENABLE_OPENMP=ON -DKokkos_ENABLE_CUDA=ON -DKokkos_ENABLE_CUDA_CONSTEXPR=ON -DKokkos_ENABLE_CUDA_LAMBDA=ON -DKokkos_ENABLE_CUDA_UVM=ON -DCMAKE_CXX_COMPILER=g++ -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_FLAGS="-O3" -DEXTERNAL_BUILD_N_PROCS=4 -DBUILD_TESTS=ON -DBUILD_PARAMS=OFF -DBUILD_PYTHON=OFF  -DCMAKE_INSTALL_PREFIX=../gnu_base/ ../
+    - cmake -DSISSO_ENABLE_CUDA=ON -DKokkos_ARCH_AMPERE80=ON -DCMAKE_CXX_COMPILER=g++ -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_FLAGS="-O3" -DEXTERNAL_BUILD_N_PROCS=4 -DBUILD_TESTS=ON -DBUILD_PARAMS=OFF -DBUILD_PYTHON=OFF  -DCMAKE_INSTALL_PREFIX=../gnu_base/ ../
     - make -j4
     - make install
     - cd ../
@@ -261,7 +261,7 @@ build-gnu-param:
     - export LD_LIBRARY_PATH=$HOME/intel/oneapi/intelpython/latest/lib/:$HOME/intel/oneapi/intelpython/latest/lib/python3.7:$LD_LIBRARY_PATH
     - mkdir build_gnu_param/
     - cd build_gnu_param/
-    - cmake -DKokkos_CXX_STANDARD=17 -DKokkos_ARCH_AMPERE80=ON -DKokkos_ENABLE_SERIAL=ON -DKokkos_ENABLE_OPENMP=ON -DKokkos_ENABLE_CUDA=ON -DKokkos_ENABLE_CUDA_CONSTEXPR=ON -DKokkos_ENABLE_CUDA_LAMBDA=ON -DKokkos_ENABLE_CUDA_UVM=ON -DCMAKE_CXX_COMPILER=g++ -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_FLAGS="-O3" -DEXTERNAL_BUILD_N_PROCS=4 -DBUILD_TESTS=ON -DBUILD_PARAMS=ON -DBUILD_PYTHON=OFF  -DCMAKE_INSTALL_PREFIX=../gnu_param/ ../
+    - cmake -DSISSO_ENABLE_CUDA=ON -DKokkos_ARCH_AMPERE80=ON -DCMAKE_CXX_COMPILER=g++ -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_FLAGS="-O3" -DEXTERNAL_BUILD_N_PROCS=4 -DBUILD_TESTS=ON -DBUILD_PARAMS=ON -DBUILD_PYTHON=OFF  -DCMAKE_INSTALL_PREFIX=../gnu_param/ ../
     - make -j4
     - make install
     - cd ../
@@ -282,7 +282,7 @@ build-gnu-py:
     - export PYTHONPATH=$HOME/intel/oneapi/intelpython/latest/lib/python3.7/site-packages/:cpp_sisso_gnu_py_env/lib/python3.7/site-packages/
     - mkdir build_py/
     - cd build_py/
-    - cmake -DKokkos_CXX_STANDARD=17 -DKokkos_ARCH_AMPERE80=ON -DKokkos_ENABLE_SERIAL=ON -DKokkos_ENABLE_OPENMP=ON -DKokkos_ENABLE_CUDA=ON -DKokkos_ENABLE_CUDA_CONSTEXPR=ON -DKokkos_ENABLE_CUDA_LAMBDA=ON -DKokkos_ENABLE_CUDA_UVM=ON -DCMAKE_CXX_COMPILER=g++ -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_FLAGS="-O3" -DEXTERNAL_BUILD_N_PROCS=4 -DBUILD_TESTS=OFF -DBUILD_PARAMS=OFF -DBUILD_PYTHON=ON  -DCMAKE_INSTALL_PREFIX=../gnu_py/ ../
+    - cmake -DCMAKE_CXX_COMPILER=g++ -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_FLAGS="-O3" -DEXTERNAL_BUILD_N_PROCS=4 -DBUILD_TESTS=OFF -DBUILD_PARAMS=OFF -DBUILD_PYTHON=ON  -DCMAKE_INSTALL_PREFIX=../gnu_py/ ../
     - make -j4
     - make install
     - cd ../
@@ -304,7 +304,7 @@ build-gnu-param-py:
     - export PYTHONPATH=$HOME/intel/oneapi/intelpython/latest/lib/python3.7/site-packages/:cpp_sisso_gnu_param_py_env/lib/python3.7/site-packages/
     - mkdir build_param_py/
     - cd build_param_py/
-    - cmake -DKokkos_CXX_STANDARD=17 -DKokkos_ARCH_AMPERE80=ON -DKokkos_ENABLE_SERIAL=ON -DKokkos_ENABLE_OPENMP=ON -DKokkos_ENABLE_CUDA=ON -DKokkos_ENABLE_CUDA_CONSTEXPR=ON -DKokkos_ENABLE_CUDA_LAMBDA=ON -DKokkos_ENABLE_CUDA_UVM=ON -DCMAKE_CXX_COMPILER=g++ -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_FLAGS="-O3" -DEXTERNAL_BUILD_N_PROCS=4 -DBUILD_TESTS=OFF -DBUILD_PARAMS=ON -DBUILD_PYTHON=ON  -DCMAKE_INSTALL_PREFIX=../gnu_param_py/ ../
+    - cmake -DCMAKE_CXX_COMPILER=g++ -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_FLAGS="-O3" -DEXTERNAL_BUILD_N_PROCS=4 -DBUILD_TESTS=OFF -DBUILD_PARAMS=ON -DBUILD_PYTHON=ON  -DCMAKE_INSTALL_PREFIX=../gnu_param_py/ ../
     - make -j4
     - make install
     - cd ../
@@ -442,7 +442,7 @@ build-gnu-gcov:
     - export PYTHONPATH=$HOME/intel/oneapi/intelpython/latest/lib/python3.7/site-packages/:`pwd`/cpp_sisso_gnu_gcov_env/lib/python3.7/site-packages/
     - mkdir build_gcov/
     - cd build_gcov/
-    - cmake -DKokkos_CXX_STANDARD=17 -DKokkos_ARCH_AMPERE80=ON -DKokkos_ENABLE_SERIAL=ON -DKokkos_ENABLE_OPENMP=ON -DKokkos_ENABLE_CUDA=ON -DKokkos_ENABLE_CUDA_CONSTEXPR=ON -DKokkos_ENABLE_CUDA_LAMBDA=ON -DKokkos_ENABLE_CUDA_UVM=ON -DCMAKE_CXX_COMPILER=g++ -DCMAKE_C_COMPILER=gcc -DCMAKE_BUILD_TYPE="Coverage" -DMPIEXEC_EXECUTABLE=/usr/bin/mpiexec -DBUILD_TESTS=ON -DBUILD_PARAMS=ON -DBUILD_PYTHON=ON  -DCMAKE_INSTALL_PREFIX=../gnu_gcov/ ../
+    - cmake -DCMAKE_CXX_COMPILER=g++ -DCMAKE_C_COMPILER=gcc -DCMAKE_BUILD_TYPE="Coverage" -DMPIEXEC_EXECUTABLE=/usr/bin/mpiexec -DBUILD_TESTS=ON -DBUILD_PARAMS=ON -DBUILD_PYTHON=ON  -DCMAKE_INSTALL_PREFIX=../gnu_gcov/ ../
     - make install
     - make coverage
     - cd ../
@@ -467,7 +467,7 @@ build-gnu-lcov:
     - export PYTHONPATH=$HOME/intel/oneapi/intelpython/latest/lib/python3.7/site-packages/:`pwd`/cpp_sisso_gnu_lcov_env/lib/python3.7/site-packages/
     - mkdir build_lcov/
     - cd build_lcov/
-    - cmake -DKokkos_CXX_STANDARD=17 -DKokkos_ARCH_AMPERE80=ON -DKokkos_ENABLE_SERIAL=ON -DKokkos_ENABLE_OPENMP=ON -DKokkos_ENABLE_CUDA=ON -DKokkos_ENABLE_CUDA_CONSTEXPR=ON -DKokkos_ENABLE_CUDA_LAMBDA=ON -DKokkos_ENABLE_CUDA_UVM=ON -DCMAKE_CXX_COMPILER=g++ -DCMAKE_C_COMPILER=gcc -DCMAKE_BUILD_TYPE="Coverage" -DMPIEXEC_EXECUTABLE=/usr/bin/mpiexec -DBUILD_TESTS=ON -DBUILD_PARAMS=ON -DBUILD_PYTHON=ON -DCMAKE_INSTALL_PREFIX=../gnu_lcov/ ../
+    - cmake -DCMAKE_CXX_COMPILER=g++ -DCMAKE_C_COMPILER=gcc -DCMAKE_BUILD_TYPE="Coverage" -DMPIEXEC_EXECUTABLE=/usr/bin/mpiexec -DBUILD_TESTS=ON -DBUILD_PARAMS=ON -DBUILD_PYTHON=ON -DCMAKE_INSTALL_PREFIX=../gnu_lcov/ ../
     - make install
     - make coverage_html
     - cd ../
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 3e6d9a2d5409148e84cbfd665a1eab227f86f321..d633bd2e691ec959a32e515ae85cc191351c2260 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -39,10 +39,21 @@ set_target_properties(libsisso
     PREFIX ""
 )
 
-target_link_libraries(libsisso Kokkos::kokkos)
+option(SISSO_ENABLE_CUDA "Enable CUDA support" OFF)
+if (SISSO_ENABLE_CUDA)
+    target_compile_definitions(libsisso PUBLIC SISSO_ENABLE_CUDA) # PUBLIC: RMSEGPU.hpp includes utils/cuda.hpp, which switches on this macro, so targets linking libsisso must see it too
+
+    find_package(CUDAToolkit REQUIRED)
+    target_link_libraries(libsisso CUDA::cudart CUDA::cublas)
 
-find_package(CUDAToolkit REQUIRED)
-target_link_libraries(libsisso CUDA::cudart CUDA::cublas)
+    set(Kokkos_ENABLE_CUDA ON CACHE BOOL "" FORCE) # Kokkos CUDA options follow SISSO_ENABLE_CUDA
+    set(Kokkos_ENABLE_CUDA_CONSTEXPR ON CACHE BOOL "" FORCE)
+    set(Kokkos_ENABLE_CUDA_LAMBDA ON CACHE BOOL "" FORCE)
+    set(Kokkos_ENABLE_CUDA_UVM ON CACHE BOOL "" FORCE)
+endif()
+set(Kokkos_CXX_STANDARD 17 CACHE STRING "" FORCE) # forced for every build, with or without CUDA
+set(Kokkos_ENABLE_SERIAL ON CACHE BOOL "" FORCE) # forced for every build, with or without CUDA
+target_link_libraries(libsisso Kokkos::kokkos)
 
 target_link_libraries(libsisso ${LAPACK_LIBRARIES} ${MPI_LIBRARIES} -Wl,--rpath=${Boost_LIB_DIR} -Wl,--rpath=${LAPACK_DIR} ${Boost_LIBRARIES} ${COIN_CLP_LIBRARIES} ${OPENMP_LIBRARIES} ${FMT_LIBRARIES})
 install(TARGETS libsisso DESTINATION ${CMAKE_INSTALL_PREFIX}/lib/)
diff --git a/src/loss_function/RMSEGPU.cpp b/src/loss_function/RMSEGPU.cpp
index 2d8d679b949b39e5b6d4af047ae2c070509b06d5..8b633ccadcc0d4069bbbe2184f4d40694ceec1e7 100644
--- a/src/loss_function/RMSEGPU.cpp
+++ b/src/loss_function/RMSEGPU.cpp
@@ -25,20 +25,6 @@
 
 #include <Kokkos_StdAlgorithms.hpp>
 
-#define CHECK_CUDA_ERROR(ans)                        \
-    {                                                \
-        check_cuda_error((ans), __FILE__, __LINE__); \
-    }
-inline void check_cuda_error(cudaError_t code, const char* file, int line, bool abort = true)
-{
-    if (code != cudaSuccess)
-    {
-        std::cerr << fmt::format("CUDA ERROR: {} {} {}", cudaGetErrorString(code), file, line)
-                  << std::endl;
-        if (abort) exit(code);
-    }
-}
-
 void get_mean_squared_difference(const Kokkos::View<double*>& output,
                                  const Kokkos::View<const double*>& input1,
                                  const Kokkos::View<const double**, Kokkos::LayoutLeft>& input2)
diff --git a/src/loss_function/RMSEGPU.hpp b/src/loss_function/RMSEGPU.hpp
index f1016b0c600438208aef80484843f1d31ccdfa72..1c47b3624859fd8306ffad4429cda0f4405416e3 100644
--- a/src/loss_function/RMSEGPU.hpp
+++ b/src/loss_function/RMSEGPU.hpp
@@ -16,14 +16,11 @@
  *  @brief Defines the class that uses a Pearson correlation projection operator and a least-squares regression objective function
  *
  *  @author Sebastian Eibl <sebastian.eibl@mpcdf.mpg.de>
- *  @bug No known bugs.
  */
 
-#ifndef LOSS_FUNCTION_PEARSON_RMSE_GPU
-#define LOSS_FUNCTION_PEARSON_RMSE_GPU
+#pragma once
 
-#include <cublas_v2.h>
-#include <cuda_runtime.h>
+#include "utils/cuda.hpp"
 
 #include <Kokkos_Core.hpp>
 
@@ -33,7 +30,7 @@
 
 // DocString: cls_loss_function_pearson_rmse
 /**
- * @brief The loss function used for regression problems
+ * @brief The loss function used for regression problems on GPU
  *
  */
 class RMSEGPU
@@ -45,11 +42,11 @@ private:
     PropertiesVector::VECTOR_TYPE _properties;
     std::vector<int> _task_sizes;
 
-    bool _fix_intercept;
-    int _n_feat;
-    int _n_dim;
-    int _n_samp;
-    int _n_task;
+    bool _fix_intercept; //!< If true then the bias term is fixed at 0
+    int _n_feat; //!< Number of features in the linear model
+    int _n_dim; //!< Total number of constants to fit (scale and bias terms)
+    int _n_samp; //!< Number of samples in the training set
+    int _n_task; //!< Number of tasks
 
     /// dim 0: material samples
     /// dim 1: features
@@ -58,7 +55,6 @@ private:
     /// dim 0: material properties
     /// dim 1: batch
     Kokkos::View<double* [MAX_BATCHES], Kokkos::LayoutLeft> _b;
-    Kokkos::View<double*> _work;  //!< Work vector for dgels
     Kokkos::View<double[MAX_BATCHES]> _batched_scores;
     Kokkos::View<int* [MAX_BATCHES], Kokkos::LayoutLeft> _models;
 
@@ -69,6 +65,15 @@ private:
     double** _batched_bs = nullptr;
 
 public:
+    /**
+     * @brief Constructor
+     *
+     * @param descriptor_matrix descriptor matrix
+     * @param properties properties vector
+     * @param task_sizes number of items per task
+     * @param fix_intercept If true then the bias term is fixed at 0
+     * @param n_feat Number of features in the linear model
+     */
     RMSEGPU(const DescriptorMatrix::MATRIX_TYPE& descriptor_matrix,
             const PropertiesVector::VECTOR_TYPE& properties,
             const std::vector<int>& task_sizes,
@@ -77,15 +82,44 @@ public:
 
     ~RMSEGPU();
 
+    /**
+     * @brief Evaluate the loss function for a set of features
+     *
+     * @param feature_indices index tuples pointing into the descriptor matrix
+     * @return Final score for every index tuple
+     */
     Kokkos::View<double*> operator()(const std::vector<std::vector<int>>& feature_indices);
 
+    /**
+     * @brief Set the A matrix used for solving the least squares regression
+     *
+     * @param models index tuples
+     * @param taskind The task used for the least squares regression
+     * @param start The offset needed from the head of the feature's training data to where the task starts
+     * @param batch_size number of systems to solve simultaneously
+     */
     void set_a(const Kokkos::View<const int**, Kokkos::LayoutLeft>& models,
                int taskind,
                int start,
                int batch_size = MAX_BATCHES);
 
+    /**
+     * @brief Set the right hand side of the least square systems
+     *
+     * @param taskind The task used for the least squares regression
+     * @param start The offset needed from the head of the feature's training data to where the task starts
+     * @param batch_size number of systems to solve simultaneously
+     */
     void set_b(int taskind, int start, int batch_size = MAX_BATCHES);
 
+    /**
+     * @brief Calculate estimated properties
+     *
+     * @param estimated_training_properties estimated properties
+     * @param taskind The task used for the least squares regression
+     * @param start The offset needed from the head of the feature's training data to where the task starts
+     * @param batch_size number of systems to solve simultaneously
+     */
     void set_prop_train_est(
         Kokkos::View<double* [MAX_BATCHES], Kokkos::LayoutLeft> estimated_training_properties,
         int taskind,
@@ -101,5 +135,3 @@ public:
      */
     int least_squares(int taskind, int start, int batch_size = MAX_BATCHES);
 };
-
-#endif
diff --git a/src/utils/cuda.hpp b/src/utils/cuda.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..fa7655851e1d37b618238c41ad6e0e0e92101d58
--- /dev/null
+++ b/src/utils/cuda.hpp
@@ -0,0 +1,165 @@
+// Copyright 2021 Thomas A. R. Purcell
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+/**
+ *  @brief Wrapper for all CUDA related functionality.
+ *
+ *  When SISSO_ENABLE_CUDA is defined the CUDA runtime and cuBLAS headers are
+ *  included. Otherwise stand-ins for the CUDA/cuBLAS names below are declared
+ *  so that dependent code still compiles: the error check becomes a no-op and
+ *  the stand-in API functions throw std::runtime_error when called.
+ *
+ *  @author Sebastian Eibl <sebastian.eibl@mpcdf.mpg.de>
+ */
+
+#pragma once
+
+#include <fmt/format.h>
+
+#include <cstddef>
+#include <cstdlib>
+#include <iostream>
+#include <stdexcept>
+
+#ifdef SISSO_ENABLE_CUDA
+#include <cublas_v2.h>
+#include <cuda_runtime.h>
+
+/// Forward the status of a CUDA runtime call, together with the call site, to check_cuda_error
+#define CHECK_CUDA_ERROR(ans)                        \
+    {                                                \
+        check_cuda_error((ans), __FILE__, __LINE__); \
+    }
+
+/**
+ * @brief Print a message to stderr if a CUDA runtime call did not succeed
+ *
+ * @param code Status returned by the CUDA runtime call
+ * @param file Source file of the call site
+ * @param line Line number of the call site
+ * @param abort If true, terminate the process with code as the exit status after printing
+ */
+inline void check_cuda_error(cudaError_t code, const char* file, int line, bool abort = true)
+{
+    if (code != cudaSuccess)
+    {
+        std::cerr << fmt::format("CUDA ERROR: {} {} {}", cudaGetErrorString(code), file, line)
+                  << std::endl;
+        if (abort) std::exit(code);
+    }
+}
+#else
+/// Stand-in for the cuBLAS context type
+struct cublasContext
+{
+};
+typedef struct cublasContext* cublasHandle_t;
+
+/// Stand-in for the CUDA runtime status type
+struct cudaError_t
+{
+};
+
+/// Without CUDA support the check expands to an empty block; its argument is not evaluated
+#define CHECK_CUDA_ERROR(ans) \
+    {                         \
+    }
+
+/**
+ * @brief No-op counterpart of check_cuda_error for builds without CUDA support
+ *
+ * All arguments are ignored; abort defaults to true so both variants accept the same calls.
+ */
+inline void check_cuda_error(cudaError_t /*code*/,
+                             const char* /*file*/,
+                             int /*line*/,
+                             bool /*abort*/ = true)
+{
+}
+
+/// Stand-in for the cuBLAS status codes
+typedef enum
+{
+    CUBLAS_STATUS_SUCCESS = 0,
+    CUBLAS_STATUS_NOT_INITIALIZED = 1,
+    CUBLAS_STATUS_ALLOC_FAILED = 3,
+    CUBLAS_STATUS_INVALID_VALUE = 7,
+    CUBLAS_STATUS_ARCH_MISMATCH = 8,
+    CUBLAS_STATUS_MAPPING_ERROR = 11,
+    CUBLAS_STATUS_EXECUTION_FAILED = 13,
+    CUBLAS_STATUS_INTERNAL_ERROR = 14,
+    CUBLAS_STATUS_NOT_SUPPORTED = 15,
+    CUBLAS_STATUS_LICENSE_ERROR = 16
+} cublasStatus_t;
+
+/// Stand-in for cublasCreate; always throws std::runtime_error
+inline cublasStatus_t cublasCreate(cublasHandle_t* /*handle*/)
+{
+    throw std::runtime_error("CUDA support is not enabled!");
+}
+
+/// Stand-in for cublasDestroy; always throws std::runtime_error
+inline cublasStatus_t cublasDestroy(cublasHandle_t /*handle*/)
+{
+    throw std::runtime_error("CUDA support is not enabled!");
+}
+
+/// Stand-in for cudaMallocManaged; always throws std::runtime_error
+template <class T>
+inline cudaError_t cudaMallocManaged(T** /*devPtr*/,
+                                     std::size_t /*size*/,
+                                     unsigned int /*flags*/ = 0)
+{
+    throw std::runtime_error("CUDA support is not enabled!");
+}
+
+/// Stand-in for cudaFree; always throws std::runtime_error
+inline cudaError_t cudaFree(void* /*devPtr*/)
+{
+    throw std::runtime_error("CUDA support is not enabled!");
+}
+
+/// Stand-in for the cuBLAS operation selector
+typedef enum
+{
+    CUBLAS_OP_N = 0,
+    CUBLAS_OP_T = 1,
+    CUBLAS_OP_C = 2,
+    CUBLAS_OP_HERMITAN = 2, /* synonym if CUBLAS_OP_C */
+    CUBLAS_OP_CONJG = 3     /* conjugate, placeholder - not supported in the current release */
+} cublasOperation_t;
+
+/// Stand-in for cublasDgelsBatched; always throws std::runtime_error
+inline cublasStatus_t cublasDgelsBatched(cublasHandle_t /*handle*/,
+                                         cublasOperation_t /*trans*/,
+                                         int /*m*/,
+                                         int /*n*/,
+                                         int /*nrhs*/,
+                                         double* const /*Aarray*/[], /*Device pointer*/
+                                         int /*lda*/,
+                                         double* const /*Carray*/[], /*Device pointer*/
+                                         int /*ldc*/,
+                                         int* /*info*/,
+                                         int* /*devInfoArray*/, /*Device pointer*/
+                                         int /*batchSize*/)
+{
+    throw std::runtime_error("CUDA support is not enabled!");
+}
+
+/// Stand-in for cudaDeviceSynchronize; always throws std::runtime_error
+inline cudaError_t cudaDeviceSynchronize()
+{
+    throw std::runtime_error("CUDA support is not enabled!");
+}
+#endif