diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 7b7ac391c0de7aa4e1fc021406bf06e2f3c50db1..fa910d1a6acaf7af96f3725b34d2a2dfd4339c23 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -23,7 +23,7 @@ build-intel-base: - export MKLROOT=/home/runner/intel/oneapi/mkl/latest/ - export LD_LIBRARY_PATH=$I_MPI_ROOT/lib/:$I_MPI_ROOT/lib/release:$MKLROOT/lib/intel64:$INTEL_COMP_ROOT/lib/:$INTEL_COMP_ROOT/compiler/lib/intel64/:$LD_LIBRARY_PATH:$HOME/intel/oneapi/intelpython/latest/lib/:$HOME/intel/oneapi/intelpython/latest/lib/python3.7 - export PATH=$INTEL_COMP_ROOT/bin/:$INTEL_COMP_ROOT/bin/intel64:$I_MPI_ROOT/bin:$PATH - - cmake -DKokkos_CXX_STANDARD=17 -DKokkos_ARCH_AMPERE80=ON -DKokkos_ENABLE_SERIAL=ON -DKokkos_ENABLE_OPENMP=ON -DKokkos_ENABLE_CUDA=ON -DKokkos_ENABLE_CUDA_CONSTEXPR=ON -DKokkos_ENABLE_CUDA_LAMBDA=ON -DKokkos_ENABLE_CUDA_UVM=ON -DCMAKE_CXX_COMPILER=icpc -DCMAKE_C_COMPILER=icc -DCMAKE_CXX_FLAGS="-O3" -DEXTERNAL_BUILD_N_PROCS=4 -DBUILD_TESTS=ON -DBUILD_PARAMS=OFF -DBUILD_PYTHON=OFF -DCMAKE_INSTALL_PREFIX=../intel_base/ ../ + - cmake -DSISSO_ENABLE_CUDA=ON -DKokkos_ARCH_AMPERE80=ON -DCMAKE_CXX_COMPILER=icpc -DCMAKE_C_COMPILER=icc -DCMAKE_CXX_FLAGS="-O3" -DEXTERNAL_BUILD_N_PROCS=4 -DBUILD_TESTS=ON -DBUILD_PARAMS=OFF -DBUILD_PYTHON=OFF -DCMAKE_INSTALL_PREFIX=../intel_base/ ../ - make -j4 - make install - cd ../ @@ -49,7 +49,7 @@ build-intel-py: - export LD_LIBRARY_PATH=$I_MPI_ROOT/lib/:$I_MPI_ROOT/lib/release:$MKLROOT/lib/intel64:$INTEL_COMP_ROOT/lib/:$INTEL_COMP_ROOT/compiler/lib/intel64/:$LD_LIBRARY_PATH:$HOME/intel/oneapi/intelpython/latest/lib/:$HOME/intel/oneapi/intelpython/latest/lib/python3.7 - export PYTHONPATH=$HOME/intel/oneapi/intelpython/latest/lib/python3.7/site-packages/:cpp_sisso_env_intel_py/lib/python3.7/site-packages/ - export PATH=$INTEL_COMP_ROOT/bin/:$INTEL_COMP_ROOT/bin/intel64:$I_MPI_ROOT/bin:$PATH - - cmake -DKokkos_CXX_STANDARD=17 -DKokkos_ARCH_AMPERE80=ON -DKokkos_ENABLE_SERIAL=ON -DKokkos_ENABLE_OPENMP=ON -DKokkos_ENABLE_CUDA=ON 
-DKokkos_ENABLE_CUDA_CONSTEXPR=ON -DKokkos_ENABLE_CUDA_LAMBDA=ON -DKokkos_ENABLE_CUDA_UVM=ON -DCMAKE_CXX_COMPILER=icpc -DCMAKE_C_COMPILER=icc -DCMAKE_CXX_FLAGS="-O3" -DEXTERNAL_BUILD_N_PROCS=4 -DBUILD_TESTS=OFF -DBUILD_PARAMS=OFF -DBUILD_PYTHON=ON -DCMAKE_INSTALL_PREFIX=../intel_py/ ../ + - cmake -DCMAKE_CXX_COMPILER=icpc -DCMAKE_C_COMPILER=icc -DCMAKE_CXX_FLAGS="-O3" -DEXTERNAL_BUILD_N_PROCS=4 -DBUILD_TESTS=OFF -DBUILD_PARAMS=OFF -DBUILD_PYTHON=ON -DCMAKE_INSTALL_PREFIX=../intel_py/ ../ - make -j4 - make install - cd ../ @@ -73,7 +73,7 @@ build-intel-param: - export MKLROOT=/home/runner/intel/oneapi/mkl/latest/ - export LD_LIBRARY_PATH=$I_MPI_ROOT/lib/:$I_MPI_ROOT/lib/release:$MKLROOT/lib/intel64:$INTEL_COMP_ROOT/lib/:$INTEL_COMP_ROOT/compiler/lib/intel64/:$LD_LIBRARY_PATH:$HOME/intel/oneapi/intelpython/latest/lib/:$HOME/intel/oneapi/intelpython/latest/lib/python3.7 - export PATH=$INTEL_COMP_ROOT/bin/:$INTEL_COMP_ROOT/bin/intel64:$I_MPI_ROOT/bin:$PATH - - cmake -DKokkos_CXX_STANDARD=17 -DKokkos_ARCH_AMPERE80=ON -DKokkos_ENABLE_SERIAL=ON -DKokkos_ENABLE_OPENMP=ON -DKokkos_ENABLE_CUDA=ON -DKokkos_ENABLE_CUDA_CONSTEXPR=ON -DKokkos_ENABLE_CUDA_LAMBDA=ON -DKokkos_ENABLE_CUDA_UVM=ON -DCMAKE_CXX_COMPILER=icpc -DCMAKE_C_COMPILER=icc -DCMAKE_CXX_FLAGS="-O3" -DEXTERNAL_BUILD_N_PROCS=4 -DBUILD_TESTS=ON -DBUILD_PARAMS=ON -DBUILD_PYTHON=OFF -DCMAKE_INSTALL_PREFIX=../intel_param/ ../ + - cmake -DSISSO_ENABLE_CUDA=ON -DKokkos_ARCH_AMPERE80=ON -DCMAKE_CXX_COMPILER=icpc -DCMAKE_C_COMPILER=icc -DCMAKE_CXX_FLAGS="-O3" -DEXTERNAL_BUILD_N_PROCS=4 -DBUILD_TESTS=ON -DBUILD_PARAMS=ON -DBUILD_PYTHON=OFF -DCMAKE_INSTALL_PREFIX=../intel_param/ ../ - make -j4 - make install - cd ../ @@ -99,7 +99,7 @@ build-intel-param-py: - export LD_LIBRARY_PATH=$I_MPI_ROOT/lib/:$I_MPI_ROOT/lib/release:$MKLROOT/lib/intel64:$INTEL_COMP_ROOT/lib/:$INTEL_COMP_ROOT/compiler/lib/intel64/:$LD_LIBRARY_PATH:$HOME/intel/oneapi/intelpython/latest/lib/:$HOME/intel/oneapi/intelpython/latest/lib/python3.7 - export 
PYTHONPATH=$HOME/intel/oneapi/intelpython/latest/lib/python3.7/site-packages/:cpp_sisso_env_intel_param_py/lib/python3.7/site-packages/ - export PATH=$INTEL_COMP_ROOT/bin/:$INTEL_COMP_ROOT/bin/intel64:$I_MPI_ROOT/bin:$PATH - - cmake -DKokkos_CXX_STANDARD=17 -DKokkos_ARCH_AMPERE80=ON -DKokkos_ENABLE_SERIAL=ON -DKokkos_ENABLE_OPENMP=ON -DKokkos_ENABLE_CUDA=ON -DKokkos_ENABLE_CUDA_CONSTEXPR=ON -DKokkos_ENABLE_CUDA_LAMBDA=ON -DKokkos_ENABLE_CUDA_UVM=ON -DCMAKE_CXX_COMPILER=icpc -DCMAKE_C_COMPILER=icc -DCMAKE_CXX_FLAGS="-O3" -DEXTERNAL_BUILD_N_PROCS=4 -DBUILD_TESTS=OFF -DBUILD_PARAMS=ON -DBUILD_PYTHON=ON -DCMAKE_INSTALL_PREFIX=../intel_param_py/ ../ + - cmake -DCMAKE_CXX_COMPILER=icpc -DCMAKE_C_COMPILER=icc -DCMAKE_CXX_FLAGS="-O3" -DEXTERNAL_BUILD_N_PROCS=4 -DBUILD_TESTS=OFF -DBUILD_PARAMS=ON -DBUILD_PYTHON=ON -DCMAKE_INSTALL_PREFIX=../intel_param_py/ ../ - make -j4 - make install - cd ../ @@ -243,7 +243,7 @@ build-gnu-base: - export LD_LIBRARY_PATH=$HOME/intel/oneapi/intelpython/latest/lib/:$HOME/intel/oneapi/intelpython/latest/lib/python3.7:$LD_LIBRARY_PATH - mkdir build_gnu_base/ - cd build_gnu_base/ - - cmake -DKokkos_CXX_STANDARD=17 -DKokkos_ARCH_AMPERE80=ON -DKokkos_ENABLE_SERIAL=ON -DKokkos_ENABLE_OPENMP=ON -DKokkos_ENABLE_CUDA=ON -DKokkos_ENABLE_CUDA_CONSTEXPR=ON -DKokkos_ENABLE_CUDA_LAMBDA=ON -DKokkos_ENABLE_CUDA_UVM=ON -DCMAKE_CXX_COMPILER=g++ -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_FLAGS="-O3" -DEXTERNAL_BUILD_N_PROCS=4 -DBUILD_TESTS=ON -DBUILD_PARAMS=OFF -DBUILD_PYTHON=OFF -DCMAKE_INSTALL_PREFIX=../gnu_base/ ../ + - cmake -DSISSO_ENABLE_CUDA=ON -DKokkos_ARCH_AMPERE80=ON -DCMAKE_CXX_COMPILER=g++ -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_FLAGS="-O3" -DEXTERNAL_BUILD_N_PROCS=4 -DBUILD_TESTS=ON -DBUILD_PARAMS=OFF -DBUILD_PYTHON=OFF -DCMAKE_INSTALL_PREFIX=../gnu_base/ ../ - make -j4 - make install - cd ../ @@ -261,7 +261,7 @@ build-gnu-param: - export 
LD_LIBRARY_PATH=$HOME/intel/oneapi/intelpython/latest/lib/:$HOME/intel/oneapi/intelpython/latest/lib/python3.7:$LD_LIBRARY_PATH - mkdir build_gnu_param/ - cd build_gnu_param/ - - cmake -DKokkos_CXX_STANDARD=17 -DKokkos_ARCH_AMPERE80=ON -DKokkos_ENABLE_SERIAL=ON -DKokkos_ENABLE_OPENMP=ON -DKokkos_ENABLE_CUDA=ON -DKokkos_ENABLE_CUDA_CONSTEXPR=ON -DKokkos_ENABLE_CUDA_LAMBDA=ON -DKokkos_ENABLE_CUDA_UVM=ON -DCMAKE_CXX_COMPILER=g++ -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_FLAGS="-O3" -DEXTERNAL_BUILD_N_PROCS=4 -DBUILD_TESTS=ON -DBUILD_PARAMS=ON -DBUILD_PYTHON=OFF -DCMAKE_INSTALL_PREFIX=../gnu_param/ ../ + - cmake -DSISSO_ENABLE_CUDA=ON -DKokkos_ARCH_AMPERE80=ON -DCMAKE_CXX_COMPILER=g++ -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_FLAGS="-O3" -DEXTERNAL_BUILD_N_PROCS=4 -DBUILD_TESTS=ON -DBUILD_PARAMS=ON -DBUILD_PYTHON=OFF -DCMAKE_INSTALL_PREFIX=../gnu_param/ ../ - make -j4 - make install - cd ../ @@ -282,7 +282,7 @@ build-gnu-py: - export PYTHONPATH=$HOME/intel/oneapi/intelpython/latest/lib/python3.7/site-packages/:cpp_sisso_gnu_py_env/lib/python3.7/site-packages/ - mkdir build_py/ - cd build_py/ - - cmake -DKokkos_CXX_STANDARD=17 -DKokkos_ARCH_AMPERE80=ON -DKokkos_ENABLE_SERIAL=ON -DKokkos_ENABLE_OPENMP=ON -DKokkos_ENABLE_CUDA=ON -DKokkos_ENABLE_CUDA_CONSTEXPR=ON -DKokkos_ENABLE_CUDA_LAMBDA=ON -DKokkos_ENABLE_CUDA_UVM=ON -DCMAKE_CXX_COMPILER=g++ -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_FLAGS="-O3" -DEXTERNAL_BUILD_N_PROCS=4 -DBUILD_TESTS=OFF -DBUILD_PARAMS=OFF -DBUILD_PYTHON=ON -DCMAKE_INSTALL_PREFIX=../gnu_py/ ../ + - cmake -DCMAKE_CXX_COMPILER=g++ -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_FLAGS="-O3" -DEXTERNAL_BUILD_N_PROCS=4 -DBUILD_TESTS=OFF -DBUILD_PARAMS=OFF -DBUILD_PYTHON=ON -DCMAKE_INSTALL_PREFIX=../gnu_py/ ../ - make -j4 - make install - cd ../ @@ -304,7 +304,7 @@ build-gnu-param-py: - export PYTHONPATH=$HOME/intel/oneapi/intelpython/latest/lib/python3.7/site-packages/:cpp_sisso_gnu_param_py_env/lib/python3.7/site-packages/ - mkdir build_param_py/ - cd build_param_py/ - - cmake 
-DKokkos_CXX_STANDARD=17 -DKokkos_ARCH_AMPERE80=ON -DKokkos_ENABLE_SERIAL=ON -DKokkos_ENABLE_OPENMP=ON -DKokkos_ENABLE_CUDA=ON -DKokkos_ENABLE_CUDA_CONSTEXPR=ON -DKokkos_ENABLE_CUDA_LAMBDA=ON -DKokkos_ENABLE_CUDA_UVM=ON -DCMAKE_CXX_COMPILER=g++ -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_FLAGS="-O3" -DEXTERNAL_BUILD_N_PROCS=4 -DBUILD_TESTS=OFF -DBUILD_PARAMS=ON -DBUILD_PYTHON=ON -DCMAKE_INSTALL_PREFIX=../gnu_param_py/ ../ + - cmake -DCMAKE_CXX_COMPILER=g++ -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_FLAGS="-O3" -DEXTERNAL_BUILD_N_PROCS=4 -DBUILD_TESTS=OFF -DBUILD_PARAMS=ON -DBUILD_PYTHON=ON -DCMAKE_INSTALL_PREFIX=../gnu_param_py/ ../ - make -j4 - make install - cd ../ @@ -442,7 +442,7 @@ build-gnu-gcov: - export PYTHONPATH=$HOME/intel/oneapi/intelpython/latest/lib/python3.7/site-packages/:`pwd`/cpp_sisso_gnu_gcov_env/lib/python3.7/site-packages/ - mkdir build_gcov/ - cd build_gcov/ - - cmake -DKokkos_CXX_STANDARD=17 -DKokkos_ARCH_AMPERE80=ON -DKokkos_ENABLE_SERIAL=ON -DKokkos_ENABLE_OPENMP=ON -DKokkos_ENABLE_CUDA=ON -DKokkos_ENABLE_CUDA_CONSTEXPR=ON -DKokkos_ENABLE_CUDA_LAMBDA=ON -DKokkos_ENABLE_CUDA_UVM=ON -DCMAKE_CXX_COMPILER=g++ -DCMAKE_C_COMPILER=gcc -DCMAKE_BUILD_TYPE="Coverage" -DMPIEXEC_EXECUTABLE=/usr/bin/mpiexec -DBUILD_TESTS=ON -DBUILD_PARAMS=ON -DBUILD_PYTHON=ON -DCMAKE_INSTALL_PREFIX=../gnu_gcov/ ../ + - cmake -DCMAKE_CXX_COMPILER=g++ -DCMAKE_C_COMPILER=gcc -DCMAKE_BUILD_TYPE="Coverage" -DMPIEXEC_EXECUTABLE=/usr/bin/mpiexec -DBUILD_TESTS=ON -DBUILD_PARAMS=ON -DBUILD_PYTHON=ON -DCMAKE_INSTALL_PREFIX=../gnu_gcov/ ../ - make install - make coverage - cd ../ @@ -467,7 +467,7 @@ build-gnu-lcov: - export PYTHONPATH=$HOME/intel/oneapi/intelpython/latest/lib/python3.7/site-packages/:`pwd`/cpp_sisso_gnu_lcov_env/lib/python3.7/site-packages/ - mkdir build_lcov/ - cd build_lcov/ - - cmake -DKokkos_CXX_STANDARD=17 -DKokkos_ARCH_AMPERE80=ON -DKokkos_ENABLE_SERIAL=ON -DKokkos_ENABLE_OPENMP=ON -DKokkos_ENABLE_CUDA=ON -DKokkos_ENABLE_CUDA_CONSTEXPR=ON -DKokkos_ENABLE_CUDA_LAMBDA=ON 
-DKokkos_ENABLE_CUDA_UVM=ON -DCMAKE_CXX_COMPILER=g++ -DCMAKE_C_COMPILER=gcc -DCMAKE_BUILD_TYPE="Coverage" -DMPIEXEC_EXECUTABLE=/usr/bin/mpiexec -DBUILD_TESTS=ON -DBUILD_PARAMS=ON -DBUILD_PYTHON=ON -DCMAKE_INSTALL_PREFIX=../gnu_lcov/ ../ + - cmake -DCMAKE_CXX_COMPILER=g++ -DCMAKE_C_COMPILER=gcc -DCMAKE_BUILD_TYPE="Coverage" -DMPIEXEC_EXECUTABLE=/usr/bin/mpiexec -DBUILD_TESTS=ON -DBUILD_PARAMS=ON -DBUILD_PYTHON=ON -DCMAKE_INSTALL_PREFIX=../gnu_lcov/ ../ - make install - make coverage_html - cd ../ diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 3e6d9a2d5409148e84cbfd665a1eab227f86f321..d633bd2e691ec959a32e515ae85cc191351c2260 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -39,10 +39,21 @@ set_target_properties(libsisso PREFIX "" ) -target_link_libraries(libsisso Kokkos::kokkos) +option(SISSO_ENABLE_CUDA "Enable CUDA support" OFF) +if (SISSO_ENABLE_CUDA) + target_compile_definitions(libsisso PRIVATE SISSO_ENABLE_CUDA) + + find_package(CUDAToolkit REQUIRED) + target_link_libraries(libsisso CUDA::cudart CUDA::cublas) -find_package(CUDAToolkit REQUIRED) -target_link_libraries(libsisso CUDA::cudart CUDA::cublas) + set(Kokkos_ENABLE_CUDA ON CACHE BOOL "" FORCE) + set(Kokkos_ENABLE_CUDA_CONSTEXPR ON CACHE BOOL "" FORCE) + set(Kokkos_ENABLE_CUDA_LAMBDA ON CACHE BOOL "" FORCE) + set(Kokkos_ENABLE_CUDA_UVM ON CACHE BOOL "" FORCE) +endif() +set(Kokkos_CXX_STANDARD 17 CACHE STRING "" FORCE) +set(Kokkos_ENABLE_SERIAL ON CACHE BOOL "" FORCE) +target_link_libraries(libsisso Kokkos::kokkos) target_link_libraries(libsisso ${LAPACK_LIBRARIES} ${MPI_LIBRARIES} -Wl,--rpath=${Boost_LIB_DIR} -Wl,--rpath=${LAPACK_DIR} ${Boost_LIBRARIES} ${COIN_CLP_LIBRARIES} ${OPENMP_LIBRARIES} ${FMT_LIBRARIES}) install(TARGETS libsisso DESTINATION ${CMAKE_INSTALL_PREFIX}/lib/) diff --git a/src/loss_function/RMSEGPU.cpp b/src/loss_function/RMSEGPU.cpp index 2d8d679b949b39e5b6d4af047ae2c070509b06d5..8b633ccadcc0d4069bbbe2184f4d40694ceec1e7 100644 --- a/src/loss_function/RMSEGPU.cpp 
+++ b/src/loss_function/RMSEGPU.cpp @@ -25,20 +25,6 @@ #include <Kokkos_StdAlgorithms.hpp> -#define CHECK_CUDA_ERROR(ans) \ - { \ - check_cuda_error((ans), __FILE__, __LINE__); \ - } -inline void check_cuda_error(cudaError_t code, const char* file, int line, bool abort = true) -{ - if (code != cudaSuccess) - { - std::cerr << fmt::format("CUDA ERROR: {} {} {}", cudaGetErrorString(code), file, line) - << std::endl; - if (abort) exit(code); - } -} - void get_mean_squared_difference(const Kokkos::View<double*>& output, const Kokkos::View<const double*>& input1, const Kokkos::View<const double**, Kokkos::LayoutLeft>& input2) diff --git a/src/loss_function/RMSEGPU.hpp b/src/loss_function/RMSEGPU.hpp index f1016b0c600438208aef80484843f1d31ccdfa72..1c47b3624859fd8306ffad4429cda0f4405416e3 100644 --- a/src/loss_function/RMSEGPU.hpp +++ b/src/loss_function/RMSEGPU.hpp @@ -16,14 +16,11 @@ * @brief Defines the class that uses a Pearson correlation projection operator and a least-squares regression objective function * * @author Sebastian Eibl <sebastian.eibl@mpcdf.mpg.de> - * @bug No known bugs. 
*/ -#ifndef LOSS_FUNCTION_PEARSON_RMSE_GPU -#define LOSS_FUNCTION_PEARSON_RMSE_GPU +#pragma once -#include <cublas_v2.h> -#include <cuda_runtime.h> +#include "utils/cuda.hpp" #include <Kokkos_Core.hpp> @@ -33,7 +30,7 @@ // DocString: cls_loss_function_pearson_rmse /** - * @brief The loss function used for regression problems + * @brief The loss function used for regression problems on GPU * */ class RMSEGPU @@ -45,11 +42,11 @@ private: PropertiesVector::VECTOR_TYPE _properties; std::vector<int> _task_sizes; - bool _fix_intercept; - int _n_feat; - int _n_dim; - int _n_samp; - int _n_task; + bool _fix_intercept; //!< If true then the bias term is fixed at 0 + int _n_feat; //!< Number features in the linear model + int _n_dim; //!< Total number of constants to fit (scale and bias terms) + int _n_samp; //!< Number of samples in the training set + int _n_task; //!< Number of tasks /// dim 0: material samples /// dim 1: features @@ -58,7 +55,6 @@ private: /// dim 0: material properties /// dim 1: batch Kokkos::View<double* [MAX_BATCHES], Kokkos::LayoutLeft> _b; - Kokkos::View<double*> _work; //!< Work vector for dgels Kokkos::View<double[MAX_BATCHES]> _batched_scores; Kokkos::View<int* [MAX_BATCHES], Kokkos::LayoutLeft> _models; @@ -69,6 +65,15 @@ private: double** _batched_bs = nullptr; public: + /** + * @brief Constructor + * + * @param descriptor_matrix descriptor matrix + * @param properties properties vector + * @param task_sizes number of items per task + * @param fix_intercept use a fixed offset? 
+ * @param n_feat Number features in the linear model + */ RMSEGPU(const DescriptorMatrix::MATRIX_TYPE& descriptor_matrix, const PropertiesVector::VECTOR_TYPE& properties, const std::vector<int>& task_sizes, @@ -77,15 +82,44 @@ public: ~RMSEGPU(); + /** + * @brief Evaluate the loss function for a set of features + * + * @param feature_indices index tuples pointing into the descriptor matrix + * @return Final score for every index tuple + */ Kokkos::View<double*> operator()(const std::vector<std::vector<int>>& feature_indices); + /** + * @brief Set the A matrix used for solving the least squares regression + * + * @param models index tuples + * @param taskind The task used for the least squares regression + * @param start The offset needed from the head of the feature's training data to where the task starts + * @param batch_size number of systems to solve simultaneously + */ void set_a(const Kokkos::View<const int**, Kokkos::LayoutLeft>& models, int taskind, int start, int batch_size = MAX_BATCHES); + /** + * @brief Set the right hand side of the least square systems + * + * @param taskind The task used for the least squares regression + * @param start The offset needed from the head of the feature's training data to where the task starts + * @param batch_size number of systems to solve simultaneously + */ void set_b(int taskind, int start, int batch_size = MAX_BATCHES); + /** + * @brief Calculate estimated properties + * + * @param estimated_training_properties estimated properties + * @param taskind The task for used for the least squares regression + * @param start The offset needed from the head of the feature's training data to where the task starts + * @param batch_size number of systems to solve simultaneously + */ void set_prop_train_est( Kokkos::View<double* [MAX_BATCHES], Kokkos::LayoutLeft> estimated_training_properties, int taskind, @@ -101,5 +135,3 @@ public: */ int least_squares(int taskind, int start, int batch_size = MAX_BATCHES); }; - -#endif diff 
--git a/src/utils/cuda.hpp b/src/utils/cuda.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..fa7655851e1d37b618238c41ad6e0e0e92101d58
--- /dev/null
+++ b/src/utils/cuda.hpp
@@ -0,0 +1,159 @@
+// Copyright 2021 Thomas A. R. Purcell
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+/**
+ * @brief Wrapper for all CUDA related functionality.
+ *
+ * With SISSO_ENABLE_CUDA defined, the CUDA runtime and cuBLAS headers are
+ * included. Without it, stand-ins for the handful of CUDA/cuBLAS names declared
+ * below are provided instead; every stand-in function throws std::runtime_error.
+ *
+ * @author Sebastian Eibl <sebastian.eibl@mpcdf.mpg.de>
+ */
+
+#pragma once
+
+#include <fmt/format.h>
+
+#include <cstddef>
+#include <cstdlib>
+#include <iostream>
+#include <stdexcept>
+
+#ifdef SISSO_ENABLE_CUDA
+#include <cublas_v2.h>
+#include <cuda_runtime.h>
+
+/**
+ * @brief Report a failed CUDA runtime call and optionally terminate
+ *
+ * @param code Status returned by a CUDA runtime call
+ * @param file Source file of the call site
+ * @param line Line number of the call site
+ * @param abort If true, exit the process with code as the exit status
+ */
+inline void check_cuda_error(cudaError_t code, const char* file, int line, bool abort = true)
+{
+    if (code != cudaSuccess)
+    {
+        std::cerr << fmt::format("CUDA ERROR: {} {} {}", cudaGetErrorString(code), file, line)
+                  << std::endl;
+        if (abort) std::exit(static_cast<int>(code));
+    }
+}
+#else
+// Stand-ins that let code written against CUDA/cuBLAS compile without the
+// toolkit. Calling any of the stand-in functions throws std::runtime_error.
+
+struct cublasContext
+{
+};
+typedef struct cublasContext* cublasHandle_t;
+
+struct cudaError_t
+{
+};
+
+/**
+ * @brief No-op counterpart of check_cuda_error for builds without CUDA
+ *
+ * Accepts the same arguments, including the defaulted abort flag, so that
+ * CHECK_CUDA_ERROR expands to a valid call in both configurations.
+ */
+inline void check_cuda_error(cudaError_t /*code*/,
+                             const char* /*file*/,
+                             int /*line*/,
+                             bool /*abort*/ = true)
+{
+}
+
+typedef enum
+{
+    CUBLAS_STATUS_SUCCESS = 0,
+    CUBLAS_STATUS_NOT_INITIALIZED = 1,
+    CUBLAS_STATUS_ALLOC_FAILED = 3,
+    CUBLAS_STATUS_INVALID_VALUE = 7,
+    CUBLAS_STATUS_ARCH_MISMATCH = 8,
+    CUBLAS_STATUS_MAPPING_ERROR = 11,
+    CUBLAS_STATUS_EXECUTION_FAILED = 13,
+    CUBLAS_STATUS_INTERNAL_ERROR = 14,
+    CUBLAS_STATUS_NOT_SUPPORTED = 15,
+    CUBLAS_STATUS_LICENSE_ERROR = 16
+} cublasStatus_t;
+
+inline cublasStatus_t cublasCreate(cublasHandle_t* /*handle*/)
+{
+    throw std::runtime_error("CUDA support is not enabled!");
+}
+
+inline cublasStatus_t cublasDestroy(cublasHandle_t /*handle*/)
+{
+    throw std::runtime_error("CUDA support is not enabled!");
+}
+
+template <class T>
+inline cudaError_t cudaMallocManaged(T** /*devPtr*/,
+                                     std::size_t /*size*/,
+                                     unsigned int /*flags*/ = 0)
+{
+    throw std::runtime_error("CUDA support is not enabled!");
+}
+
+inline cudaError_t cudaFree(void* /*devPtr*/)
+{
+    throw std::runtime_error("CUDA support is not enabled!");
+}
+
+typedef enum
+{
+    CUBLAS_OP_N = 0,
+    CUBLAS_OP_T = 1,
+    CUBLAS_OP_C = 2,
+    CUBLAS_OP_HERMITAN = 2, /* synonym of CUBLAS_OP_C */
+    CUBLAS_OP_CONJG = 3     /* conjugate, placeholder - not supported in the current release */
+} cublasOperation_t;
+
+inline cublasStatus_t cublasDgelsBatched(cublasHandle_t /*handle*/,
+                                         cublasOperation_t /*trans*/,
+                                         int /*m*/,
+                                         int /*n*/,
+                                         int /*nrhs*/,
+                                         double* const /*Aarray*/[], /*Device pointer*/
+                                         int /*lda*/,
+                                         double* const /*Carray*/[], /*Device pointer*/
+                                         int /*ldc*/,
+                                         int* /*info*/,
+                                         int* /*devInfoArray*/, /*Device pointer*/
+                                         int /*batchSize*/)
+{
+    throw std::runtime_error("CUDA support is not enabled!");
+}
+
+inline cudaError_t cudaDeviceSynchronize(void)
+{
+    throw std::runtime_error("CUDA support is not enabled!");
+}
+#endif
+
+/**
+ * @brief Check the status returned by a CUDA runtime call, tagging it with the call site
+ *
+ * The argument is evaluated in both configurations, so without CUDA the
+ * stand-in function inside it runs (and throws) instead of being dropped.
+ * The plain brace form is kept so that existing call sites expand as before.
+ */
+#define CHECK_CUDA_ERROR(ans)                        \
+    {                                                \
+        check_cuda_error((ans), __FILE__, __LINE__); \
+    }