diff --git a/src/GPU/check_for_gpu.F90 b/src/GPU/check_for_gpu.F90 index c841554bc508f9f040d3530fef7bf6b2b8a0db08..eeede0e0afb429930bd9520af3c0da873e6328dc 100644 --- a/src/GPU/check_for_gpu.F90 +++ b/src/GPU/check_for_gpu.F90 @@ -62,6 +62,19 @@ module mod_check_for_gpu gpuAvailable = .false. + if(cublasHandle .ne. -1) then + gpuAvailable = .true. + numberOfDevices = -1 + if(myid == 0) then + print *, "Skipping GPU init, should have already been initialized " + endif + return + else + if(myid == 0) then + print *, "Initializing the GPU devices" + endif + endif + if (.not.(present(wantDebug))) then wantDebugMessage = .false. else diff --git a/src/GPU/mod_cuda.F90 b/src/GPU/mod_cuda.F90 index dbe9ce5d4066acad79824396464f9c78291b992b..820b5b6a0756764522297b5f4e9e4047afc76861 100644 --- a/src/GPU/mod_cuda.F90 +++ b/src/GPU/mod_cuda.F90 @@ -55,7 +55,8 @@ module cuda_functions integer(kind=ik) :: cudaHostRegisterMapped integer(kind=ik) :: cudaMemcpyDeviceToDevice - integer(kind=C_intptr_T) :: cublasHandle + ! TODO global variable, has to be changed + integer(kind=C_intptr_T) :: cublasHandle = -1 integer(kind=c_intptr_t), parameter :: size_of_double_real = 8_rk8 #ifdef WANT_SINGLE_PRECISION_REAL diff --git a/src/elpa1/elpa1_template.F90 b/src/elpa1/elpa1_template.F90 index 3d6d1dbaf5caf3aab70eb39aa54f0ed7dd172901..7140b273f45bc2748e5b21ec87f6ac8f6849e7f6 100644 --- a/src/elpa1/elpa1_template.F90 +++ b/src/elpa1/elpa1_template.F90 @@ -220,10 +220,10 @@ function elpa_solve_evp_& wantDebug = debug == 1 do_useGPU = .false. - + if (useGPU) then + call obj%timer%start("check_for_gpu") if (check_for_gpu(my_pe,numberOfGPUDevices, wantDebug=wantDebug)) then - do_useGPU = .true. ! set the neccessary parameters cudaMemcpyHostToDevice = cuda_memcpyHostToDevice() @@ -236,8 +236,10 @@ function elpa_solve_evp_& success = .false. return endif + call obj%timer%stop("check_for_gpu") endif + do_useGPU_tridiag = do_useGPU do_useGPU_solve_tridi = do_useGPU do_useGPU_trans_ev = do_useGPU diff --git a/src/elpa2/elpa2_template.F90 b/src/elpa2/elpa2_template.F90 index ac4f9f0f004ff9e792c8a59fa062ea074bd87e5f..70a101b0d90bc281b0e5a18714610385f5a2d02f 100644 --- a/src/elpa2/elpa2_template.F90 +++ b/src/elpa2/elpa2_template.F90 @@ -239,6 +239,7 @@ do_useGPU = .false. if (useGPU) then + call obj%timer%start("check_for_gpu") if (check_for_gpu(my_pe,numberOfGPUDevices, wantDebug=wantDebug)) then do_useGPU = .true. @@ -254,6 +255,7 @@ success = .false. return endif + call obj%timer%stop("check_for_gpu") endif do_useGPU_bandred = do_useGPU