diff --git a/src/GPU/check_for_gpu.F90 b/src/GPU/check_for_gpu.F90
index c841554bc508f9f040d3530fef7bf6b2b8a0db08..eeede0e0afb429930bd9520af3c0da873e6328dc 100644
--- a/src/GPU/check_for_gpu.F90
+++ b/src/GPU/check_for_gpu.F90
@@ -62,6 +62,19 @@ module mod_check_for_gpu
 
       gpuAvailable = .false.
 
+      if(cublasHandle .ne. -1) then
+	gpuAvailable = .true.
+	numberOfDevices = -1
+	if(myid == 0) then
+          print *, "Skipping GPU init, should have already been initialized "
+	endif
+	return
+      else
+	if(myid == 0) then
+	  print *, "Initializing the GPU devices"
+	endif
+      endif
+
       if (.not.(present(wantDebug))) then
         wantDebugMessage = .false.
       else
diff --git a/src/GPU/mod_cuda.F90 b/src/GPU/mod_cuda.F90
index dbe9ce5d4066acad79824396464f9c78291b992b..820b5b6a0756764522297b5f4e9e4047afc76861 100644
--- a/src/GPU/mod_cuda.F90
+++ b/src/GPU/mod_cuda.F90
@@ -55,7 +55,8 @@ module cuda_functions
   integer(kind=ik) :: cudaHostRegisterMapped
   integer(kind=ik) :: cudaMemcpyDeviceToDevice
 
-  integer(kind=C_intptr_T) :: cublasHandle
+  ! TODO: cublasHandle is a module-level global and should be replaced; -1 is the sentinel check_for_gpu tests to skip GPU init
+  integer(kind=C_intptr_T) :: cublasHandle = -1
 
   integer(kind=c_intptr_t), parameter :: size_of_double_real    = 8_rk8
 #ifdef WANT_SINGLE_PRECISION_REAL
diff --git a/src/elpa1/elpa1_template.F90 b/src/elpa1/elpa1_template.F90
index 3d6d1dbaf5caf3aab70eb39aa54f0ed7dd172901..7140b273f45bc2748e5b21ec87f6ac8f6849e7f6 100644
--- a/src/elpa1/elpa1_template.F90
+++ b/src/elpa1/elpa1_template.F90
@@ -220,10 +220,10 @@ function elpa_solve_evp_&
    wantDebug = debug == 1
    do_useGPU = .false.
 
-
+   
    if (useGPU) then
+     call obj%timer%start("check_for_gpu")
      if (check_for_gpu(my_pe,numberOfGPUDevices, wantDebug=wantDebug)) then
-
        do_useGPU = .true.
        ! set the neccessary parameters
        cudaMemcpyHostToDevice   = cuda_memcpyHostToDevice()
@@ -236,8 +236,10 @@ function elpa_solve_evp_&
        success = .false.
        return
      endif
+     call obj%timer%stop("check_for_gpu")
    endif
 
+
    do_useGPU_tridiag = do_useGPU
    do_useGPU_solve_tridi = do_useGPU
    do_useGPU_trans_ev = do_useGPU
diff --git a/src/elpa2/elpa2_template.F90 b/src/elpa2/elpa2_template.F90
index ac4f9f0f004ff9e792c8a59fa062ea074bd87e5f..70a101b0d90bc281b0e5a18714610385f5a2d02f 100644
--- a/src/elpa2/elpa2_template.F90
+++ b/src/elpa2/elpa2_template.F90
@@ -239,6 +239,7 @@
 
     do_useGPU = .false.
     if (useGPU) then
+      call obj%timer%start("check_for_gpu")
       if (check_for_gpu(my_pe,numberOfGPUDevices, wantDebug=wantDebug)) then
 
          do_useGPU = .true.
@@ -254,6 +255,7 @@
         success = .false.
         return
       endif
+      call obj%timer%stop("check_for_gpu")
     endif
 
     do_useGPU_bandred = do_useGPU