Commit 08caccbc authored by Andreas Marek

Change kind of device pointers from c_size_t to c_intptr_t

parent 362d17d1
......@@ -46,10 +46,10 @@ module aligned_mem
interface
function posix_memalign(memptr, alignment, size) result(error) bind(C, name="posix_memalign")
import c_int, c_size_t, c_ptr
import c_int, c_intptr_t, c_ptr
integer(kind=c_int) :: error
type(c_ptr), intent(inout) :: memptr
integer(kind=c_size_t), intent(in), value :: alignment, size
integer(kind=c_intptr_t), intent(in), value :: alignment, size
end function
end interface
......
......@@ -117,7 +117,7 @@
integer(kind=c_intptr_t) :: a_dev
integer(kind=c_intptr_t) :: bcast_buffer_dev
integer(kind=c_size_t) :: dev_offset
integer(kind=c_intptr_t) :: dev_offset
integer(kind=c_intptr_t) :: hh_dot_dev
integer(kind=c_intptr_t) :: hh_tau_dev
! Private variables in OMP regions (my_thread) should better be in the argument list!
......
......@@ -11,15 +11,15 @@
implicit none
real(kind=c_double), intent(inout) :: kernel_time ! MPI_WTIME always needs double
integer(kind=lik) :: kernel_flops
integer(kind=c_size_t) :: a_dev, bcast_buffer_dev, hh_tau_dev
integer(kind=c_intptr_t) :: a_dev, bcast_buffer_dev, hh_tau_dev
integer(kind=ik), intent(in) :: last_stripe_width
integer(kind=ik), intent(in) :: off, ncols, istripe
integer(kind=ik) :: nl, a_dim2, n_times, nbw, stripe_count, stripe_width
real(kind=c_double) :: ttt ! MPI_WTIME always needs double
integer(kind=ik) :: a_off
integer(kind=c_size_t) :: dev_offset, dev_offset_1, dev_offset_2
integer(kind=c_size_t), parameter :: size_of_datatype = size_of_&
integer(kind=c_intptr_t) :: dev_offset, dev_offset_1, dev_offset_2
integer(kind=c_intptr_t), parameter :: size_of_datatype = size_of_&
&PRECISION&
&_&
&MATH_DATATYPE
......
......@@ -160,7 +160,7 @@
integer(kind=C_intptr_T) :: q_dev, tmp_dev, hvm_dev, tmat_dev
logical :: successCUDA
integer(kind=c_size_t), parameter :: size_of_datatype = size_of_&
integer(kind=c_intptr_t), parameter :: size_of_datatype = size_of_&
&PRECISION&
&_&
&MATH_DATATYPE
......
......@@ -155,7 +155,7 @@
integer(kind=ik) :: istep, i, j, l_col_beg, l_col_end, l_row_beg, l_row_end
integer(kind=ik) :: tile_size, l_rows_per_tile, l_cols_per_tile
integer(kind=c_size_t) :: a_offset
integer(kind=c_intptr_t) :: a_offset
#ifdef WITH_OPENMP
integer(kind=ik) :: my_thread, n_threads, max_threads, n_iter
......@@ -208,7 +208,7 @@
#endif
integer(kind=ik) :: istat
character(200) :: errorMessage
integer(kind=c_size_t), parameter :: size_of_datatype = size_of_&
integer(kind=c_intptr_t), parameter :: size_of_datatype = size_of_&
&PRECISION&
&_&
&MATH_DATATYPE
......@@ -852,8 +852,8 @@
!successCUDA = cuda_threadsynchronize()
!check_memcpy_cuda("tridiag: a_dev 4a5a", successCUDA)
successCUDA = cuda_memcpy(a_dev + a_offset, int(loc(a_mat(l_rows, l_cols)),kind=c_size_t), &
int(1 * size_of_datatype, kind=c_size_t), cudaMemcpyHostToDevice)
successCUDA = cuda_memcpy(a_dev + a_offset, int(loc(a_mat(l_rows, l_cols)),kind=c_intptr_t), &
int(1 * size_of_datatype, kind=c_intptr_t), cudaMemcpyHostToDevice)
check_memcpy_cuda("tridiag: a_dev 4", successCUDA)
endif
endif
......
......@@ -198,9 +198,9 @@
#endif
integer(kind=ik) :: ierr
integer(kind=ik) :: cur_l_rows, cur_l_cols, vmr_size, umc_size
integer(kind=c_size_t) :: lc_start, lc_end
integer(kind=c_intptr_t) :: lc_start, lc_end
#if COMPLEXCASE == 1
integer(kind=c_size_t) :: lce_1, lcs_1, lre_1
integer(kind=c_intptr_t) :: lce_1, lcs_1, lre_1
#endif
integer(kind=ik) :: lr_end
integer(kind=ik) :: na_cols
......@@ -219,7 +219,7 @@
#endif
integer(kind=ik) :: mystart, myend, m_way, n_way, work_per_thread, m_id, n_id, n_threads, &
ii, pp, transformChunkSize
integer(kind=c_size_t), parameter :: size_of_datatype = size_of_&
integer(kind=c_intptr_t), parameter :: size_of_datatype = size_of_&
&PRECISION&
&_&
&MATH_DATATYPE
......@@ -581,26 +581,26 @@
lda * size_of_datatype, &
#endif
#if COMPLEXCASE == 1
int((lda*size_of_datatype),kind=c_size_t), &
int((lda*size_of_datatype),kind=c_intptr_t), &
#endif
#if REALCASE == 1
(a_dev + ((lc_start-1) * lda*size_of_datatype)), &
#endif
#if COMPLEXCASE == 1
(a_dev + int( ( (lc_start-1) * lda*size_of_datatype),kind=c_size_t )), &
(a_dev + int( ( (lc_start-1) * lda*size_of_datatype),kind=c_intptr_t )), &
#endif
#if REALCASE == 1
lda*size_of_datatype, lr_end*size_of_datatype, &
#endif
#if COMPLEXCASE == 1
int(lda*size_of_datatype,kind=c_size_t), &
int(lr_end*size_of_datatype,kind=c_size_t), &
int(lda*size_of_datatype,kind=c_intptr_t), &
int(lr_end*size_of_datatype,kind=c_intptr_t), &
#endif
#if REALCASE == 1
(lc_end - lc_start+1), cudaMemcpyDeviceToHost)
#endif
#if COMPLEXCASE == 1
int((lc_end - lc_start+1),kind=c_size_t),int(cudaMemcpyDeviceToHost,kind=c_int))
int((lc_end - lc_start+1),kind=c_intptr_t),int(cudaMemcpyDeviceToHost,kind=c_int))
#endif
......@@ -950,26 +950,26 @@
((lc_start-1)*lda*size_of_datatype)), &
#endif
#if COMPLEXCASE == 1
int(((lc_start-1)*lda*size_of_datatype),kind=c_size_t)), &
int(((lc_start-1)*lda*size_of_datatype),kind=c_intptr_t)), &
#endif
#if REALCASE == 1
lda*size_of_datatype, loc(a(1, lc_start)), &
#endif
#if COMPLEXCASE == 1
int(lda*size_of_datatype,kind=c_size_t), loc(a(1,lc_start)), &
int(lda*size_of_datatype,kind=c_intptr_t), loc(a(1,lc_start)), &
#endif
#if REALCASE == 1
lda*size_of_datatype, lr_end*size_of_datatype, &
#endif
#if COMPLEXCASE == 1
int(lda*size_of_datatype,kind=c_size_t), &
int(lr_end*size_of_datatype,kind=c_size_t), &
int(lda*size_of_datatype,kind=c_intptr_t), &
int(lr_end*size_of_datatype,kind=c_intptr_t), &
#endif
#if REALCASE == 1
(lc_end - lc_start+1),cudaMemcpyHostToDevice)
#endif
#if COMPLEXCASE == 1
int((lc_end - lc_start+1),kind=c_size_t), &
int((lc_end - lc_start+1),kind=c_intptr_t), &
int(cudaMemcpyHostToDevice,kind=c_int))
#endif
......
......@@ -182,7 +182,7 @@
integer(kind=ik) :: istat
character(200) :: errorMessage
logical :: successCUDA
integer(kind=c_size_t), parameter :: size_of_datatype = size_of_&
integer(kind=c_intptr_t), parameter :: size_of_datatype = size_of_&
&PRECISION&
&_&
&MATH_DATATYPE
......
......@@ -154,8 +154,8 @@
integer(kind=c_intptr_t) :: aIntern_dev
integer(kind=c_intptr_t) :: bcast_buffer_dev
integer(kind=c_size_t) :: num
integer(kind=c_size_t) :: dev_offset, dev_offset_1, dev_offset_2
integer(kind=c_intptr_t) :: num
integer(kind=c_intptr_t) :: dev_offset, dev_offset_1, dev_offset_2
integer(kind=c_intptr_t) :: row_dev
integer(kind=c_intptr_t) :: row_group_dev
integer(kind=c_intptr_t) :: hh_tau_dev
......@@ -225,7 +225,7 @@
#ifndef WITH_MPI
integer(kind=ik) :: j1
#endif
integer(kind=c_size_t), parameter :: size_of_datatype = size_of_&
integer(kind=c_intptr_t), parameter :: size_of_datatype = size_of_&
&PRECISION&
&_&
&MATH_DATATYPE
......@@ -593,7 +593,7 @@
#endif
#ifdef WITH_OPENMP
if (posix_memalign(aIntern_ptr, 64_C_SIZE_T, stripe_width*a_dim2*stripe_count*max_threads* &
if (posix_memalign(aIntern_ptr, 64_c_intptr_t, stripe_width*a_dim2*stripe_count*max_threads* &
#if REALCASE == 1
C_SIZEOF(a_real)) /= 0) then
#endif
......@@ -613,7 +613,7 @@
#else /* WITH_OPENMP */
if (posix_memalign(aIntern_ptr, 64_C_SIZE_T, stripe_width*a_dim2*stripe_count* &
if (posix_memalign(aIntern_ptr, 64_c_intptr_t, stripe_width*a_dim2*stripe_count* &
#if REALCASE == 1
C_SIZEOF(a_real)) /= 0) then
#endif
......
......@@ -259,9 +259,9 @@ module cuda_c_kernel
implicit none
integer(kind=c_int), value :: nev, nb, ldq, off, ncols
integer(kind=c_size_t), value :: q
integer(kind=c_size_t), value :: hh_dot
integer(C_SIZE_T), value :: hh_tau ,hh
integer(kind=c_intptr_t), value :: q
integer(kind=c_intptr_t), value :: hh_dot
integer(c_intptr_t), value :: hh_tau ,hh
end subroutine
end interface
......@@ -274,9 +274,9 @@ module cuda_c_kernel
implicit none
integer(kind=c_int), value :: nev, nb, ldq, off, ncols
integer(kind=c_size_t), value :: q
integer(kind=c_size_t), value :: hh_dot
integer(C_SIZE_T), value :: hh_tau ,hh
integer(kind=c_intptr_t), value :: q
integer(kind=c_intptr_t), value :: hh_dot
integer(c_intptr_t), value :: hh_tau ,hh
end subroutine
end interface
......@@ -290,8 +290,8 @@ module cuda_c_kernel
implicit none
integer(kind=c_int), value :: nev, nb, ldq, off, ncols
integer(kind=c_size_t), value :: q
integer(kind=c_size_t), value :: hh_tau ,hh
integer(kind=c_intptr_t), value :: q
integer(kind=c_intptr_t), value :: hh_tau ,hh
end subroutine
end interface
......@@ -305,8 +305,8 @@ module cuda_c_kernel
implicit none
integer(kind=c_int), value :: nev, nb, ldq, off, ncols
integer(kind=c_size_t), value :: q
integer(kind=c_size_t), value :: hh_tau ,hh
integer(kind=c_intptr_t), value :: q
integer(kind=c_intptr_t), value :: hh_tau ,hh
end subroutine
end interface
......@@ -321,8 +321,8 @@ module cuda_c_kernel
implicit none
integer(kind=c_int), value :: nev, nb, ldq, off, ncols
integer(kind=c_size_t), value :: q
integer(kind=c_size_t), value :: hh_tau ,hh, hh_dot
integer(kind=c_intptr_t), value :: q
integer(kind=c_intptr_t), value :: hh_tau ,hh, hh_dot
end subroutine
end interface
......@@ -336,8 +336,8 @@ module cuda_c_kernel
implicit none
integer(kind=c_int), value :: nev, nb, ldq, off, ncols
integer(kind=c_size_t), value :: q
integer(kind=c_size_t), value :: hh_tau ,hh, hh_dot
integer(kind=c_intptr_t), value :: q
integer(kind=c_intptr_t), value :: hh_tau ,hh, hh_dot
end subroutine
end interface
......@@ -448,8 +448,8 @@ module cuda_c_kernel
use iso_c_binding
implicit none
integer(kind=c_size_t), value :: hh
integer(kind=c_size_t), value :: hh_tau
integer(kind=c_intptr_t), value :: hh
integer(kind=c_intptr_t), value :: hh_tau
integer(kind=c_int), value :: nb, n
integer(kind=c_int), value :: is_zero
......@@ -463,8 +463,8 @@ module cuda_c_kernel
use iso_c_binding
implicit none
integer(kind=c_size_t), value :: hh
integer(kind=c_size_t), value :: hh_tau
integer(kind=c_intptr_t), value :: hh
integer(kind=c_intptr_t), value :: hh_tau
integer(kind=c_int), value :: nb, n
integer(kind=c_int), value :: is_zero
......@@ -575,8 +575,8 @@ module cuda_c_kernel
use iso_c_binding
implicit none
integer(kind=c_size_t), value :: hh
integer(kind=c_size_t), value :: hh_tau
integer(kind=c_intptr_t), value :: hh
integer(kind=c_intptr_t), value :: hh_tau
integer(kind=c_int), value :: nb, n
integer(kind=c_int), value :: is_zero
......@@ -592,8 +592,8 @@ module cuda_c_kernel
use iso_c_binding
implicit none
integer(kind=c_size_t), value :: hh
integer(kind=c_size_t), value :: hh_tau
integer(kind=c_intptr_t), value :: hh
integer(kind=c_intptr_t), value :: hh_tau
integer(kind=c_int), value :: nb, n
integer(kind=c_int), value :: is_zero
......@@ -801,9 +801,9 @@ module cuda_c_kernel
implicit none
integer(kind=c_int) :: nev, nb, ldq, off, ncols
integer(kind=c_size_t) :: q
integer(kind=c_size_t) :: hh_dot
integer(C_SIZE_T) :: hh_tau ,hh
integer(kind=c_intptr_t) :: q
integer(kind=c_intptr_t) :: hh_dot
integer(c_intptr_t) :: hh_tau ,hh
#ifdef WITH_GPU_VERSION
call launch_compute_hh_trafo_c_kernel_real_c_double(q, hh, hh_dot, hh_tau, nev, nb, ldq, off, ncols)
#endif
......@@ -816,9 +816,9 @@ module cuda_c_kernel
implicit none
integer(kind=c_int) :: nev, nb, ldq, off, ncols
integer(kind=c_size_t) :: q
integer(kind=c_size_t) :: hh_dot
integer(C_SIZE_T) :: hh_tau ,hh
integer(kind=c_intptr_t) :: q
integer(kind=c_intptr_t) :: hh_dot
integer(c_intptr_t) :: hh_tau ,hh
#ifdef WITH_GPU_VERSION
call launch_compute_hh_trafo_c_kernel_real_c_single(q, hh, hh_dot, hh_tau, nev, nb, ldq, off, ncols)
#endif
......@@ -832,8 +832,8 @@ module cuda_c_kernel
implicit none
integer(kind=c_int) :: nev, nb, ldq, off, ncols
integer(kind=c_size_t) :: q
integer(kind=c_size_t) :: hh_tau ,hh
integer(kind=c_intptr_t) :: q
integer(kind=c_intptr_t) :: hh_tau ,hh
#ifdef WITH_GPU_VERSION
call launch_compute_hh_trafo_c_kernel_complex_c_double(q, hh, hh_tau, nev, nb,ldq,off, ncols)
#endif
......@@ -846,8 +846,8 @@ module cuda_c_kernel
implicit none
integer(kind=c_int) :: nev, nb, ldq, off, ncols
integer(kind=c_size_t) :: q
integer(kind=c_size_t) :: hh_tau ,hh
integer(kind=c_intptr_t) :: q
integer(kind=c_intptr_t) :: hh_tau ,hh
#ifdef WITH_GPU_VERSION
call launch_compute_hh_trafo_c_kernel_complex_c_single(q, hh, hh_tau, nev, nb,ldq,off, ncols)
#endif
......@@ -862,8 +862,8 @@ module cuda_c_kernel
implicit none
integer(kind=c_int) :: nev, nb, ldq, off, ncols
integer(kind=c_size_t) :: q
integer(kind=c_size_t) :: hh_tau ,hh, hh_dot
integer(kind=c_intptr_t) :: q
integer(kind=c_intptr_t) :: hh_tau ,hh, hh_dot
#ifdef WITH_GPU_VERSION
call launch_compute_hh_trafo_c_kernel_complex_1_c_double(q, hh, hh_dot, hh_tau, nev, nb, ldq, off, ncols)
#endif
......@@ -876,8 +876,8 @@ module cuda_c_kernel
implicit none
integer(kind=c_int) :: nev, nb, ldq, off, ncols
integer(kind=c_size_t) :: q
integer(kind=c_size_t) :: hh_tau ,hh, hh_dot
integer(kind=c_intptr_t) :: q
integer(kind=c_intptr_t) :: hh_tau ,hh, hh_dot
#ifdef WITH_GPU_VERSION
call launch_compute_hh_trafo_c_kernel_complex_1_c_single(q, hh, hh_dot, hh_tau, nev, nb, ldq, off, ncols)
#endif
......@@ -991,8 +991,8 @@ module cuda_c_kernel
use iso_c_binding
implicit none
integer(kind=c_size_t) :: hh
integer(kind=c_size_t) :: hh_tau
integer(kind=c_intptr_t) :: hh
integer(kind=c_intptr_t) :: hh_tau
integer(kind=c_int) :: nb, n
integer(kind=c_int) :: is_zero
#ifdef WITH_GPU_VERSION
......@@ -1006,8 +1006,8 @@ module cuda_c_kernel
use iso_c_binding
implicit none
integer(kind=c_size_t) :: hh
integer(kind=c_size_t) :: hh_tau
integer(kind=c_intptr_t) :: hh
integer(kind=c_intptr_t) :: hh_tau
integer(kind=c_int) :: nb, n
integer(kind=c_int) :: is_zero
#ifdef WITH_GPU_VERSION
......@@ -1121,8 +1121,8 @@ module cuda_c_kernel
use iso_c_binding
implicit none
integer(kind=c_size_t) :: hh
integer(kind=c_size_t) :: hh_tau
integer(kind=c_intptr_t) :: hh
integer(kind=c_intptr_t) :: hh_tau
integer(kind=c_int) :: nb, n
integer(kind=c_int) :: is_zero
#ifdef WITH_GPU_VERSION
......@@ -1136,8 +1136,8 @@ module cuda_c_kernel
use iso_c_binding
implicit none
integer(kind=c_size_t) :: hh
integer(kind=c_size_t) :: hh_tau
integer(kind=c_intptr_t) :: hh
integer(kind=c_intptr_t) :: hh_tau
integer(kind=c_int) :: nb, n
integer(kind=c_int) :: is_zero
#ifdef WITH_GPU_VERSION
......
This diff is collapsed.
......@@ -55,14 +55,14 @@ module cuda_functions
integer(kind=ik) :: cudaHostRegisterMapped
integer(kind=ik) :: cudaMemcpyDeviceToDevice
integer(kind=c_size_t), parameter :: size_of_double_real = 8_rk8
integer(kind=c_intptr_t), parameter :: size_of_double_real = 8_rk8
#ifdef WANT_SINGLE_PRECISION_REAL
integer(kind=c_size_t), parameter :: size_of_single_real = 4_rk4
integer(kind=c_intptr_t), parameter :: size_of_single_real = 4_rk4
#endif
integer(kind=c_size_t), parameter :: size_of_double_complex = 16_ck8
integer(kind=c_intptr_t), parameter :: size_of_double_complex = 16_ck8
#ifdef WANT_SINGLE_PRECISION_COMPLEX
integer(kind=c_size_t), parameter :: size_of_single_complex = 8_ck4
integer(kind=c_intptr_t), parameter :: size_of_single_complex = 8_ck4
#endif
! functions to set and query the CUDA devices
......@@ -165,7 +165,7 @@ module cuda_functions
implicit none
integer(kind=C_intptr_t), value :: dst
integer(kind=C_intptr_t), value :: src
integer(kind=C_SIZE_T), intent(in), value :: size
integer(kind=c_intptr_t), intent(in), value :: size
integer(kind=C_INT), intent(in), value :: dir
integer(kind=C_INT) :: istat
......@@ -181,11 +181,11 @@ module cuda_functions
implicit none
integer(kind=C_intptr_T), value :: dst
integer(kind=C_SIZE_T), intent(in), value :: dpitch
integer(kind=c_intptr_t), intent(in), value :: dpitch
integer(kind=C_intptr_T), value :: src
integer(kind=C_SIZE_T), intent(in), value :: spitch
integer(kind=C_SIZE_T), intent(in), value :: width
integer(kind=C_SIZE_T), intent(in), value :: height
integer(kind=c_intptr_t), intent(in), value :: spitch
integer(kind=c_intptr_t), intent(in), value :: width
integer(kind=c_intptr_t), intent(in), value :: height
integer(kind=C_INT), intent(in), value :: dir
integer(kind=C_INT) :: istat
......@@ -215,7 +215,7 @@ module cuda_functions
implicit none
integer(kind=C_intptr_T) :: a
integer(kind=C_SIZE_T), intent(in), value :: width_height
integer(kind=c_intptr_t), intent(in), value :: width_height
integer(kind=C_INT) :: istat
end function cuda_malloc_c
......@@ -231,7 +231,7 @@ module cuda_functions
integer(kind=C_intptr_T), value :: a
integer(kind=C_INT), value :: val
integer(kind=C_SIZE_T), intent(in), value :: size
integer(kind=c_intptr_t), intent(in), value :: size
integer(kind=C_INT) :: istat
end function cuda_memset_c
......@@ -535,7 +535,7 @@ module cuda_functions
implicit none
integer(kind=C_intptr_t) :: a
integer(kind=C_SIZE_T), intent(in) :: width_height
integer(kind=c_intptr_t), intent(in) :: width_height
logical :: success
#ifdef WITH_GPU_VERSION
success = cuda_malloc_c(a, width_height) /= 0
......@@ -566,12 +566,12 @@ module cuda_functions
integer(kind=c_intptr_t) :: a
integer(kind=ik) :: val
integer(kind=c_size_t), intent(in) :: size
integer(kind=c_intptr_t), intent(in) :: size
integer(kind=C_INT) :: istat
logical :: success
#ifdef WITH_GPU_VERSION
success= cuda_memset_c(a, int(val,kind=c_int), int(size,kind=c_size_t)) /=0
success= cuda_memset_c(a, int(val,kind=c_int), int(size,kind=c_intptr_t)) /=0
#else
success = .true.
#endif
......@@ -645,7 +645,7 @@ module cuda_functions
implicit none
integer(kind=C_intptr_t) :: dst
integer(kind=C_intptr_t) :: src
integer(kind=C_SIZE_T), intent(in) :: size
integer(kind=c_intptr_t), intent(in) :: size
integer(kind=C_INT), intent(in) :: dir
logical :: success
......@@ -663,11 +663,11 @@ module cuda_functions
implicit none
integer(kind=C_intptr_T) :: dst
integer(kind=C_SIZE_T), intent(in) :: dpitch
integer(kind=c_intptr_t), intent(in) :: dpitch
integer(kind=C_intptr_T) :: src
integer(kind=C_SIZE_T), intent(in) :: spitch
integer(kind=C_SIZE_T), intent(in) :: width
integer(kind=C_SIZE_T), intent(in) :: height
integer(kind=c_intptr_t), intent(in) :: spitch
integer(kind=c_intptr_t), intent(in) :: width
integer(kind=c_intptr_t), intent(in) :: height
integer(kind=C_INT), intent(in) :: dir
logical :: success
#ifdef WITH_GPU_VERSION
......
......@@ -111,7 +111,7 @@ subroutine redist_band_&
logical :: successCUDA
integer(kind=c_intptr_t) :: a_dev
integer(kind=c_size_t), parameter :: size_of_datatype = size_of_&
integer(kind=c_intptr_t), parameter :: size_of_datatype = size_of_&
&PRECISION&
&_&
&MATH_DATATYPE
......@@ -131,7 +131,7 @@ subroutine redist_band_&
#if COMPLEXCASE == 1
loc(c_a(1,1)), &
#endif
int(a_dev,kind=c_size_t), int(lda*matrixCols* size_of_datatype, kind=c_size_t), &
int(a_dev,kind=c_intptr_t), int(lda*matrixCols* size_of_datatype, kind=c_intptr_t), &
cudaMemcpyDeviceToHost)
if (.not.(successCUDA)) then
print *,"redist_band_&
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment