Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
elpa
elpa
Commits
bfad08b8
Commit
bfad08b8
authored
Feb 24, 2021
by
Andreas Marek
Browse files
Rename WITH_GPU_VERSION -> WITH_NVIDIA_GPU_VERSION
parent
c07fb4d9
Changes
7
Hide whitespace changes
Inline
Side-by-side
Makefile.am
View file @
bfad08b8
...
...
@@ -137,7 +137,7 @@ libelpa@SUFFIX@_private_la_SOURCES += \
src/helpers/timer_dummy.F90
endif
if
WITH_GPU_VERSION
if
WITH_
NVIDIA_
GPU_VERSION
libelpa@SUFFIX@
_private_la_SOURCES
+=
src/GPU/CUDA/elpa_index_gpu.cu src/GPU/CUDA/cudaFunctions.cu src/GPU/CUDA/cuUtils.cu src/elpa2/GPU/CUDA/ev_tridi_band_gpu_real.cu src/elpa2/GPU/CUDA/ev_tridi_band_gpu_complex.cu
endif
...
...
configure.ac
View file @
bfad08b8
...
...
@@ -1531,9 +1531,9 @@ m4_foreach_w([elpa_m4_kernel],elpa_m4_all_kernels,[
AC_SUBST([ELPA_2STAGE_]m4_toupper(elpa_m4_kernel)[_COMPILED])
])
AM_CONDITIONAL([WITH_GPU_VERSION],[test x"$use_real_gpu" = x"yes" -o x"$use_complex_gpu" = x"yes"])
AM_CONDITIONAL([WITH_
NVIDIA_
GPU_VERSION],[test x"$use_real_gpu" = x"yes" -o x"$use_complex_gpu" = x"yes"])
if test x"$use_real_gpu" = x"yes" -o x"$use_complex_gpu" = x"yes" ; then
AC_DEFINE([WITH_GPU_VERSION],[1],[enable GPU support])
AC_DEFINE([WITH_
NVIDIA_
GPU_VERSION],[1],[enable GPU support])
AC_DEFINE([WITH_GPU_KERNEL],[1],[GPU kernel should be build])
ELPA_2STAGE_COMPLEX_GPU_COMPILED=1
ELPA_2STAGE_REAL_GPU_COMPILED=1
...
...
generate_automake_test_programs.py
View file @
bfad08b8
...
...
@@ -155,7 +155,7 @@ for lang, m, g, gid, q, t, p, d, s, lay, spl in product(sorted(language_flag.key
endifs
+=
1
if
(
g
==
1
):
print
(
"if WITH_GPU_VERSION"
)
print
(
"if WITH_
NVIDIA_
GPU_VERSION"
)
endifs
+=
1
if
(
lay
==
"all_layouts"
):
...
...
src/GPU/CUDA/cudaFunctions.cu
View file @
bfad08b8
...
...
@@ -72,7 +72,7 @@
#define debugmessage(x, ...)
#endif
#ifdef WITH_GPU_VERSION
#ifdef WITH_
NVIDIA_
GPU_VERSION
extern
"C"
{
int
cublasCreateFromC
(
intptr_t
*
cublas_handle
)
{
...
...
@@ -479,4 +479,4 @@ extern "C" {
}
#endif
/* WITH_GPU_VERSION */
#endif
/* WITH_
NVIDIA_
GPU_VERSION */
src/GPU/CUDA/mod_cuda.F90
View file @
bfad08b8
...
...
@@ -566,7 +566,7 @@ module cuda_functions
integer
(
kind
=
C_intptr_t
)
::
handle
logical
::
success
#ifdef WITH_GPU_VERSION
#ifdef WITH_
NVIDIA_
GPU_VERSION
success
=
cublas_create_c
(
handle
)
/
=
0
#else
success
=
.true.
...
...
@@ -579,7 +579,7 @@ module cuda_functions
integer
(
kind
=
C_intptr_t
)
::
handle
logical
::
success
#ifdef WITH_GPU_VERSION
#ifdef WITH_
NVIDIA_
GPU_VERSION
success
=
cublas_destroy_c
(
handle
)
/
=
0
#else
success
=
.true.
...
...
@@ -593,7 +593,7 @@ module cuda_functions
integer
(
kind
=
ik
),
intent
(
in
)
::
n
logical
::
success
#ifdef WITH_GPU_VERSION
#ifdef WITH_
NVIDIA_
GPU_VERSION
success
=
cuda_setdevice_c
(
int
(
n
,
kind
=
c_int
))
/
=
0
#else
success
=
.true.
...
...
@@ -607,7 +607,7 @@ module cuda_functions
integer
(
kind
=
ik
)
::
n
integer
(
kind
=
c_int
)
::
nCasted
logical
::
success
#ifdef WITH_GPU_VERSION
#ifdef WITH_
NVIDIA_
GPU_VERSION
success
=
cuda_getdevicecount_c
(
nCasted
)
/
=
0
n
=
int
(
nCasted
)
#else
...
...
@@ -622,7 +622,7 @@ module cuda_functions
implicit
none
logical
::
success
#ifdef WITH_GPU_VERSION
#ifdef WITH_
NVIDIA_
GPU_VERSION
success
=
cuda_devicesynchronize_c
()
/
=
0
#else
success
=
.true.
...
...
@@ -638,7 +638,7 @@ module cuda_functions
integer
(
kind
=
C_intptr_t
)
::
a
integer
(
kind
=
c_intptr_t
),
intent
(
in
)
::
width_height
logical
::
success
#ifdef WITH_GPU_VERSION
#ifdef WITH_
NVIDIA_
GPU_VERSION
success
=
cuda_malloc_c
(
a
,
width_height
)
/
=
0
#else
success
=
.true.
...
...
@@ -652,7 +652,7 @@ module cuda_functions
implicit
none
integer
(
kind
=
C_intptr_T
)
::
a
logical
::
success
#ifdef WITH_GPU_VERSION
#ifdef WITH_
NVIDIA_
GPU_VERSION
success
=
cuda_free_c
(
a
)
/
=
0
#else
success
=
.true.
...
...
@@ -667,7 +667,7 @@ module cuda_functions
type
(
c_ptr
)
::
a
integer
(
kind
=
c_intptr_t
),
intent
(
in
)
::
width_height
logical
::
success
#ifdef WITH_GPU_VERSION
#ifdef WITH_
NVIDIA_
GPU_VERSION
success
=
cuda_malloc_host_c
(
a
,
width_height
)
/
=
0
#else
success
=
.true.
...
...
@@ -681,7 +681,7 @@ module cuda_functions
implicit
none
type
(
c_ptr
),
value
::
a
logical
::
success
#ifdef WITH_GPU_VERSION
#ifdef WITH_
NVIDIA_
GPU_VERSION
success
=
cuda_free_host_c
(
a
)
/
=
0
#else
success
=
.true.
...
...
@@ -700,7 +700,7 @@ module cuda_functions
integer
(
kind
=
C_INT
)
::
istat
logical
::
success
#ifdef WITH_GPU_VERSION
#ifdef WITH_
NVIDIA_
GPU_VERSION
success
=
cuda_memset_c
(
a
,
int
(
val
,
kind
=
c_int
),
int
(
size
,
kind
=
c_intptr_t
))
/
=
0
#else
success
=
.true.
...
...
@@ -713,7 +713,7 @@ module cuda_functions
use
,
intrinsic
::
iso_c_binding
implicit
none
integer
(
kind
=
ik
)
::
flag
#ifdef WITH_GPU_VERSION
#ifdef WITH_
NVIDIA_
GPU_VERSION
flag
=
int
(
cuda_memcpyDeviceToDevice_c
())
#else
flag
=
0
...
...
@@ -725,7 +725,7 @@ module cuda_functions
use
precision
implicit
none
integer
(
kind
=
ik
)
::
flag
#ifdef WITH_GPU_VERSION
#ifdef WITH_
NVIDIA_
GPU_VERSION
flag
=
int
(
cuda_memcpyHostToDevice_c
())
#else
flag
=
0
...
...
@@ -737,7 +737,7 @@ module cuda_functions
use
precision
implicit
none
integer
(
kind
=
ik
)
::
flag
#ifdef WITH_GPU_VERSION
#ifdef WITH_
NVIDIA_
GPU_VERSION
flag
=
int
(
cuda_memcpyDeviceToHost_c
())
#else
flag
=
0
...
...
@@ -749,7 +749,7 @@ module cuda_functions
use
precision
implicit
none
integer
(
kind
=
ik
)
::
flag
#ifdef WITH_GPU_VERSION
#ifdef WITH_
NVIDIA_
GPU_VERSION
flag
=
int
(
cuda_hostRegisterDefault_c
())
#else
flag
=
0
...
...
@@ -761,7 +761,7 @@ module cuda_functions
use
precision
implicit
none
integer
(
kind
=
ik
)
::
flag
#ifdef WITH_GPU_VERSION
#ifdef WITH_
NVIDIA_
GPU_VERSION
flag
=
int
(
cuda_hostRegisterPortable_c
())
#else
flag
=
0
...
...
@@ -773,7 +773,7 @@ module cuda_functions
use
precision
implicit
none
integer
(
kind
=
ik
)
::
flag
#ifdef WITH_GPU_VERSION
#ifdef WITH_
NVIDIA_
GPU_VERSION
flag
=
int
(
cuda_hostRegisterMapped_c
())
#else
flag
=
0
...
...
@@ -791,7 +791,7 @@ module cuda_functions
integer
(
kind
=
C_INT
),
intent
(
in
)
::
dir
logical
::
success
#ifdef WITH_GPU_VERSION
#ifdef WITH_
NVIDIA_
GPU_VERSION
success
=
cuda_memcpy_c
(
dst
,
src
,
size
,
dir
)
/
=
0
#else
success
=
.true.
...
...
@@ -812,7 +812,7 @@ module cuda_functions
integer
(
kind
=
c_intptr_t
),
intent
(
in
)
::
height
integer
(
kind
=
C_INT
),
intent
(
in
)
::
dir
logical
::
success
#ifdef WITH_GPU_VERSION
#ifdef WITH_
NVIDIA_
GPU_VERSION
success
=
cuda_memcpy2d_c
(
dst
,
dpitch
,
src
,
spitch
,
width
,
height
,
dir
)
/
=
0
#else
success
=
.true.
...
...
@@ -829,7 +829,7 @@ module cuda_functions
integer
(
kind
=
C_INT
),
intent
(
in
)
::
flag
logical
::
success
#ifdef WITH_GPU_VERSION
#ifdef WITH_
NVIDIA_
GPU_VERSION
success
=
cuda_host_register_c
(
a
,
size
,
flag
)
/
=
0
#else
success
=
.true.
...
...
@@ -844,7 +844,7 @@ module cuda_functions
integer
(
kind
=
C_intptr_t
)
::
a
logical
::
success
#ifdef WITH_GPU_VERSION
#ifdef WITH_
NVIDIA_
GPU_VERSION
success
=
cuda_host_unregister_c
(
a
)
/
=
0
#else
success
=
.true.
...
...
@@ -861,7 +861,7 @@ module cuda_functions
integer
(
kind
=
C_INT
),
intent
(
in
)
::
lda
,
ldb
,
ldc
real
(
kind
=
C_DOUBLE
)
::
alpha
,
beta
integer
(
kind
=
C_intptr_T
)
::
a
,
b
,
c
#ifdef WITH_GPU_VERSION
#ifdef WITH_
NVIDIA_
GPU_VERSION
call
cublas_dgemm_c
(
cublasHandle
,
cta
,
ctb
,
m
,
n
,
k
,
alpha
,
a
,
lda
,
b
,
ldb
,
beta
,
c
,
ldc
)
#endif
end
subroutine
cublas_dgemm
...
...
@@ -875,7 +875,7 @@ module cuda_functions
integer
(
kind
=
C_INT
),
intent
(
in
)
::
lda
,
ldb
,
ldc
real
(
kind
=
C_FLOAT
)
::
alpha
,
beta
integer
(
kind
=
C_intptr_T
)
::
a
,
b
,
c
#ifdef WITH_GPU_VERSION
#ifdef WITH_
NVIDIA_
GPU_VERSION
call
cublas_sgemm_c
(
cublasHandle
,
cta
,
ctb
,
m
,
n
,
k
,
alpha
,
a
,
lda
,
b
,
ldb
,
beta
,
c
,
ldc
)
#endif
end
subroutine
cublas_sgemm
...
...
@@ -890,7 +890,7 @@ module cuda_functions
integer
(
kind
=
C_INT
),
intent
(
in
)
::
lda
,
ldb
real
(
kind
=
C_DOUBLE
)
::
alpha
integer
(
kind
=
C_intptr_T
)
::
a
,
b
#ifdef WITH_GPU_VERSION
#ifdef WITH_
NVIDIA_
GPU_VERSION
call
cublas_dtrmm_c
(
cublasHandle
,
side
,
uplo
,
trans
,
diag
,
m
,
n
,
alpha
,
a
,
lda
,
b
,
ldb
)
#endif
end
subroutine
cublas_dtrmm
...
...
@@ -905,7 +905,7 @@ module cuda_functions
integer
(
kind
=
C_INT
),
intent
(
in
)
::
lda
,
ldb
real
(
kind
=
C_FLOAT
)
::
alpha
integer
(
kind
=
C_intptr_T
)
::
a
,
b
#ifdef WITH_GPU_VERSION
#ifdef WITH_
NVIDIA_
GPU_VERSION
call
cublas_strmm_c
(
cublasHandle
,
side
,
uplo
,
trans
,
diag
,
m
,
n
,
alpha
,
a
,
lda
,
b
,
ldb
)
#endif
end
subroutine
cublas_strmm
...
...
@@ -920,7 +920,7 @@ module cuda_functions
integer
(
kind
=
C_INT
),
intent
(
in
)
::
lda
,
ldb
,
ldc
complex
(
kind
=
C_DOUBLE_COMPLEX
)
::
alpha
,
beta
integer
(
kind
=
C_intptr_T
)
::
a
,
b
,
c
#ifdef WITH_GPU_VERSION
#ifdef WITH_
NVIDIA_
GPU_VERSION
call
cublas_zgemm_c
(
cublasHandle
,
cta
,
ctb
,
m
,
n
,
k
,
alpha
,
a
,
lda
,
b
,
ldb
,
beta
,
c
,
ldc
)
#endif
end
subroutine
cublas_zgemm
...
...
@@ -935,7 +935,7 @@ module cuda_functions
integer
(
kind
=
C_INT
),
intent
(
in
)
::
lda
,
ldb
,
ldc
complex
(
kind
=
C_FLOAT_COMPLEX
)
::
alpha
,
beta
integer
(
kind
=
C_intptr_T
)
::
a
,
b
,
c
#ifdef WITH_GPU_VERSION
#ifdef WITH_
NVIDIA_
GPU_VERSION
call
cublas_cgemm_c
(
cublasHandle
,
cta
,
ctb
,
m
,
n
,
k
,
alpha
,
a
,
lda
,
b
,
ldb
,
beta
,
c
,
ldc
)
#endif
end
subroutine
cublas_cgemm
...
...
@@ -950,7 +950,7 @@ module cuda_functions
integer
(
kind
=
C_INT
),
intent
(
in
)
::
lda
,
ldb
complex
(
kind
=
C_DOUBLE_COMPLEX
)
::
alpha
integer
(
kind
=
C_intptr_T
)
::
a
,
b
#ifdef WITH_GPU_VERSION
#ifdef WITH_
NVIDIA_
GPU_VERSION
call
cublas_ztrmm_c
(
cublasHandle
,
side
,
uplo
,
trans
,
diag
,
m
,
n
,
alpha
,
a
,
lda
,
b
,
ldb
)
#endif
end
subroutine
cublas_ztrmm
...
...
@@ -965,7 +965,7 @@ module cuda_functions
integer
(
kind
=
C_INT
),
intent
(
in
)
::
lda
,
ldb
complex
(
kind
=
C_FLOAT_COMPLEX
)
::
alpha
integer
(
kind
=
C_intptr_T
)
::
a
,
b
#ifdef WITH_GPU_VERSION
#ifdef WITH_
NVIDIA_
GPU_VERSION
call
cublas_ctrmm_c
(
cublasHandle
,
side
,
uplo
,
trans
,
diag
,
m
,
n
,
alpha
,
a
,
lda
,
b
,
ldb
)
#endif
end
subroutine
cublas_ctrmm
...
...
@@ -979,7 +979,7 @@ module cuda_functions
integer
(
kind
=
C_INT
),
intent
(
in
)
::
lda
,
incx
,
incy
real
(
kind
=
C_DOUBLE
)
::
alpha
,
beta
integer
(
kind
=
C_intptr_T
)
::
a
,
x
,
y
#ifdef WITH_GPU_VERSION
#ifdef WITH_
NVIDIA_
GPU_VERSION
call
cublas_dgemv_c
(
cublasHandle
,
cta
,
m
,
n
,
alpha
,
a
,
lda
,
x
,
incx
,
beta
,
y
,
incy
)
#endif
end
subroutine
cublas_dgemv
...
...
@@ -993,7 +993,7 @@ module cuda_functions
integer
(
kind
=
C_INT
),
intent
(
in
)
::
lda
,
incx
,
incy
real
(
kind
=
C_FLOAT
)
::
alpha
,
beta
integer
(
kind
=
C_intptr_T
)
::
a
,
x
,
y
#ifdef WITH_GPU_VERSION
#ifdef WITH_
NVIDIA_
GPU_VERSION
call
cublas_sgemv_c
(
cublasHandle
,
cta
,
m
,
n
,
alpha
,
a
,
lda
,
x
,
incx
,
beta
,
y
,
incy
)
#endif
end
subroutine
cublas_sgemv
...
...
@@ -1007,7 +1007,7 @@ module cuda_functions
integer
(
kind
=
C_INT
),
intent
(
in
)
::
lda
,
incx
,
incy
complex
(
kind
=
C_DOUBLE_COMPLEX
)
::
alpha
,
beta
integer
(
kind
=
C_intptr_T
)
::
a
,
x
,
y
#ifdef WITH_GPU_VERSION
#ifdef WITH_
NVIDIA_
GPU_VERSION
call
cublas_zgemv_c
(
cublasHandle
,
cta
,
m
,
n
,
alpha
,
a
,
lda
,
x
,
incx
,
beta
,
y
,
incy
)
#endif
end
subroutine
cublas_zgemv
...
...
@@ -1021,7 +1021,7 @@ module cuda_functions
integer
(
kind
=
C_INT
),
intent
(
in
)
::
lda
,
incx
,
incy
complex
(
kind
=
C_FLOAT_COMPLEX
)
::
alpha
,
beta
integer
(
kind
=
C_intptr_T
)
::
a
,
x
,
y
#ifdef WITH_GPU_VERSION
#ifdef WITH_
NVIDIA_
GPU_VERSION
call
cublas_cgemv_c
(
cublasHandle
,
cta
,
m
,
n
,
alpha
,
a
,
lda
,
x
,
incx
,
beta
,
y
,
incy
)
#endif
end
subroutine
cublas_cgemv
...
...
@@ -1036,7 +1036,7 @@ module cuda_functions
! integer(kind=C_INT), intent(in) :: lda,incx,incy
! real(kind=C_DOUBLE) :: alpha,beta
! integer(kind=C_intptr_T) :: a, x, y
! #ifdef WITH_GPU_VERSION
! #ifdef WITH_
NVIDIA_
GPU_VERSION
! call cublas_dsymv_c(cta, n, alpha, a, lda, x, incx, beta, y, incy)
! #endif
! end subroutine cublas_dsymv
...
...
@@ -1050,7 +1050,7 @@ module cuda_functions
! integer(kind=C_INT), intent(in) :: lda,incx,incy
! real(kind=C_FLOAT) :: alpha,beta
! integer(kind=C_intptr_T) :: a, x, y
! #ifdef WITH_GPU_VERSION
! #ifdef WITH_
NVIDIA_
GPU_VERSION
! call cublas_ssymv_c(cta, n, alpha, a, lda, x, incx, beta, y, incy)
! #endif
! end subroutine cublas_ssymv
...
...
@@ -1064,7 +1064,7 @@ module cuda_functions
! integer(kind=C_INT), intent(in) :: lda,incx,incy
! complex(kind=C_DOUBLE_COMPLEX) :: alpha,beta
! integer(kind=C_intptr_T) :: a, x, y
! #ifdef WITH_GPU_VERSION
! #ifdef WITH_
NVIDIA_
GPU_VERSION
! ! call cublas_zsymv_c(cta, n, alpha, a, lda, x, incx, beta, y, incy)
! #endif
! end subroutine cublas_zsymv
...
...
@@ -1078,7 +1078,7 @@ module cuda_functions
! integer(kind=C_INT), intent(in) :: lda,incx,incy
! complex(kind=C_FLOAT_COMPLEX) :: alpha,beta
! integer(kind=C_intptr_T) :: a, x, y
! #ifdef WITH_GPU_VERSION
! #ifdef WITH_
NVIDIA_
GPU_VERSION
! ! call cublas_csymv_c(cta, n, alpha, a, lda, x, incx, beta, y, incy)
! #endif
! end subroutine cublas_csymv
...
...
src/elpa2/GPU/CUDA/interface_c_kernel.F90
View file @
bfad08b8
...
...
@@ -252,7 +252,7 @@ module cuda_c_kernel
integer
(
kind
=
c_int
)
::
nev
,
nb
,
ldq
,
ncols
integer
(
kind
=
c_intptr_t
)
::
q
integer
(
c_intptr_t
)
::
hh_tau
,
hh
#ifdef WITH_GPU_VERSION
#ifdef WITH_
NVIDIA_
GPU_VERSION
call
launch_compute_hh_trafo_c_kernel_real_double
(
q
,
hh
,
hh_tau
,
nev
,
nb
,
ldq
,
ncols
)
#endif
end
subroutine
...
...
@@ -264,7 +264,7 @@ module cuda_c_kernel
integer
(
kind
=
c_int
)
::
nev
,
nb
,
ldq
,
ncols
integer
(
kind
=
c_intptr_t
)
::
q
integer
(
c_intptr_t
)
::
hh_tau
,
hh
#ifdef WITH_GPU_VERSION
#ifdef WITH_
NVIDIA_
GPU_VERSION
call
launch_compute_hh_trafo_c_kernel_real_single
(
q
,
hh
,
hh_tau
,
nev
,
nb
,
ldq
,
ncols
)
#endif
end
subroutine
...
...
@@ -276,7 +276,7 @@ module cuda_c_kernel
integer
(
kind
=
c_int
)
::
nev
,
nb
,
ldq
,
ncols
integer
(
kind
=
c_intptr_t
)
::
q
integer
(
kind
=
c_intptr_t
)
::
hh_tau
,
hh
#ifdef WITH_GPU_VERSION
#ifdef WITH_
NVIDIA_
GPU_VERSION
call
launch_compute_hh_trafo_c_kernel_complex_double
(
q
,
hh
,
hh_tau
,
nev
,
nb
,
ldq
,
ncols
)
#endif
end
subroutine
...
...
@@ -288,7 +288,7 @@ module cuda_c_kernel
integer
(
kind
=
c_int
)
::
nev
,
nb
,
ldq
,
ncols
integer
(
kind
=
c_intptr_t
)
::
q
integer
(
kind
=
c_intptr_t
)
::
hh_tau
,
hh
#ifdef WITH_GPU_VERSION
#ifdef WITH_
NVIDIA_
GPU_VERSION
call
launch_compute_hh_trafo_c_kernel_complex_single
(
q
,
hh
,
hh_tau
,
nev
,
nb
,
ldq
,
ncols
)
#endif
end
subroutine
...
...
@@ -301,7 +301,7 @@ module cuda_c_kernel
integer
(
kind
=
c_int
)
::
row_count
integer
(
kind
=
c_int
)
::
n_offset
,
max_idx
,
stripe_width
,
a_dim2
,
stripe_count
,
l_nev
integer
(
kind
=
c_intptr_t
)
::
a_dev
,
row_group_dev
#ifdef WITH_GPU_VERSION
#ifdef WITH_
NVIDIA_
GPU_VERSION
call
launch_my_unpack_c_kernel_real_double
(
row_count
,
n_offset
,
max_idx
,
stripe_width
,
a_dim2
,
stripe_count
,
&
l_nev
,
row_group_dev
,
a_dev
)
#endif
...
...
@@ -315,7 +315,7 @@ module cuda_c_kernel
integer
(
kind
=
c_int
)
::
row_count
integer
(
kind
=
c_int
)
::
n_offset
,
max_idx
,
stripe_width
,
a_dim2
,
stripe_count
,
l_nev
integer
(
kind
=
c_intptr_t
)
::
a_dev
,
row_group_dev
#ifdef WITH_GPU_VERSION
#ifdef WITH_
NVIDIA_
GPU_VERSION
call
launch_my_unpack_c_kernel_real_single
(
row_count
,
n_offset
,
max_idx
,
stripe_width
,
a_dim2
,
stripe_count
,
&
l_nev
,
row_group_dev
,
a_dev
)
#endif
...
...
@@ -329,7 +329,7 @@ module cuda_c_kernel
integer
(
kind
=
c_int
)
::
row_count
,
n_offset
,
max_idx
,
stripe_width
,
a_dim2
,
stripe_count
,
l_nev
integer
(
kind
=
c_intptr_t
)
::
a_dev
integer
(
kind
=
c_intptr_t
)
::
row_group_dev
#ifdef WITH_GPU_VERSION
#ifdef WITH_
NVIDIA_
GPU_VERSION
call
launch_my_pack_c_kernel_real_double
(
row_count
,
n_offset
,
max_idx
,
stripe_width
,
a_dim2
,
stripe_count
,
l_nev
,
a_dev
,
&
row_group_dev
)
#endif
...
...
@@ -343,7 +343,7 @@ module cuda_c_kernel
integer
(
kind
=
c_int
)
::
row_count
,
n_offset
,
max_idx
,
stripe_width
,
a_dim2
,
stripe_count
,
l_nev
integer
(
kind
=
c_intptr_t
)
::
a_dev
integer
(
kind
=
c_intptr_t
)
::
row_group_dev
#ifdef WITH_GPU_VERSION
#ifdef WITH_
NVIDIA_
GPU_VERSION
call
launch_my_pack_c_kernel_real_single
(
row_count
,
n_offset
,
max_idx
,
stripe_width
,
a_dim2
,
stripe_count
,
l_nev
,
a_dev
,
&
row_group_dev
)
#endif
...
...
@@ -357,7 +357,7 @@ module cuda_c_kernel
integer
(
kind
=
c_intptr_t
)
::
hh_tau
integer
(
kind
=
c_int
)
::
nb
,
n
integer
(
kind
=
c_int
)
::
is_zero
#ifdef WITH_GPU_VERSION
#ifdef WITH_
NVIDIA_
GPU_VERSION
call
launch_extract_hh_tau_c_kernel_real_double
(
hh
,
hh_tau
,
nb
,
n
,
is_zero
)
#endif
end
subroutine
...
...
@@ -370,7 +370,7 @@ module cuda_c_kernel
integer
(
kind
=
c_intptr_t
)
::
hh_tau
integer
(
kind
=
c_int
)
::
nb
,
n
integer
(
kind
=
c_int
)
::
is_zero
#ifdef WITH_GPU_VERSION
#ifdef WITH_
NVIDIA_
GPU_VERSION
call
launch_extract_hh_tau_c_kernel_real_single
(
hh
,
hh_tau
,
nb
,
n
,
is_zero
)
#endif
end
subroutine
...
...
@@ -383,7 +383,7 @@ module cuda_c_kernel
integer
(
kind
=
c_int
)
::
row_count
integer
(
kind
=
c_int
)
::
n_offset
,
max_idx
,
stripe_width
,
a_dim2
,
stripe_count
,
l_nev
integer
(
kind
=
c_intptr_t
)
::
a_dev
,
row_group_dev
#ifdef WITH_GPU_VERSION
#ifdef WITH_
NVIDIA_
GPU_VERSION
call
launch_my_unpack_c_kernel_complex_double
(
row_count
,
n_offset
,
max_idx
,
stripe_width
,
a_dim2
,
stripe_count
,
l_nev
,
&
row_group_dev
,
a_dev
)
#endif
...
...
@@ -397,7 +397,7 @@ module cuda_c_kernel
integer
(
kind
=
c_int
)
::
row_count
integer
(
kind
=
c_int
)
::
n_offset
,
max_idx
,
stripe_width
,
a_dim2
,
stripe_count
,
l_nev
integer
(
kind
=
c_intptr_t
)
::
a_dev
,
row_group_dev
#ifdef WITH_GPU_VERSION
#ifdef WITH_
NVIDIA_
GPU_VERSION
call
launch_my_unpack_c_kernel_complex_single
(
row_count
,
n_offset
,
max_idx
,
stripe_width
,
a_dim2
,
stripe_count
,
l_nev
,
&
row_group_dev
,
a_dev
)
#endif
...
...
@@ -411,7 +411,7 @@ module cuda_c_kernel
integer
(
kind
=
c_int
)
::
row_count
,
n_offset
,
max_idx
,
stripe_width
,
a_dim2
,
stripe_count
,
l_nev
integer
(
kind
=
c_intptr_t
)
::
a_dev
integer
(
kind
=
c_intptr_t
)
::
row_group_dev
#ifdef WITH_GPU_VERSION
#ifdef WITH_
NVIDIA_
GPU_VERSION
call
launch_my_pack_c_kernel_complex_double
(
row_count
,
n_offset
,
max_idx
,
stripe_width
,
a_dim2
,
stripe_count
,
l_nev
,
a_dev
,
&
row_group_dev
)
#endif
...
...
@@ -425,7 +425,7 @@ module cuda_c_kernel
integer
(
kind
=
c_int
)
::
row_count
,
n_offset
,
max_idx
,
stripe_width
,
a_dim2
,
stripe_count
,
l_nev
integer
(
kind
=
c_intptr_t
)
::
a_dev
integer
(
kind
=
c_intptr_t
)
::
row_group_dev
#ifdef WITH_GPU_VERSION
#ifdef WITH_
NVIDIA_
GPU_VERSION
call
launch_my_pack_c_kernel_complex_single
(
row_count
,
n_offset
,
max_idx
,
stripe_width
,
a_dim2
,
stripe_count
,
l_nev
,
a_dev
,
&
row_group_dev
)
#endif
...
...
@@ -439,7 +439,7 @@ module cuda_c_kernel
integer
(
kind
=
c_intptr_t
)
::
hh_tau
integer
(
kind
=
c_int
)
::
nb
,
n
integer
(
kind
=
c_int
)
::
is_zero
#ifdef WITH_GPU_VERSION
#ifdef WITH_
NVIDIA_
GPU_VERSION
call
launch_extract_hh_tau_c_kernel_complex_double
(
hh
,
hh_tau
,
nb
,
n
,
is_zero
)
#endif
end
subroutine
...
...
@@ -452,7 +452,7 @@ module cuda_c_kernel
integer
(
kind
=
c_intptr_t
)
::
hh_tau
integer
(
kind
=
c_int
)
::
nb
,
n
integer
(
kind
=
c_int
)
::
is_zero
#ifdef WITH_GPU_VERSION
#ifdef WITH_
NVIDIA_
GPU_VERSION
call
launch_extract_hh_tau_c_kernel_complex_single
(
hh
,
hh_tau
,
nb
,
n
,
is_zero
)
#endif
end
subroutine
...
...
src/elpa_index.c
View file @
bfad08b8
...
...
@@ -112,7 +112,7 @@ static int min_tile_size_cardinality(elpa_index_t index);
static
int
min_tile_size_enumerate
(
elpa_index_t
index
,
int
i
);
static
int
min_tile_size_is_valid
(
elpa_index_t
index
,
int
n
,
int
new_value
);
#ifdef WITH_GPU_VERSION
#ifdef WITH_
NVIDIA_
GPU_VERSION
int
gpu_count
();
#endif
...
...
@@ -1104,7 +1104,7 @@ static int max_stored_rows_is_valid(elpa_index_t index, int n, int new_value) {
}
static
int
use_gpu_id_cardinality
(
elpa_index_t
index
)
{
#ifdef WITH_GPU_VERSION
#ifdef WITH_
NVIDIA_
GPU_VERSION
int
count
;
count
=
gpu_count
();
if
(
count
==
-
1000
)
{
...
...
@@ -1123,7 +1123,7 @@ static int use_gpu_id_enumerate(elpa_index_t index, int i) {
}
static
int
use_gpu_id_is_valid
(
elpa_index_t
index
,
int
n
,
int
new_value
)
{
#ifdef WITH_GPU_VERSION
#ifdef WITH_
NVIDIA_
GPU_VERSION
int
count
;
count
=
gpu_count
();
if
(
count
==
-
1000
)
{
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment