Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
elpa
elpa
Commits
cfa307bb
Commit
cfa307bb
authored
Oct 28, 2019
by
Andreas Marek
Browse files
Merge branch 'master_pre_stage' into skew
parents
5d0b533f
fa78e003
Changes
10
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
.gitlab-ci.yml
View file @
cfa307bb
This source diff could not be displayed because it is too large. You can
view the blob
instead.
ci_test_scripts/generate_gitlab_ci_tests.py
View file @
cfa307bb
...
...
@@ -148,7 +148,7 @@ def set_cflags_fcflags(instr, cc, fc, instruction_set):
FCFLAGS
+=
"-O3 -xMIC-AVX512"
if
(
instr
==
"avx2"
):
INSTRUCTION_OPTIONS
=
instruction_set
[
instr
]
INSTRUCTION_OPTIONS
=
instruction_set
[
instr
]
+
" --disable-avx512"
if
(
cc
==
"gnu"
):
CFLAGS
+=
"-O3 -mavx2 -mfma"
else
:
...
...
configure.ac
View file @
cfa307bb
...
...
@@ -1313,6 +1313,29 @@ if test x"$use_real_gpu" = x"yes" -o x"$use_complex_gpu" = x"yes" ; then
AC_DEFINE([WITH_GPU_KERNEL],[1],[GPU kernel should be build])
ELPA_2STAGE_COMPLEX_GPU_COMPILED=1
ELPA_2STAGE_REAL_GPU_COMPILED=1
dnl Optional NVTX profiling support, only evaluated inside the GPU branch.
dnl   --enable-nvtx  : sets enable_nvtx=yes, defines WITH_NVTX and requires that
dnl                    nvtxRangePop can be linked from the nvToolsExt library
dnl   default        : enable_nvtx=no, nothing else happens
AC_MSG_CHECKING(whether --enable-nvtx is specified)
AC_ARG_ENABLE([nvtx],
              AS_HELP_STRING([--enable-nvtx],
                             [build and install nvtx wrapper for profiling the GPU version, default no.]),
              [
               if test x"$enableval" = x"yes"; then
                 enable_nvtx=yes
               else
                 enable_nvtx=no
               fi
              ],
              [enable_nvtx=no])
AC_MSG_RESULT([${enable_nvtx}])
if test x"${enable_nvtx}" = x"yes"; then
  AC_DEFINE([WITH_NVTX],[1],[enable NVTX support])
  dnl the NVTX entry points are C symbols, so run the link test with the C compiler
  AC_LANG_PUSH([C])
  AC_SEARCH_LIBS([nvtxRangePop],[nvToolsExt],[have_nvtoolsext=yes],[have_nvtoolsext=no])
  if test x"${have_nvtoolsext}" = x"no"; then
    dnl NVTX is optional: tell the user that dropping --enable-nvtx is a way out as well
    AC_MSG_ERROR([Could not link nvToolsExt; try to set the cuda-path, or disable NVTX (--disable-nvtx) or GPU support])
  fi
  AC_LANG_POP([C])
fi
else
ELPA_2STAGE_COMPLEX_GPU_COMPILED=0
ELPA_2STAGE_REAL_GPU_COMPILED=0
...
...
src/GPU/mod_cuda.F90
View file @
cfa307bb
...
...
@@ -465,8 +465,41 @@ module cuda_functions
end
interface
#ifdef WITH_NVTX
! NVTX profiling interfaces
interface nvtxRangePushA
  ! Explicit interface for the C symbol 'nvtxRangePushA'.
  ! The argument is an assumed-size array of single C characters; the caller
  ! is responsible for the layout of that array (see the callers of this
  ! interface for how the terminating character is appended).
  subroutine nvtxRangePushA(name) bind(C, name='nvtxRangePushA')
    use iso_c_binding
    character(kind=C_CHAR, len=1), dimension(*) :: name
  end subroutine nvtxRangePushA
end interface nvtxRangePushA
interface nvtxRangePop
  ! Explicit interface for the C symbol 'nvtxRangePop'; takes no arguments.
  subroutine nvtxRangePop() bind(C, name='nvtxRangePop')
  end subroutine nvtxRangePop
end interface nvtxRangePop
#endif
contains
#ifdef WITH_NVTX
! this wrapper is needed for the string conversion
subroutine nvtxRangePush(range_name)
  ! Fortran-side convenience wrapper around nvtxRangePushA.
  !
  ! range_name : Fortran string naming the range to open
  !
  ! The string is copied character by character into an array of C
  ! characters that is one element longer than the string; the extra
  ! element is set to char(0). That array is then handed to nvtxRangePushA.
  implicit none
  character(len=*), intent(in)  :: range_name
  character(kind=C_CHAR, len=1) :: c_name(len(range_name)+1)
  integer                       :: n_chars, pos

  n_chars = len(range_name)

  ! terminating character goes into the additional last slot
  c_name(n_chars+1) = char(0)

  ! element-wise copy of the Fortran string into the C character array
  do pos = 1, n_chars
    c_name(pos) = range_name(pos:pos)
  end do

  call nvtxRangePushA(c_name)
end subroutine nvtxRangePush
#endif
! functions to set and query the CUDA devices
function
cublas_create
(
handle
)
result
(
success
)
...
...
src/elpa1/elpa1_template.F90
View file @
cfa307bb
...
...
@@ -142,6 +142,9 @@ function elpa_solve_evp_&
#else
nrThreads
=
1
#endif
#ifdef WITH_NVTX
call
nvtxRangePush
(
"elpa1"
)
#endif
success
=
.true.
...
...
@@ -352,6 +355,9 @@ function elpa_solve_evp_&
#ifdef HAVE_LIKWID
call
likwid_markerStartRegion
(
"tridi"
)
#endif
#ifdef WITH_NVTX
call
nvtxRangePush
(
"tridi"
)
#endif
call
tridiag_
&
&
MATH_DATATYPE
&
...
...
@@ -359,6 +365,9 @@ function elpa_solve_evp_&
&
PRECISION
&
&
(
obj
,
na
,
a
,
lda
,
nblk
,
matrixCols
,
mpi_comm_rows
,
mpi_comm_cols
,
ev
,
e
,
tau
,
do_useGPU_tridiag
,
wantDebug
,
nrThreads
)
#ifdef WITH_NVTX
call
nvtxRangePop
()
#endif
#ifdef HAVE_LIKWID
call
likwid_markerStopRegion
(
"tridi"
)
#endif
...
...
@@ -370,6 +379,9 @@ function elpa_solve_evp_&
#ifdef HAVE_LIKWID
call
likwid_markerStartRegion
(
"solve"
)
#endif
#ifdef WITH_NVTX
call
nvtxRangePush
(
"solve"
)
#endif
call
solve_tridi_
&
&
PRECISION
&
...
...
@@ -382,6 +394,9 @@ function elpa_solve_evp_&
#endif
nblk
,
matrixCols
,
mpi_comm_rows
,
mpi_comm_cols
,
do_useGPU_solve_tridi
,
wantDebug
,
success
,
nrThreads
)
#ifdef WITH_NVTX
call
nvtxRangePop
()
#endif
#ifdef HAVE_LIKWID
call
likwid_markerStopRegion
(
"solve"
)
#endif
...
...
@@ -447,6 +462,9 @@ function elpa_solve_evp_&
#ifdef HAVE_LIKWID
call
likwid_markerStartRegion
(
"trans_ev"
)
#endif
#ifdef WITH_NVTX
call
nvtxRangePush
(
"trans_ev"
)
#endif
! In the skew-symmetric case this transforms the real part
call
trans_ev_
&
...
...
@@ -465,6 +483,9 @@ function elpa_solve_evp_&
mpi_comm_rows
,
mpi_comm_cols
,
do_useGPU_trans_ev
)
endif
#ifdef WITH_NVTX
call
nvtxRangePop
()
#endif
#ifdef HAVE_LIKWID
call
likwid_markerStopRegion
(
"trans_ev"
)
#endif
...
...
@@ -505,6 +526,9 @@ function elpa_solve_evp_&
endif
endif
#ifdef WITH_NVTX
call
nvtxRangePop
()
#endif
! restore original OpenMP settings
#ifdef WITH_OPENMP
! store the number of OpenMP threads used in the calling function
...
...
src/elpa2/elpa2_bandred_template.F90
View file @
cfa307bb
...
...
@@ -1552,7 +1552,7 @@
endif
#endif
#if COMPLEXCASE == 1
call
PRECISION_GEMM
(
'N'
,
'N'
,
l_cols
,
n_cols
,
n_cols
,
&
call
PRECISION_GEMM
(
'N'
,
'N'
,
int
(
l_cols
,
kind
=
BLAS_KIND
),
int
(
n_cols
,
kind
=
BLAS_KIND
),
int
(
n_cols
,
kind
=
BLAS_KIND
)
,
&
(
-0.5_rk
,
0.0_rk
),
&
umcCPU
(
1
,
n_cols
+1
),
int
(
ubound
(
umcCPU
,
dim
=
1
),
kind
=
BLAS_KIND
),
vav
,
&
int
(
ubound
(
vav
,
dim
=
1
),
kind
=
BLAS_KIND
),
ONE
,
umcCPU
,
int
(
ubound
(
umcCPU
,
dim
=
1
),
kind
=
BLAS_KIND
))
...
...
src/general/elpa_ssmv_template.F90
View file @
cfa307bb
...
...
@@ -21,7 +21,7 @@ subroutine elpa_cssmv(n, alpha, a, lda, x, y)
implicit
none
#include "./precision_kinds.F90"
integer
(
kind
=
ik
)
::
n
,
lda
integer
(
kind
=
BLAS_KIND
)
::
n
,
lda
MATH_DATATYPE
(
kind
=
rck
)
::
alpha
MATH_DATATYPE
(
kind
=
rck
)
::
a
(
lda
,
*
),
x
(
*
),
y
(
*
)
integer
(
kind
=
ik
),
parameter
::
nb
=
64
...
...
src/general/elpa_ssr2_template.F90
View file @
cfa307bb
...
...
@@ -21,7 +21,7 @@ subroutine elpa_cssr2(n, x, y, a, lda )
implicit
none
#include "./precision_kinds.F90"
integer
(
kind
=
ik
)
::
n
,
lda
integer
(
kind
=
BLAS_KIND
)
::
n
,
lda
MATH_DATATYPE
(
kind
=
rck
)
::
a
(
lda
,
*
),
x
(
*
),
y
(
*
)
integer
(
kind
=
ik
),
parameter
::
nb
=
64
MATH_DATATYPE
(
kind
=
rck
)
::
temp1
,
temp2
...
...
@@ -69,7 +69,7 @@ subroutine elpa_cssr2(n, x, y, a, lda )
#if REALCASE == 1
call
PRECISION_GER
(
int
(
ic
,
kind
=
BLAS_KIND
),
int
(
nb
,
kind
=
BLAS_KIND
),
-
one
,
x
(
ix
),
1_BLAS_KIND
,
y
(
jy
),
1_BLAS_KIND
,
&
a
(
ii
,
jj
),
int
(
lda
,
kind
=
BLAS_KIND
)
)
call
PRECISION_GER
(
i
c
,
int
(
nb
,
kind
=
BLAS_KIND
),
one
,
y
(
iy
),
1_BLAS_KIND
,
x
(
jx
),
1_BLAS_KIND
,
&
call
PRECISION_GER
(
i
nt
(
ic
,
kind
=
BLAS_KIND
)
,
int
(
nb
,
kind
=
BLAS_KIND
),
one
,
y
(
iy
),
1_BLAS_KIND
,
x
(
jx
),
1_BLAS_KIND
,
&
a
(
ii
,
jj
),
int
(
lda
,
kind
=
BLAS_KIND
)
)
#endif
end
do
...
...
test/Fortran/test_skewsymmetric.F90
View file @
cfa307bb
...
...
@@ -176,8 +176,9 @@ program test
print
*
,
''
endif
call
set_up_blacsgrid
(
mpi_comm_world
,
np_rows
,
np_cols
,
layout
,
&
my_blacs_ctxt
,
my_prow
,
my_pcol
)
call
set_up_blacsgrid
(
int
(
mpi_comm_world
,
kind
=
BLAS_KIND
),
np_rows
,
&
np_cols
,
layout
,
&
my_blacs_ctxt
,
my_prow
,
my_pcol
)
call
set_up_blacs_descriptor
(
na
,
nblk
,
my_prow
,
my_pcol
,
np_rows
,
np_cols
,
&
na_rows
,
na_cols
,
sc_desc
,
my_blacs_ctxt
,
info
)
...
...
@@ -278,7 +279,7 @@ program test
call
e_skewsymmetric
%
set
(
"solver"
,
elpa_solver_2stage
,
error_elpa
)
call
e_skewsymmetric
%
get
(
"is_skewsymmetric"
,
i
,
error_elpa
)
call
e_skewsymmetric
%
get
(
"is_skewsymmetric"
,
i
nt
(
i
,
kind
=
c_int
)
,
error_elpa
)
call
e_skewsymmetric
%
timer_start
(
"eigenvectors: skewsymmetric "
)
call
e_skewsymmetric
%
eigenvectors
(
a_skewsymmetric
,
ev_skewsymmetric
,
z_skewsymmetric
,
error_elpa
)
...
...
@@ -321,9 +322,14 @@ program test
#ifdef WITH_MPI
call
MPI_BARRIER
(
MPI_COMM_WORLD
,
mpierr
)
#endif
status
=
check_correctness_evp_numeric_residuals_ss
(
na
,
nev
,
as_skewsymmetric
,
z_complex
,
ev_skewsymmetric
,
&
sc_desc
,
nblk
,
myid
,
np_rows
,
np_cols
,
my_prow
,
my_pcol
)
#ifdef TEST_SINGLE
status
=
check_correctness_evp_numeric_residuals_ss_real_single
(
na
,
nev
,
as_skewsymmetric
,
z_complex
,
ev_skewsymmetric
,
&
sc_desc
,
nblk
,
myid
,
np_rows
,
np_cols
,
my_prow
,
my_pcol
)
#else
status
=
check_correctness_evp_numeric_residuals_ss_real_double
(
na
,
nev
,
as_skewsymmetric
,
z_complex
,
ev_skewsymmetric
,
&
sc_desc
,
nblk
,
myid
,
np_rows
,
np_cols
,
my_prow
,
my_pcol
)
#endif
#ifdef WITH_MPI
call
MPI_BARRIER
(
MPI_COMM_WORLD
,
mpierr
)
...
...
test/shared/test_check_correctness_template.F90
View file @
cfa307bb
...
...
@@ -69,30 +69,31 @@
#endif
#if REALCASE == 1
function
check_correctness_evp_numeric_residuals_ss_
&
&
MATH_DATATYPE
&
&
_
&
function
check_correctness_evp_numeric_residuals_ss_real_
&
&
PRECISION
&
&
(
na
,
nev
,
as
,
z
,
ev
,
sc_desc
,
nblk
,
myid
,
np_rows
,
np_cols
,
my_prow
,
my_pcol
)
result
(
status
)
use
tests_blas_interfaces
use
tests_scalapack_interfaces
use
precision_for_tests
use
iso_c_binding
implicit
none
#include "../../src/general/precision_kinds.F90"
integer
(
kind
=
ik
)
::
status
,
na_cols
,
na_rows
integer
(
kind
=
ik
),
intent
(
in
)
::
na
,
nev
,
nblk
,
myid
,
np_rows
,
np_cols
,
my_prow
,
my_pcol
real
(
kind
=
rk
),
intent
(
in
)
::
as
(:,:)
real
(
kind
=
rk
)
::
tmpr
complex
(
kind
=
rck
),
intent
(
in
)
::
z
(:,:)
real
(
kind
=
rk
)
::
ev
(:)
integer
(
kind
=
BLAS_KIND
)
::
status
,
na_cols
,
na_rows
integer
(
kind
=
BLAS_KIND
),
intent
(
in
)
::
na
,
nev
,
nblk
,
myid
,
np_rows
,
np_cols
,
my_prow
,
my_pcol
real
(
kind
=
rk
),
intent
(
in
)
::
as
(:,:)
real
(
kind
=
rk
)
::
tmpr
complex
(
kind
=
rck
),
intent
(
in
)
::
z
(:,:)
real
(
kind
=
rk
)
::
ev
(:)
complex
(
kind
=
rck
),
dimension
(
size
(
as
,
dim
=
1
),
size
(
as
,
dim
=
2
))
::
tmp1
,
tmp2
complex
(
kind
=
rck
)
::
xc
complex
(
kind
=
rck
)
::
xc
complex
(
kind
=
rck
),
allocatable
::
as_complex
(:,:)
complex
(
kind
=
rck
),
allocatable
::
as_complex
(:,:)
integer
(
kind
=
ik
)
::
sc_desc
(:)
integer
(
kind
=
BLAS_KIND
)
::
sc_desc
(:)
integer
(
kind
=
ik
)
::
i
,
j
,
rowLocal
,
colLocal
real
(
kind
=
rck
)
::
err
,
errmax
integer
(
kind
=
BLAS_KIND
)
::
i
,
j
,
rowLocal
,
colLocal
integer
(
kind
=
c_int
)
::
row_Local
,
col_Local
real
(
kind
=
rck
)
::
err
,
errmax
integer
::
mpierr
...
...
@@ -259,7 +260,11 @@
! First check, whether the elements on diagonal are 1 .. "normality" of the vectors
err
=
0.0_rk
do
i
=
1
,
nev
if
(
map_global_array_index_to_local_index
(
i
,
i
,
rowLocal
,
colLocal
,
nblk
,
np_rows
,
np_cols
,
my_prow
,
my_pcol
))
then
if
(
map_global_array_index_to_local_index
(
int
(
i
,
kind
=
c_int
),
int
(
i
,
kind
=
c_int
),
row_Local
,
col_Local
,
&
int
(
nblk
,
kind
=
c_int
),
int
(
np_rows
,
kind
=
c_int
),
int
(
np_cols
,
kind
=
c_int
),
&
int
(
my_prow
,
kind
=
c_int
),
int
(
my_pcol
,
kind
=
c_int
))
)
then
rowLocal
=
int
(
row_Local
,
kind
=
INT_TYPE
)
colLocal
=
int
(
col_Local
,
kind
=
INT_TYPE
)
err
=
max
(
err
,
abs
(
tmp1
(
rowLocal
,
colLocal
)
-
CONE
))
endif
end
do
...
...
@@ -312,31 +317,48 @@
deallocate
(
as_complex
)
end
function
#endif
#endif
/* REALCASE */
#if REALCASE == 1
#ifdef DOUBLE_PRECISION_REAL
!c>
int
check_correctness_evp_numeric_residuals_real_double_f(
int na, int nev, int na_rows, int
na_cols,
!c> double *as, double *z, double *ev,
int
sc_desc[9],
!c>
int nblk, int myid, int np_rows, int np_cols, int my_prow, int
my_pcol);
!c>
TEST_C_INT_TYPE
check_correctness_evp_numeric_residuals_
ss_
real_double_f(
TEST_C_INT_TYPE na, TEST_C_INT_TYPE nev, TEST_C_INT_TYPE na_rows, TEST_C_INT_TYPE
na_cols,
!c> double *as,
complex
double *z, double *ev,
TEST_C_INT_TYPE
sc_desc[9],
!c>
TEST_C_INT_TYPE nblk, TEST_C_INT_TYPE myid, TEST_C_INT_TYPE np_rows, TEST_C_INT_TYPE np_cols, TEST_C_INT_TYPE my_prow, TEST_C_INT_TYPE
my_pcol);
#else
!c>
int
check_correctness_evp_numeric_residuals_real_single_f(
int na, int nev, int na_rows, int
na_cols,
!c> float *as, float *z, float *ev,
int
sc_desc[9],
!c>
int nblk, int myid, int np_rows, int np_cols, int my_prow, int
my_pcol);
!c>
TEST_C_INT_TYPE
check_correctness_evp_numeric_residuals_
ss_
real_single_f(
TEST_C_INT_TYPE na, TEST_C_INT_TYPE nev, TEST_C_INT_TYPE na_rows, TEST_C_INT_TYPE
na_cols,
!c> float *as,
complex
float *z, float *ev,
TEST_C_INT_TYPE
sc_desc[9],
!c>
TEST_C_INT_TYPE nblk, TEST_C_INT_TYPE myid, TEST_C_INT_TYPE np_rows, TEST_C_INT_TYPE np_cols, TEST_C_INT_TYPE my_prow, TEST_C_INT_TYPE
my_pcol);
#endif
#endif /* REALCASE */
#if COMPLEXCASE == 1
#ifdef DOUBLE_PRECISION_COMPLEX
!c> int check_correctness_evp_numeric_residuals_complex_double_f(int na, int nev, int na_rows, int na_cols,
!c> complex double *as, complex double *z, double *ev, int sc_desc[9],
!c> int nblk, int myid, int np_rows, int np_cols, int my_prow, int my_pcol);
#else
!c> int check_correctness_evp_numeric_residuals_complex_single_f(int na, int nev, int na_rows, int na_cols,
!c> complex float *as, complex float *z, float *ev, int sc_desc[9],
!c> int nblk, int myid, int np_rows, int np_cols, int my_prow, int my_pcol);
#endif
#endif /* COMPLEXCASE */
#if REALCASE == 1
function check_correctness_evp_numeric_residuals_ss_real_&
    &PRECISION&
    &_f (na, nev, na_rows, na_cols, as, z, ev, sc_desc, &
         nblk, myid, np_rows, np_cols, my_prow, my_pcol) result(status) &
      bind(C,name="check_correctness_evp_numeric_residuals_ss_&
      &MATH_DATATYPE&
      &_&
      &PRECISION&
      &_f")
      ! C-bound entry point for the skew-symmetric residual check.
      !
      ! All integer scalars are received by value. The matrices "as" and "z"
      ! are declared with explicit shape (na_rows, na_cols), "ev" with na
      ! entries and "sc_desc" with 9 entries. The call is forwarded unchanged
      ! to check_correctness_evp_numeric_residuals_ss_real_PRECISION, which
      ! does not take na_rows / na_cols; its result is returned as "status".
      !
      ! NOTE: the "&" / "&PRECISION&" line splitting above and below is what
      ! lets the preprocessor substitute the macros inside the identifier and
      ! inside the bind name - keep each macro on its own continuation line.
      use precision_for_tests
      use iso_c_binding

      implicit none
#include "./test_precision_kinds.F90"

      ! scalars first: the array bounds below refer to na, na_rows and na_cols
      TEST_INT_TYPE, value :: na, nev, myid, na_rows, na_cols
      TEST_INT_TYPE, value :: nblk, np_rows, np_cols, my_prow, my_pcol

      real(kind=rck), dimension(1:na_rows, 1:na_cols)    :: as
      complex(kind=rck), dimension(1:na_rows, 1:na_cols) :: z
      real(kind=rck), dimension(1:na)                    :: ev
      TEST_INT_TYPE, dimension(1:9)                      :: sc_desc

      TEST_INT_TYPE :: status

      status = check_correctness_evp_numeric_residuals_ss_real_&
      &PRECISION&
      & (na, nev, as, z, ev, sc_desc, nblk, myid, &
         np_rows, np_cols, my_prow, my_pcol)

end function
#endif /* REALCASE */
function
check_correctness_evp_numeric_residuals_
&
&
MATH_DATATYPE
&
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment