Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
elpa
elpa
Commits
4d6f8a4f
Commit
4d6f8a4f
authored
Sep 26, 2019
by
Andreas Marek
Browse files
WIP: start to templatize skew symm code; Not yet compilable
parent
b634f55e
Changes
11
Hide whitespace changes
Inline
Side-by-side
Makefile.am
View file @
4d6f8a4f
...
...
@@ -63,8 +63,8 @@ libelpa@SUFFIX@_private_la_SOURCES = \
src/elpa_generalized/cannon.c
\
#src/elpa_generalized/test_c_bindings.c
\
src/helpers/matrix_plot.F90
\
src/general/
d
ssmv.F90
\
src/general/
d
ssr2.F90
\
src/general/ssmv
_template
.F90
\
src/general/ssr2
_template
.F90
\
src/elpa_index.c
libelpa@SUFFIX@
_private_la_SOURCES
+=
src/elpa_c_interface.c
...
...
@@ -103,7 +103,6 @@ EXTRA_libelpa@SUFFIX@_private_la_DEPENDENCIES = \
src/elpa2/qr/elpa_pdgeqrf_template.F90
\
src/elpa2/elpa2_bandred_template.F90
\
src/elpa2/elpa2_symm_matrix_allreduce_real_template.F90
\
src/elpa2/elpa2_ssymm_matrix_allreduce_real_template.F90
\
src/elpa2/elpa2_trans_ev_band_to_full_template.F90
\
src/elpa2/elpa2_tridiag_band_template.F90
\
src/elpa2/elpa2_trans_ev_tridi_to_band_template.F90
\
...
...
@@ -706,7 +705,6 @@ EXTRA_DIST = \
src/elpa1/elpa_reduce_add_vectors.F90
\
src/elpa1/elpa_solve_tridi_impl_public.F90
\
src/elpa1/elpa_transpose_vectors.F90
\
src/elpa1/elpa_transpose_vectors_ss.F90
\
src/elpa2/GPU/ev_tridi_band_gpu_c_v2_complex_template.cu
\
src/elpa2/GPU/ev_tridi_band_gpu_c_v2_real_template.cu
\
src/elpa2/compute_hh_trafo.F90
\
...
...
src/elpa1/elpa1_compute_private.F90
View file @
4d6f8a4f
...
...
@@ -172,8 +172,11 @@ module elpa1_compute
#define DOUBLE_PRECISION 1
#include "../general/precision_macros.h"
#include "elpa_transpose_vectors.F90"
#define SKEW_SYMMETRIC
#include "elpa_transpose_vectors.F90"
#
include "elpa_transpose_vectors_ss.F90"
#
undef SKEW_SYMMETRIC
#include "elpa_reduce_add_vectors.F90"
#undef DOUBLE_PRECISION
#undef REALCASE
...
...
@@ -185,7 +188,9 @@ module elpa1_compute
#include "../general/precision_macros.h"
#include "elpa_transpose_vectors.F90"
#include "elpa_transpose_vectors_ss.F90"
#define SKEW_SYMMETRIC
#include "elpa_transpose_vectors.F90"
#undef SKEW_SYMMETRIC
#include "elpa_reduce_add_vectors.F90"
#undef SINGLE_PRECISION
#undef REALCASE
...
...
@@ -197,7 +202,9 @@ module elpa1_compute
#define DOUBLE_PRECISION 1
#include "../general/precision_macros.h"
#include "elpa_transpose_vectors.F90"
#include "elpa_transpose_vectors_ss.F90"
#define SKEW_SYMMETRIC
#include "elpa_transpose_vectors.F90"
#undef SKEW_SYMMETRIC
#include "elpa_reduce_add_vectors.F90"
#undef COMPLEXCASE
#undef DOUBLE_PRECISION
...
...
@@ -208,7 +215,9 @@ module elpa1_compute
#define SINGLE_PRECISION 1
#include "../general/precision_macros.h"
#include "elpa_transpose_vectors.F90"
#include "elpa_transpose_vectors_ss.F90"
#define SKEW_SYMMETRIC
#include "elpa_transpose_vectors.F90"
#undef SKEW_SYMMETRIC
#include "elpa_reduce_add_vectors.F90"
#undef COMPLEXCASE
#undef SINGLE_PRECISION
...
...
src/elpa1/elpa1_tridiag_template.F90
View file @
4d6f8a4f
...
...
@@ -185,9 +185,9 @@ call prmat(na,useGpu,a_mat,a_dev,lda,matrixCols,nblk,my_prow,my_pcol,np_rows,np_
if
(
istat
.ne.
ELPA_OK
)
then
print
*
,
"Problem getting option. Aborting..."
stop
endif
endif
isSkewsymmetric
=
(
skewsymmetric
==
1
)
if
(
useGPU
)
then
gpuString
=
"_gpu"
else
...
...
@@ -538,12 +538,10 @@ call prmat(na,useGpu,a_mat,a_dev,lda,matrixCols,nblk,my_prow,my_pcol,np_rows,np_
if
(
isSkewsymmetric
)
then
call
PRECISION_GEMV
(
'N'
,
l_row_end
-
l_row_beg
+1
,
l_col_end
-
l_col_beg
+1
,
&
-
ONE
,
a_mat
(
l_row_beg
,
l_col_beg
),
lda
,
v_col
(
l_col_beg
),
1
,
&
ONE
,
ur_p
(
l_row_beg
,
my_thread
),
1
)
else
call
PRECISION_GEMV
(
'N'
,
l_row_end
-
l_row_beg
+1
,
l_col_end
-
l_col_beg
+1
,
&
ONE
,
a_mat
(
l_row_beg
,
l_col_beg
),
lda
,
v_col
(
l_col_beg
),
1
,
&
ONE
,
ur_p
(
l_row_beg
,
my_thread
),
1
)
endif
endif
...
...
src/elpa1/elpa_transpose_vectors.F90
View file @
4d6f8a4f
...
...
@@ -50,7 +50,14 @@
#include "config-f90.h"
#include "../general/sanity.F90"
subroutine
elpa_transpose_vectors_
&
#ifdef SKEW_SYMMETRIC
#define ROUTINE_NAME elpa_transpose_vectors_ss_
#else
#define ROUTINE_NAME elpa_transpose_vectors_
#endif
subroutine
ROUTINE_NAME
&
&
MATH_DATATYPE
&
&
_
&
&
PRECISION
&
...
...
@@ -96,7 +103,7 @@ subroutine elpa_transpose_vectors_&
integer
(
kind
=
ik
)
::
auxstride
integer
(
kind
=
ik
),
intent
(
in
)
::
nrThreads
call
obj
%
timer
%
start
(
"
elpa_transpose_vectors_
&
call
obj
%
timer
%
start
(
"
ROUTINE_NAME
&
&MATH_DATATYPE&
&"
//
&
&
PRECISION_SUFFIX
&
...
...
@@ -190,7 +197,11 @@ subroutine elpa_transpose_vectors_&
k
=
(
i
-
nblks_skip
-
n
)/
lcm_s_t
*
nblk
+
(
lc
-
1
)
*
auxstride
ns
=
(
i
/
npt
)
*
nblk
! local start of block i
nl
=
min
(
nvr
-
i
*
nblk
,
nblk
)
! length
#ifdef SKEW_SYMMETRIC
vmat_t
(
ns
+1
:
ns
+
nl
,
lc
)
=
-
aux
(
k
+1
:
k
+
nl
)
#else
vmat_t
(
ns
+1
:
ns
+
nl
,
lc
)
=
aux
(
k
+1
:
k
+
nl
)
#endif
! k = k+nblk
enddo
enddo
...
...
@@ -203,7 +214,7 @@ subroutine elpa_transpose_vectors_&
#endif
deallocate
(
aux
)
call
obj
%
timer
%
stop
(
"
elpa_transpose_vectors_
&
call
obj
%
timer
%
stop
(
"
ROUTINE_NAME
&
&MATH_DATATYPE&
&"
//
&
&
PRECISION_SUFFIX
&
...
...
src/elpa2/elpa2_bandred_template.F90
View file @
4d6f8a4f
...
...
@@ -139,7 +139,7 @@
integer
(
kind
=
ik
)
::
istep
,
ncol
,
lch
,
lcx
,
nlc
integer
(
kind
=
ik
)
::
tile_size
,
l_rows_tile
,
l_cols_tile
real
(
kind
=
rk
)
::
vnorm2
real
(
kind
=
rk
)
::
vnorm2
MATH_DATATYPE
(
kind
=
rck
)
::
xf
,
aux1
(
nbw
),
aux2
(
nbw
),
vrl
,
tau
,
vav
(
nbw
,
nbw
)
! complex(kind=COMPLEX_DATATYPE), allocatable :: tmpCUDA(:,:), vmrCUDA(:,:), umcCUDA(:,:) ! note the different dimension in real case
...
...
@@ -194,9 +194,9 @@
if
(
error
.ne.
ELPA_OK
)
then
print
*
,
"Problem getting option. Aborting..."
stop
endif
endif
isSkewsymmetric
=
(
skewsymmetric
==
1
)
if
(
useGPU
)
then
gpuString
=
"_gpu"
else
...
...
@@ -1183,11 +1183,11 @@
if
(
i
==
0
)
cycle
lre
=
min
(
l_rows
,
i
*
l_rows_tile
)
call
obj
%
timer
%
start
(
"blas"
)
if
(
isSkewsymmetric
)
then
call
PRECISION_GEMM
(
'N'
,
'N'
,
lre
,
n_cols
,
lce
-
lcs
+1
,
-
ONE
,
a_mat
(
1
,
lcs
),
lda
,
&
umcCPU
(
lcs
,
n_cols
+1
),
ubound
(
umcCPU
,
dim
=
1
),
&
ONE
,
&
ONE
,
&
vmrCPU
(
1
,
n_cols
+1
),
ubound
(
vmrCPU
,
dim
=
1
))
else
call
PRECISION_GEMM
(
'N'
,
'N'
,
lre
,
n_cols
,
lce
-
lcs
+1
,
ONE
,
a_mat
(
1
,
lcs
),
lda
,
&
...
...
@@ -1465,7 +1465,7 @@
! Transpose umc -> umr (stored in vmr, second half)
if
(
isSkewsymmetric
)
then
call
elpa_transpose_vectors_ss_
&
call
elpa_transpose_vectors_ss_
&
&
MATH_DATATYPE
&
&
_
&
&
PRECISION
&
...
...
@@ -1473,7 +1473,7 @@
vmrCUDA
(
cur_l_rows
*
n_cols
+
1
),
cur_l_rows
,
mpi_comm_rows
,
&
1
,
istep
*
nbw
,
n_cols
,
nblk
,
max_threads
)
else
call
elpa_transpose_vectors_
&
call
elpa_transpose_vectors_
&
&
MATH_DATATYPE
&
&
_
&
&
PRECISION
&
...
...
@@ -1516,7 +1516,7 @@
umcCPU
(
1
,
n_cols
+1
),
ubound
(
umcCPU
,
dim
=
1
),
vav
,
&
ubound
(
vav
,
dim
=
1
),
ONE
,
umcCPU
,
ubound
(
umcCPU
,
dim
=
1
))
endif
#endif
#endif
#if COMPLEXCASE == 1
call
PRECISION_GEMM
(
'N'
,
'N'
,
l_cols
,
n_cols
,
n_cols
,
&
(
-0.5_rk
,
0.0_rk
),
&
...
...
src/elpa2/elpa2_compute_real_template.F90
View file @
4d6f8a4f
...
...
@@ -59,9 +59,12 @@
#undef COMPLEXCASE
#include "elpa2_bandred_template.F90"
#define REALCASE 1
#undef SKEW_SYMMETRIC
#include "elpa2_symm_matrix_allreduce_real_template.F90"
#if SKEWSYMMETRIC == 1
#include "elpa2_ssymm_matrix_allreduce_real_template.F90"
#define SKEW_SYMMETRIC
#include "elpa2_symm_matrix_allreduce_real_template.F90"
#undef SKEW_SYMMETRIC
#endif
#undef REALCASE
#define REALCASE 1
...
...
src/elpa2/elpa2_symm_matrix_allreduce_real_template.F90
View file @
4d6f8a4f
...
...
@@ -52,7 +52,18 @@
#include "../general/sanity.F90"
#ifdef SKEW_SYMMETRIC
#define ROUTINE_NAME ssymm_matrix_allreduce
#else
#define ROUTINE_NAME symm_matrix_allreduce
#endif
#ifdef SKEW_SYMMETRIC
subroutine
ssymm_matrix_allreduce_
&
#else
subroutine
symm_matrix_allreduce_
&
#endif
&
PRECISION
&
(
obj
,
n
,
a
,
lda
,
ldb
,
comm
)
!-------------------------------------------------------------------------------
...
...
@@ -73,7 +84,7 @@
integer
(
kind
=
ik
)
::
i
,
nc
,
mpierr
real
(
kind
=
REAL_DATATYPE
)
::
h1
(
n
*
n
),
h2
(
n
*
n
)
call
obj
%
timer
%
start
(
"
symm_matrix_allreduce
"
//
PRECISION_SUFFIX
)
call
obj
%
timer
%
start
(
"
ROUTINE_NAME
"
//
PRECISION_SUFFIX
)
nc
=
0
do
i
=
1
,
n
...
...
@@ -88,7 +99,11 @@
nc
=
0
do
i
=
1
,
n
a
(
1
:
i
,
i
)
=
h2
(
nc
+1
:
nc
+
i
)
#ifdef SKEW_SYMMETRIC
a
(
i
,
1
:
i
-1
)
=
-
a
(
1
:
i
-1
,
i
)
#else
a
(
i
,
1
:
i
-1
)
=
a
(
1
:
i
-1
,
i
)
#endif
nc
=
nc
+
i
enddo
...
...
@@ -98,7 +113,11 @@
nc
=
0
do
i
=
1
,
n
a
(
1
:
i
,
i
)
=
h1
(
nc
+1
:
nc
+
i
)
#ifdef SKEW_SYMMETRIC
a
(
i
,
1
:
i
-1
)
=
-
a
(
1
:
i
-1
,
i
)
#else
a
(
i
,
1
:
i
-1
)
=
a
(
1
:
i
-1
,
i
)
#endif
nc
=
nc
+
i
enddo
...
...
@@ -110,9 +129,13 @@
! nc = nc+i
! enddo
call
obj
%
timer
%
stop
(
"
symm_matrix_allreduce
"
//
PRECISION_SUFFIX
)
call
obj
%
timer
%
stop
(
"
ROUTINE_NAME
"
//
PRECISION_SUFFIX
)
#ifdef SKEW_SYMMETRIC
end
subroutine
ssymm_matrix_allreduce_
&
#else
end
subroutine
symm_matrix_allreduce_
&
#endif
&
PRECISION
...
...
src/elpa2/elpa2_template.F90
View file @
4d6f8a4f
...
...
@@ -132,7 +132,7 @@
integer
(
kind
=
ik
)
::
nrThreads
! #if SKEWSYMMETRIC ==1
integer
(
kind
=
ik
)
::
global_index
integer
(
kind
=
ik
)
::
global_index
! #endif
#if REALCASE == 1
#undef GPU_KERNEL
...
...
@@ -186,7 +186,7 @@
print
*
,
"Problem getting option. Aborting..."
stop
endif
call
obj
%
get
(
"mpi_comm_cols"
,
mpi_comm_cols
,
error
)
call
obj
%
get
(
"mpi_comm_cols"
,
mpi_comm_cols
,
error
if
(
error
.ne.
ELPA_OK
)
then
print
*
,
"Problem getting option. Aborting..."
stop
...
...
@@ -236,13 +236,13 @@
print
*
,
"Problem getting option. Aborting..."
stop
endif
call
obj
%
get
(
"is_skewsymmetric"
,
skewsymmetric
,
error
)
if
(
error
.ne.
ELPA_OK
)
then
print
*
,
"Problem getting option. Aborting..."
stop
endif
isSkewsymmetric
=
(
skewsymmetric
==
1
)
! GPU settings
...
...
@@ -690,10 +690,10 @@
endif
#endif
endif
if
(
isSkewsymmetric
)
then
! Extra transformation step for skew-symmetric matrix. Multiplication with diagonal complex matrix D.
! This makes the eigenvectors complex.
! This makes the eigenvectors complex.
! For now real part of eigenvectors is generated in first half of q, imaginary part in second part.
q
(
1
:
obj
%
local_nrows
,
obj
%
local_ncols
+1
:
2
*
obj
%
local_ncols
)
=
0.0
do
i
=
1
,
obj
%
local_nrows
...
...
@@ -713,7 +713,7 @@
q
(
i
,
obj
%
local_ncols
+1
:
2
*
obj
%
local_ncols
)
=
-
q
(
i
,
1
:
obj
%
local_ncols
)
q
(
i
,
1
:
obj
%
local_ncols
)
=
0
end
if
end
do
end
do
endif
! print * , "q="
! do i=1,na
...
...
@@ -754,7 +754,7 @@
call
obj
%
timer
%
stop
(
"trans_ev_to_band"
)
if
(
.not.
(
success
))
return
! ! We can now deallocate the stored householder vectors
! deallocate(hh_trans, stat=istat, errmsg=errorMessage)
! if (istat .ne. 0) then
...
...
@@ -798,7 +798,7 @@
call
obj
%
timer
%
start
(
"trans_ev_to_full"
)
if
(
(
do_useGPU_trans_ev_band_to_full
)
.and.
.not.
(
do_useGPU_trans_ev_tridi_to_band
)
)
then
! copy to device if we want to continue on GPU
successCUDA
=
cuda_malloc
(
q_dev
,
ldq
*
matrixCols
*
size_of_datatype
)
! if (.not.(successCUDA)) then
! print *,"elpa2_template, error in cuda_malloc"
...
...
src/elpa2/elpa2_tridiag_band_template.F90
View file @
4d6f8a4f
...
...
@@ -142,7 +142,7 @@
if
(
istat
.ne.
ELPA_OK
)
then
print
*
,
"Problem getting option. Aborting..."
stop
endif
endif
isSkewsymmetric
=
(
skewsymmetric
==
1
)
if
(
useGPU
)
then
...
...
@@ -653,7 +653,7 @@
endif
if
(
wantDebug
)
call
obj
%
timer
%
start
(
"blas"
)
#if REALCASE == 1
if
(
isSkewsymmetric
)
then
if
(
isSkewsymmetric
)
then
! call PRECISION_SSR2('L', nc, -ONE, hd, 1, hv, 1, ab(1,ns), 2*nb-1)
call
dssr2
(
'L'
,
nc
,
-
ONE
,
hd
,
1
,
hv
,
1
,
ab
(
1
,
ns
),
2
*
nb
-1
)
else
...
...
src/general/
d
ssmv.F90
→
src/general/ssmv
_template
.F90
View file @
4d6f8a4f
SUBROUTINE
DSSMV
(
UPLO
,
N
,
ALPHA
,
A
,
LDA
,
X
,
INCX
,
BETA
,
Y
,
INCY
)
subroutine
BLAS_SUFFIX
&
ssmv
(
UPLO
,
N
,
ALPHA
,
A
,
LDA
,
X
,
INCX
,
BETA
,
Y
,
INCY
)
!
use
precision
use
iso_c_binding
IMPLICIT
NONE
!
! .. Scalar Arguments ..
CHARACTER
UPLO
INTEGER
N
,
LDA
,
INCX
,
INCY
DOUBLE PRECISION
ALPHA
,
BETA
CHARACTER
::
UPLO
INTEGER
(
kind
=
c_int
)
::
N
,
LDA
,
INCX
,
INCY
MATH_DATATYPE
(
kind
=
rck
)
::
ALPHA
,
BETA
! ..
! .. Array Arguments ..
DOUBLE PRECISION
A
(
LDA
,
*
),
X
(
*
),
Y
(
*
)
MATH_DATATYPE
(
kind
=
rck
)
::
A
(
LDA
,
*
),
X
(
*
),
Y
(
*
)
! ..
!
! Purpose
...
...
@@ -103,15 +106,14 @@
! .. Parameters ..
DOUBLE PRECISION
ZERO
,
ONE
PARAMETER
(
ZERO
=
0.0D+0
,
ONE
=
1.0D+0
)
INTEGER
NB
PARAMETER
(
NB
=
64
)
INTEGER
(
kind
=
c_int
),
parameter
::
NB
=
64
! ..
! .. Local Scalars ..
INTEGER
II
,
JJ
,
IC
,
IY
,
JC
,
JX
,
KX
,
KY
,
INFO
DOUBLE PRECISION
TEMP
LOGICAL
UPPER
INTEGER
(
kind
=
c_int
)
::
II
,
JJ
,
IC
,
IY
,
JC
,
JX
,
KX
,
KY
,
INFO
MATH_DATATYPE
(
kind
=
rck
)
::
TEMP
LOGICAL
::
UPPER
! .. Local Arrays ..
DOUBLE PRECISION
WORK
(
NB
)
MATH_DATATYPE
(
kind
=
rck
)
::
WORK
(
NB
)
! ..
! .. External Functions ..
LOGICAL
LSAME
...
...
@@ -183,10 +185,10 @@
! and DGEMV for off-diagonal blocks.
!
IF
(
II
.GT.
JJ
)
THEN
CALL
D
GEMV
(
'T'
,
NB
,
IC
,
-
ALPHA
,
A
(
JJ
,
II
),
LDA
,
&
CALL
PRECISIOM_
GEMV
(
'T'
,
NB
,
IC
,
-
ALPHA
,
A
(
JJ
,
II
),
LDA
,
&
&
X
(
JX
),
INCX
,
TEMP
,
Y
(
IY
),
INCY
)
ELSE
IF
(
II
.LT.
JJ
)
THEN
CALL
D
GEMV
(
'N'
,
NB
,
JC
,
ALPHA
,
A
(
II
,
JJ
),
LDA
,
&
CALL
PRECISION_
GEMV
(
'N'
,
NB
,
JC
,
ALPHA
,
A
(
II
,
JJ
),
LDA
,
&
&
X
(
JX
),
INCX
,
TEMP
,
Y
(
IY
),
INCY
)
ELSE
CALL
DSSMV_SM
(
UPPER
,
JC
,
ALPHA
,
A
(
JJ
,
JJ
),
LDA
,
&
...
...
@@ -213,10 +215,10 @@
! and DGEMV for off-diagonal blocks.
!
IF
(
II
.LT.
JJ
)
THEN
CALL
D
GEMV
(
'T'
,
JC
,
NB
,
-
ALPHA
,
A
(
JJ
,
II
),
LDA
,
&
CALL
PRECISION_
GEMV
(
'T'
,
JC
,
NB
,
-
ALPHA
,
A
(
JJ
,
II
),
LDA
,
&
&
X
(
JX
),
INCX
,
TEMP
,
Y
(
IY
),
INCY
)
ELSE
IF
(
II
.GT.
JJ
)
THEN
CALL
D
GEMV
(
'N'
,
IC
,
NB
,
ALPHA
,
A
(
II
,
JJ
),
LDA
,
&
CALL
PRECISION_
GEMV
(
'N'
,
IC
,
NB
,
ALPHA
,
A
(
II
,
JJ
),
LDA
,
&
&
X
(
JX
),
INCX
,
TEMP
,
Y
(
IY
),
INCY
)
ELSE
CALL
DSSMV_SM
(
UPPER
,
JC
,
ALPHA
,
A
(
JJ
,
JJ
),
LDA
,
&
...
...
src/general/
d
ssr2.F90
→
src/general/ssr2
_template
.F90
View file @
4d6f8a4f
File moved
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment