Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
elpa
elpa
Commits
5d0b533f
Commit
5d0b533f
authored
Oct 25, 2019
by
Andreas Marek
Browse files
Merge branch 'master' of gitlab.mpcdf.mpg.de:amarek/elpa_skew_symmetric into skew
parents
05ca1675
214ddc43
Changes
31
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Changelog
View file @
5d0b533f
Changelog for upcoming release
- not yet decided
- support of ELPA for real valued skew-symmetric matrices, please see:
CITATION
Changelog for ELPA 2019.05.002
...
...
Doxyfile.in
View file @
5d0b533f
...
...
@@ -885,6 +885,7 @@ EXCLUDE = @top_srcdir@/src/GPU/check_for_gpu.F90 \
@top_srcdir@/src/elpa_c_interface.c \
@top_srcdir@/src/elpa2/mod_pack_unpack_cpu.F90 \
@top_srcdir@/src/elpa2/elpa2_symm_matrix_allreduce_real_template.F90 \
@top_srcdir@/src/elpa2/elpa2_ssymm_matrix_allreduce_real_template.F90 \
@top_srcdir@/src/elpa2/elpa2_tridiag_band_template.F90 \
@top_srcdir@/src/elpa2/mod_redist_band.F90 \
@top_srcdir@/src/elpa2/pack_unpack_cpu.F90 \
...
...
@@ -1001,6 +1002,7 @@ EXCLUDE = @top_srcdir@/src/GPU/check_for_gpu.F90 \
@top_srcdir@/src/elpa1/elpa_solve_tridi_impl_public.F90 \
@top_srcdir@/src/elpa1/elpa1_trans_ev_template.F90 \
@top_srcdir@/src/elpa1/elpa_transpose_vectors.F90 \
@top_srcdir@/src/elpa1/elpa_transpose_vectors_ss.F90 \
@top_srcdir@/src/elpa1/elpa1_auxiliary.F90 \
@top_srcdir@/src/elpa1/elpa1_tridiag_template.F90 \
@top_srcdir@/src/elpa1/elpa1_tools_template.F90 \
...
...
INSTALL.md
View file @
5d0b533f
...
...
@@ -84,7 +84,7 @@ An excerpt of the most important (*ELPA* specific) options reads as follows:
| --disable-Fortran2008-features | disable Fortran 2008 if compiler does not support it |
| --enable-pyhton | build and install python wrapper, default no |
| --enable-python-tests | enable python tests, default no. |
| --enable-skew-symmetric-support | enable support for real valued skew-symmetric matrices |
We recommend that you do not build ELPA in its main directory but that you use it
in a sub-directory:
...
...
Makefile.am
View file @
5d0b533f
...
...
@@ -64,8 +64,8 @@ libelpa@SUFFIX@_private_la_SOURCES = \
src/elpa1/elpa1.F90
\
src/elpa2/elpa2.F90
\
src/elpa_generalized/cannon.c
\
#src/elpa_generalized/test_c_bindings.c
\
src/helpers/matrix_plot.F90
\
src/general/mod_elpa_skewsymmetric_blas.F90
\
src/elpa_index.c
libelpa@SUFFIX@
_private_la_SOURCES
+=
src/elpa_c_interface.c
...
...
@@ -122,6 +122,8 @@ EXTRA_libelpa@SUFFIX@_private_la_DEPENDENCIES = \
src/elpa1/elpa_invert_trm.F90
\
src/elpa1/elpa_multiply_a_b.F90
\
src/elpa1/elpa_solve_tridi_impl_public.F90
\
src/general/elpa_ssr2_template.F90
\
src/general/elpa_ssmv_template.F90
\
src/general/precision_macros.h
\
src/general/precision_typedefs.h
\
src/general/precision_kinds.F90
...
...
@@ -518,6 +520,10 @@ dist_man_MANS = \
man/elpa_autotune_save_state.3
\
man/elpa_autotune_load_state.3
\
man/elpa_autotune_print_state.3
\
man/elpa_autotune_setup.3
\
man/elpa_autotune_step.3
\
man/elpa_autotune_set_best.3
\
man/elpa_autotune_deallocate.3
\
man/elpa_uninit.3
if
ENABLE_LEGACY
...
...
@@ -542,11 +548,7 @@ dist_man_MANS += \
man/elpa_solve_evp_real_double.3
\
man/elpa_solve_evp_real_single.3
\
man/elpa_solve_evp_complex_double.3
\
man/elpa_solve_evp_complex_single.3
\
man/elpa_autotune_setup.3
\
man/elpa_autotune_step.3
\
man/elpa_autotune_set_best.3
\
man/elpa_autotune_deallocate.3
man/elpa_solve_evp_complex_single.3
endif
...
...
@@ -824,6 +826,8 @@ EXTRA_DIST = \
test
/shared/test_precision_kinds.F90
\
src/general/prow_pcol.F90
\
src/general/sanity.F90
\
src/general/elpa_ssr2_template.F90
\
src/general/elpa_ssmv_template.F90
\
test
/Fortran/assert.h
\
test
/Fortran/elpa_print_headers.F90
\
test
/shared/test_check_correctness_template.F90
\
...
...
configure.ac
View file @
5d0b533f
...
...
@@ -39,7 +39,7 @@ AC_SUBST([$1], ['$2'])
# API Version
AC_DEFINE([EARLIEST_API_VERSION], [20170403], [Earliest supported ELPA API version])
AC_DEFINE_SUBST(CURRENT_API_VERSION, 2019
0524
, "Current ELPA API version")
AC_DEFINE_SUBST(CURRENT_API_VERSION, 2019
1110
, "Current ELPA API version")
# Autotune Version
AC_DEFINE([EARLIEST_AUTOTUNE_VERSION], [20171201], [Earliest ELPA API version, which supports autotuning])
AC_DEFINE([CURRENT_AUTOTUNE_VERSION], [20190524], [Current ELPA autotune version])
...
...
@@ -1451,6 +1451,27 @@ fi
AM_CONDITIONAL([WANT_SINGLE_PRECISION_REAL],[test x"$want_single_precision" = x"yes"])
AM_CONDITIONAL([WANT_SINGLE_PRECISION_COMPLEX],[test x"$want_single_precision" = x"yes"])
#always define SKEWSYMMETRIC for the moment
AC_MSG_CHECKING(whether we should enable skew-symmetric support)
AC_ARG_ENABLE([skew-symmetric-support],
AS_HELP_STRING([--enable-skew-symmetric-support],
[enable support for real valued skew-symmetric matrices]),
[if test x"$enableval" = x"yes"; then
enable_skewsymmetric=yes
else
enable_skewsymmetric=no
fi],
[enable_skewsymmetric=no])
AC_MSG_RESULT([${enable_skewsymmetric}])
AM_CONDITIONAL([HAVE_SKEWSYMMETRIC],[test x"$enable_skewsymmetric" = x"yes"])
if test x"${enable_skewsymmetric}" = x"yes"; then
if test x"${USE_ASSUMED_SIZE}" = x"no"; then
AC_MSG_ERROR(you have to enable assumed size arrays, if you want to build with skew-symmetric support)
fi
AC_DEFINE([HAVE_SKEWSYMMETRIC],[1],[build for skewsyemmtric case])
fi
AC_SUBST([MPI_BINARY])
AC_SUBST([WITH_MKL])
AC_SUBST([WITH_BLACS])
...
...
generate/generate_precision.py
View file @
5d0b533f
...
...
@@ -4,6 +4,7 @@ import sys
simple_tokens
=
[
"PRECISION"
,
"elpa_transpose_vectors_NUMBER_PRECISION"
,
"elpa_transpose_vectors_ssNUMBER_PRECISION"
,
"elpa_reduce_add_vectors_NUMBER_PRECISION"
,
"bandred_NUMBER_PRECISION"
,
"trans_ev_band_to_full_NUMBER_PRECISION"
,
...
...
@@ -19,6 +20,7 @@ simple_tokens = [
"qr_pdgeqrf_2dcomm_PRECISION"
,
"hh_transform_NUMBER_PRECISION"
,
"symm_matrix_allreduce_PRECISION"
,
"ssymm_matrix_allreduce_PRECISION"
,
"herm_matrix_allreduce_PRECISION"
,
"redist_band_NUMBER_PRECISION"
,
"unpack_row_NUMBER_cpu_PRECISION"
,
...
...
generate_automake_test_programs.py
View file @
5d0b533f
...
...
@@ -285,6 +285,30 @@ print(" " + " \\\n ".join([
prec_flag
[
'double'
]]))
print
(
"endif"
)
name
=
"test_skewsymmetric_real_double"
print
(
"check_SCRIPTS += "
+
name
+
"_extended.sh"
)
print
(
"noinst_PROGRAMS += "
+
name
)
print
(
name
+
"_SOURCES = test/Fortran/test_skewsymmetric.F90"
)
print
(
name
+
"_LDADD = $(test_program_ldadd)"
)
print
(
name
+
"_FCFLAGS = $(test_program_fcflags)
\\
"
)
print
(
" "
+
"
\\\n
"
.
join
([
domain_flag
[
'real'
],
prec_flag
[
'double'
]]))
name
=
"test_skewsymmetric_real_single"
print
(
"if WANT_SINGLE_PRECISION_REAL"
)
print
(
"check_SCRIPTS += "
+
name
+
"_extended.sh"
)
print
(
"noinst_PROGRAMS += "
+
name
)
print
(
name
+
"_SOURCES = test/Fortran/test_skewsymmetric.F90"
)
print
(
name
+
"_LDADD = $(test_program_ldadd)"
)
print
(
name
+
"_FCFLAGS = $(test_program_fcflags)
\\
"
)
print
(
" "
+
"
\\\n
"
.
join
([
domain_flag
[
'real'
],
prec_flag
[
'single'
]]))
print
(
"endif"
)
name
=
"validate_multiple_objs_real_double_c_version"
print
(
"if ENABLE_C_TESTS"
)
print
(
"if ENABLE_AUTOTUNING"
)
...
...
src/elpa.F90
View file @
5d0b533f
...
...
@@ -111,7 +111,7 @@
!>
!> ! We urge the user to always check the error code of all ELPA functions
!>
!> if (elpa_init(201
8
111
2
) /= ELPA_OK) then
!> if (elpa_init(201
9
111
0
) /= ELPA_OK) then
!> print *, "ELPA API version not supported"
!> stop
!> endif
...
...
@@ -178,7 +178,7 @@
!>
!> /* We urge the user to always check the error code of all ELPA functions */
!>
!> if (elpa_init(201
8
111
3
) != ELPA_OK) {
!> if (elpa_init(201
9
111
0
) != ELPA_OK) {
!> fprintf(stderr, "Error: ELPA API version not supported");
!> exit(1);
!> }
...
...
@@ -233,7 +233,7 @@
!> class(elpa_autotune_t), pointer :: tune_state
!> integer :: success
!>
!> if (elpa_init(201
8
111
2
) /= ELPA_OK) then
!> if (elpa_init(201
9
111
0
) /= ELPA_OK) then
!> print *, "ELPA API version not supported"
!> stop
!> endif
...
...
src/elpa1/elpa1_compute_private.F90
View file @
5d0b533f
...
...
@@ -105,7 +105,9 @@ module elpa1_compute
public
::
elpa_reduce_add_vectors_real_double
public
::
elpa_reduce_add_vectors_real
public
::
elpa_transpose_vectors_real_double
public
::
elpa_transpose_vectors_ss_real_double
public
::
elpa_transpose_vectors_real
public
::
elpa_transpose_vectors_ss_real
interface
hh_transform_real
module
procedure
hh_transform_real_double
...
...
@@ -118,11 +120,16 @@ module elpa1_compute
interface
elpa_transpose_vectors_real
module
procedure
elpa_transpose_vectors_real_double
end
interface
interface
elpa_transpose_vectors_ss_real
module
procedure
elpa_transpose_vectors_ss_real_double
end
interface
#ifdef WANT_SINGLE_PRECISION_REAL
public
::
hh_transform_real_single
public
::
elpa_reduce_add_vectors_real_single
public
::
elpa_transpose_vectors_real_single
public
::
elpa_transpose_vectors_ss_real_single
#endif
public
::
hh_transform_complex_double
...
...
@@ -130,7 +137,9 @@ module elpa1_compute
public
::
elpa_reduce_add_vectors_complex_double
public
::
elpa_reduce_add_vectors_complex
public
::
elpa_transpose_vectors_complex_double
public
::
elpa_transpose_vectors_ss_complex_double
public
::
elpa_transpose_vectors_complex
public
::
elpa_transpose_vectors_ss_complex
interface
hh_transform_complex
module
procedure
hh_transform_complex_double
...
...
@@ -143,11 +152,16 @@ module elpa1_compute
interface
elpa_transpose_vectors_complex
module
procedure
elpa_transpose_vectors_complex_double
end
interface
interface
elpa_transpose_vectors_ss_complex
module
procedure
elpa_transpose_vectors_ss_complex_double
end
interface
#ifdef WANT_SINGLE_PRECISION_COMPLEX
public
::
hh_transform_complex_single
public
::
elpa_reduce_add_vectors_complex_single
public
::
elpa_transpose_vectors_complex_single
public
::
elpa_transpose_vectors_ss_complex_single
#endif
contains
...
...
@@ -158,7 +172,11 @@ module elpa1_compute
#define DOUBLE_PRECISION 1
#include "../general/precision_macros.h"
#include "elpa_transpose_vectors.F90"
#define SKEW_SYMMETRIC_BUILD
#include "elpa_transpose_vectors.F90"
#undef SKEW_SYMMETRIC_BUILD
#include "elpa_reduce_add_vectors.F90"
#undef DOUBLE_PRECISION
#undef REALCASE
...
...
@@ -170,6 +188,9 @@ module elpa1_compute
#include "../general/precision_macros.h"
#include "elpa_transpose_vectors.F90"
#define SKEW_SYMMETRIC_BUILD
#include "elpa_transpose_vectors.F90"
#undef SKEW_SYMMETRIC_BUILD
#include "elpa_reduce_add_vectors.F90"
#undef SINGLE_PRECISION
#undef REALCASE
...
...
@@ -181,6 +202,9 @@ module elpa1_compute
#define DOUBLE_PRECISION 1
#include "../general/precision_macros.h"
#include "elpa_transpose_vectors.F90"
#define SKEW_SYMMETRIC_BUILD
#include "elpa_transpose_vectors.F90"
#undef SKEW_SYMMETRIC_BUILD
#include "elpa_reduce_add_vectors.F90"
#undef COMPLEXCASE
#undef DOUBLE_PRECISION
...
...
@@ -191,6 +215,9 @@ module elpa1_compute
#define SINGLE_PRECISION 1
#include "../general/precision_macros.h"
#include "elpa_transpose_vectors.F90"
#define SKEW_SYMMETRIC_BUILD
#include "elpa_transpose_vectors.F90"
#undef SKEW_SYMMETRIC_BUILD
#include "elpa_reduce_add_vectors.F90"
#undef COMPLEXCASE
#undef SINGLE_PRECISION
...
...
src/elpa1/elpa1_template.F90
View file @
5d0b533f
...
...
@@ -78,7 +78,11 @@ function elpa_solve_evp_&
MATH_DATATYPE
(
kind
=
rck
),
optional
,
target
,
intent
(
out
)
::
q
(
obj
%
local_nrows
,
*
)
#else
MATH_DATATYPE
(
kind
=
rck
),
intent
(
inout
)
::
a
(
obj
%
local_nrows
,
obj
%
local_ncols
)
MATH_DATATYPE
(
kind
=
rck
),
optional
,
target
,
intent
(
out
)
::
q
(
obj
%
local_nrows
,
obj
%
local_ncols
)
#ifdef HAVE_SKEWSYMMETRIC
MATH_DATATYPE
(
kind
=
C_DATATYPE_KIND
),
optional
,
target
,
intent
(
out
)
::
q
(
obj
%
local_nrows
,
2
*
obj
%
local_ncols
)
#else
MATH_DATATYPE
(
kind
=
C_DATATYPE_KIND
),
optional
,
target
,
intent
(
out
)
::
q
(
obj
%
local_nrows
,
obj
%
local_ncols
)
#endif
#endif
#if REALCASE == 1
...
...
@@ -92,11 +96,15 @@ function elpa_solve_evp_&
complex
(
kind
=
C_DATATYPE_KIND
),
allocatable
::
tau
(:)
complex
(
kind
=
C_DATATYPE_KIND
),
allocatable
,
target
::
q_dummy
(:,:)
complex
(
kind
=
C_DATATYPE_KIND
),
pointer
::
q_actual
(:,:)
#endif /* COMPLEXCASE */
integer
(
kind
=
c_int
)
::
l_cols
,
l_rows
,
l_cols_nev
,
np_rows
,
np_cols
integer
(
kind
=
MPI_KIND
)
::
np_rowsMPI
,
np_colsMPI
#endif /* COMPLEXCASE */
logical
::
useGPU
integer
(
kind
=
c_int
)
::
skewsymmetric
logical
::
isSkewsymmetric
logical
::
success
logical
::
do_useGPU
,
do_useGPU_tridiag
,
&
...
...
@@ -115,7 +123,8 @@ function elpa_solve_evp_&
logical
::
do_tridiag
,
do_solve
,
do_trans_ev
integer
(
kind
=
ik
)
::
nrThreads
integer
(
kind
=
ik
)
::
global_index
call
obj
%
timer
%
start
(
"elpa_solve_evp_&
&MATH_DATATYPE&
&_1stage_&
...
...
@@ -203,7 +212,15 @@ function elpa_solve_evp_&
else
useGPU
=
.false.
endif
call
obj
%
get
(
"is_skewsymmetric"
,
skewsymmetric
,
error
)
if
(
error
.ne.
ELPA_OK
)
then
print
*
,
"Problem getting option. Aborting..."
stop
endif
isSkewsymmetric
=
(
skewsymmetric
==
1
)
call
obj
%
timer
%
start
(
"mpi_communication"
)
call
mpi_comm_rank
(
int
(
mpi_comm_rows
,
kind
=
MPI_KIND
),
my_prowMPI
,
mpierr
)
...
...
@@ -212,13 +229,11 @@ function elpa_solve_evp_&
my_prow
=
int
(
my_prowMPI
,
kind
=
c_int
)
my_pcol
=
int
(
my_pcolMPI
,
kind
=
c_int
)
#if COMPLEXCASE == 1
call
mpi_comm_size
(
int
(
mpi_comm_rows
,
kind
=
MPI_KIND
),
np_rowsmPI
,
mpierr
)
call
mpi_comm_size
(
int
(
mpi_comm_cols
,
kind
=
MPI_KIND
),
np_colsmPI
,
mpierr
)
call
mpi_comm_size
(
int
(
mpi_comm_rows
,
kind
=
MPI_KIND
),
np_rowsMPI
,
mpierr
)
call
mpi_comm_size
(
int
(
mpi_comm_cols
,
kind
=
MPI_KIND
),
np_colsMPI
,
mpierr
)
np_rows
=
int
(
np_rowsMPI
,
kind
=
c_int
)
np_cols
=
int
(
np_colsMPI
,
kind
=
c_int
)
#endif
call
obj
%
timer
%
stop
(
"mpi_communication"
)
...
...
@@ -403,17 +418,52 @@ function elpa_solve_evp_&
#if COMPLEXCASE == 1
q
(
1
:
l_rows
,
1
:
l_cols_nev
)
=
q_real
(
1
:
l_rows
,
1
:
l_cols_nev
)
#endif
if
(
isSkewsymmetric
)
then
! Extra transformation step for skew-symmetric matrix. Multiplication with diagonal complex matrix D.
! This makes the eigenvectors complex.
! For now real part of eigenvectors is generated in first half of q, imaginary part in second part.
q
(
1
:
obj
%
local_nrows
,
obj
%
local_ncols
+1
:
2
*
obj
%
local_ncols
)
=
0.0
do
i
=
1
,
obj
%
local_nrows
! global_index = indxl2g(i, nblk, my_prow, 0, np_rows)
global_index
=
np_rows
*
nblk
*
((
i
-1
)/
nblk
)
+
MOD
(
i
-1
,
nblk
)
+
MOD
(
np_rows
+
my_prow
-0
,
np_rows
)
*
nblk
+
1
if
(
mod
(
global_index
-1
,
4
)
.eq.
0
)
then
! do nothing
end
if
if
(
mod
(
global_index
-1
,
4
)
.eq.
1
)
then
q
(
i
,
obj
%
local_ncols
+1
:
2
*
obj
%
local_ncols
)
=
q
(
i
,
1
:
obj
%
local_ncols
)
q
(
i
,
1
:
obj
%
local_ncols
)
=
0
end
if
if
(
mod
(
global_index
-1
,
4
)
.eq.
2
)
then
q
(
i
,
1
:
obj
%
local_ncols
)
=
-
q
(
i
,
1
:
obj
%
local_ncols
)
end
if
if
(
mod
(
global_index
-1
,
4
)
.eq.
3
)
then
q
(
i
,
obj
%
local_ncols
+1
:
2
*
obj
%
local_ncols
)
=
-
q
(
i
,
1
:
obj
%
local_ncols
)
q
(
i
,
1
:
obj
%
local_ncols
)
=
0
end
if
end
do
endif
call
obj
%
timer
%
start
(
"back"
)
#ifdef HAVE_LIKWID
call
likwid_markerStartRegion
(
"trans_ev"
)
#endif
! In the skew-symmetric case this transforms the real part
call
trans_ev_
&
&
MATH_DATATYPE
&
&
_
&
&
PRECISION
&
&
(
obj
,
na
,
nev
,
a
,
lda
,
tau
,
q
,
ldq
,
nblk
,
matrixCols
,
mpi_comm_rows
,
mpi_comm_cols
,
do_useGPU_trans_ev
)
if
(
isSkewsymmetric
)
then
! Transform imaginary part
! Transformation of real and imaginary part could also be one call of trans_ev_tridi acting on the n x 2n matrix.
call
trans_ev_
&
&
MATH_DATATYPE
&
&
_
&
&
PRECISION
&
&
(
obj
,
na
,
nev
,
a
,
lda
,
tau
,
q
(
1
:
obj
%
local_nrows
,
obj
%
local_ncols
+1
:
2
*
obj
%
local_ncols
),
ldq
,
nblk
,
matrixCols
,
&
mpi_comm_rows
,
mpi_comm_cols
,
do_useGPU_trans_ev
)
endif
#ifdef HAVE_LIKWID
call
likwid_markerStopRegion
(
"trans_ev"
)
...
...
src/elpa1/elpa1_tridiag_template.F90
View file @
5d0b533f
...
...
@@ -110,6 +110,8 @@ call prmat(na,useGpu,a_mat,a_dev,lda,matrixCols,nblk,my_prow,my_pcol,np_rows,np_
class
(
elpa_abstract_impl_t
),
intent
(
inout
)
::
obj
integer
(
kind
=
ik
),
intent
(
in
)
::
na
,
lda
,
nblk
,
matrixCols
,
mpi_comm_rows
,
mpi_comm_cols
logical
,
intent
(
in
)
::
useGPU
,
wantDebug
integer
(
kind
=
c_int
)
::
skewsymmetric
logical
::
isSkewsymmetric
MATH_DATATYPE
(
kind
=
rck
),
intent
(
out
)
::
tau
(
na
)
#ifdef USE_ASSUMED_SIZE
...
...
@@ -181,6 +183,13 @@ call prmat(na,useGpu,a_mat,a_dev,lda,matrixCols,nblk,my_prow,my_pcol,np_rows,np_
&
PRECISION
&
&
_
&
&
MATH_DATATYPE
call
obj
%
get
(
"is_skewsymmetric"
,
skewsymmetric
,
istat
)
if
(
istat
.ne.
ELPA_OK
)
then
print
*
,
"Problem getting option. Aborting..."
stop
endif
isSkewsymmetric
=
(
skewsymmetric
==
1
)
if
(
useGPU
)
then
gpuString
=
"_gpu"
else
...
...
@@ -538,10 +547,16 @@ call prmat(na,useGpu,a_mat,a_dev,lda,matrixCols,nblk,my_prow,my_pcol,np_rows,np_
v_row
(
l_row_beg
),
1_BLAS_KIND
,
ONE
,
uc_p
(
l_col_beg
,
my_thread
),
1_BLAS_KIND
)
if
(
i
/
=
j
)
then
call
PRECISION_GEMV
(
'N'
,
int
(
l_row_end
-
l_row_beg
+1
,
kind
=
BLAS_KIND
),
int
(
l_col_end
-
l_col_beg
+1
,
kind
=
BLAS_KIND
),
&
ONE
,
a_mat
(
l_row_beg
,
l_col_beg
),
int
(
lda
,
kind
=
BLAS_KIND
),
v_col
(
l_col_beg
),
1_BLAS_KIND
,
&
ONE
,
ur_p
(
l_row_beg
,
my_thread
),
1_BLAS_KIND
)
if
(
isSkewsymmetric
)
then
call
PRECISION_GEMV
(
'N'
,
int
(
l_row_end
-
l_row_beg
+1
,
kind
=
BLAS_KIND
),
int
(
l_col_end
-
l_col_beg
+1
,
kind
=
BLAS_KIND
),
&
-
ONE
,
a_mat
(
l_row_beg
,
l_col_beg
),
int
(
lda
,
kind
=
BLAS_KIND
),
v_col
(
l_col_beg
),
1_BLAS_KIND
,
&
ONE
,
ur_p
(
l_row_beg
,
my_thread
),
1_BLAS_KIND
)
else
call
PRECISION_GEMV
(
'N'
,
int
(
l_row_end
-
l_row_beg
+1
,
kind
=
BLAS_KIND
),
int
(
l_col_end
-
l_col_beg
+1
,
kind
=
BLAS_KIND
),
&
ONE
,
a_mat
(
l_row_beg
,
l_col_beg
),
int
(
lda
,
kind
=
BLAS_KIND
),
v_col
(
l_col_beg
),
1_BLAS_KIND
,
&
ONE
,
ur_p
(
l_row_beg
,
my_thread
),
1_BLAS_KIND
)
endif
endif
if
(
wantDebug
)
call
obj
%
timer
%
stop
(
"blas"
)
endif
...
...
@@ -560,11 +575,16 @@ call prmat(na,useGpu,a_mat,a_dev,lda,matrixCols,nblk,my_prow,my_pcol,np_rows,np_
ONE
,
u_col
(
l_col_beg
),
1_BLAS_KIND
)
if
(
i
/
=
j
)
then
call
PRECISION_GEMV
(
'N'
,
int
(
l_row_end
-
l_row_beg
+1
,
kind
=
BLAS_KIND
),
int
(
l_col_end
-
l_col_beg
+1
,
kind
=
BLAS_KIND
),
&
ONE
,
a_mat
(
l_row_beg
,
l_col_beg
),
int
(
lda
,
kind
=
BLAS_KIND
),
&
v_col
(
l_col_beg
),
1_BLAS_KIND
,
&
ONE
,
u_row
(
l_row_beg
),
1_BLAS_KIND
)
if
(
isSkewsymmetric
)
then
call
PRECISION_GEMV
(
'N'
,
int
(
l_row_end
-
l_row_beg
+1
,
kind
=
BLAS_KIND
),
int
(
l_col_end
-
l_col_beg
+1
,
kind
=
BLAS_KIND
),
&
-
ONE
,
a_mat
(
l_row_beg
,
l_col_beg
),
int
(
lda
,
kind
=
BLAS_KIND
),
&
v_col
(
l_col_beg
),
1_BLAS_KIND
,
ONE
,
u_row
(
l_row_beg
),
1_BLAS_KIND
)
else
call
PRECISION_GEMV
(
'N'
,
int
(
l_row_end
-
l_row_beg
+1
,
kind
=
BLAS_KIND
),
int
(
l_col_end
-
l_col_beg
+1
,
kind
=
BLAS_KIND
),
&
ONE
,
a_mat
(
l_row_beg
,
l_col_beg
),
int
(
lda
,
kind
=
BLAS_KIND
),
&
v_col
(
l_col_beg
),
1_BLAS_KIND
,
ONE
,
u_row
(
l_row_beg
),
1_BLAS_KIND
)
endif
endif
if
(
wantDebug
)
call
obj
%
timer
%
stop
(
"blas"
)
endif
! not useGPU
...
...
@@ -626,11 +646,17 @@ call prmat(na,useGpu,a_mat,a_dev,lda,matrixCols,nblk,my_prow,my_pcol,np_rows,np_
a_offset
=
((
l_row_beg
-1
)
+
(
l_col_beg
-
1
)
*
lda
)
*
&
size_of_datatype
call
cublas_PRECISION_GEMV
(
'N'
,
l_row_end
-
l_row_beg
+1
,
l_col_end
-
l_col_beg
+1
,
&
ONE
,
a_dev
+
a_offset
,
lda
,
&
v_col_dev
+
(
l_col_beg
-
1
)
*
size_of_datatype
,
1
,
&
ONE
,
u_row_dev
+
(
l_row_beg
-
1
)
*
size_of_datatype
,
1
)
if
(
isSkewsymmetric
)
then
call
cublas_PRECISION_GEMV
(
'N'
,
l_row_end
-
l_row_beg
+1
,
l_col_end
-
l_col_beg
+1
,
&
-
ONE
,
a_dev
+
a_offset
,
lda
,
&
v_col_dev
+
(
l_col_beg
-
1
)
*
size_of_datatype
,
1
,
&
ONE
,
u_row_dev
+
(
l_row_beg
-
1
)
*
size_of_datatype
,
1
)
else
call
cublas_PRECISION_GEMV
(
'N'
,
l_row_end
-
l_row_beg
+1
,
l_col_end
-
l_col_beg
+1
,
&
ONE
,
a_dev
+
a_offset
,
lda
,
&
v_col_dev
+
(
l_col_beg
-
1
)
*
size_of_datatype
,
1
,
&
ONE
,
u_row_dev
+
(
l_row_beg
-
1
)
*
size_of_datatype
,
1
)
endif
enddo
end
if
!multiplication as one block / per stripes
...
...
@@ -710,13 +736,21 @@ call prmat(na,useGpu,a_mat,a_dev,lda,matrixCols,nblk,my_prow,my_pcol,np_rows,np_
u_col
=
tmp
#endif /* WITH_MPI */
endif
call
elpa_transpose_vectors_
&
&
MATH_DATATYPE
&
&
_
&
&
PRECISION
&
(
obj
,
u_col
,
ubound
(
u_col
,
dim
=
1
),
mpi_comm_cols
,
u_row
,
ubound
(
u_row
,
dim
=
1
),
&
mpi_comm_rows
,
1
,
istep
-1
,
1
,
nblk
,
max_threads
)
if
(
isSkewsymmetric
)
then
call
elpa_transpose_vectors_ss_
&
&
MATH_DATATYPE
&
&
_
&
&
PRECISION
&
(
obj
,
u_col
,
ubound
(
u_col
,
dim
=
1
),
mpi_comm_cols
,
u_row
,
ubound
(
u_row
,
dim
=
1
),
&
mpi_comm_rows
,
1
,
istep
-1
,
1
,
nblk
,
max_threads
)
else
call
elpa_transpose_vectors_
&
&
MATH_DATATYPE
&
&
_
&
&
PRECISION
&
(
obj
,
u_col
,
ubound
(
u_col
,
dim
=
1
),
mpi_comm_cols
,
u_row
,
ubound
(
u_row
,
dim
=
1
),
&
mpi_comm_rows
,
1
,
istep
-1
,
1
,
nblk
,
max_threads
)
endif
! calculate u**T * v (same as v**T * (A + VU**T + UV**T) * v )
x
=
0
...
...
@@ -843,7 +877,11 @@ call prmat(na,useGpu,a_mat,a_dev,lda,matrixCols,nblk,my_prow,my_pcol,np_rows,np_
+
dot_product
(
vu_stored_rows
(
l_rows
,
1
:
2
*
n_stored_vecs
),
uv_stored_cols
(
l_cols
,
1
:
2
*
n_stored_vecs
))
end
if
#if REALCASE == 1
d_vec
(
istep
-1
)
=
a_mat
(
l_rows
,
l_cols
)
if
(
isSkewsymmetric
)
then
d_vec
(
istep
-1
)
=
0.0_rk
else
d_vec
(
istep
-1
)
=
a_mat
(
l_rows
,
l_cols
)
endif
#endif
#if COMPLEXCASE == 1
d_vec
(
istep
-1
)
=
real
(
a_mat
(
l_rows
,
l_cols
),
kind
=
rk
)
...
...
@@ -937,7 +975,11 @@ call prmat(na,useGpu,a_mat,a_dev,lda,matrixCols,nblk,my_prow,my_pcol,np_rows,np_
successCUDA
=
cuda_memcpy
(
int
(
loc
(
d_vec
(
1
)),
kind
=
c_intptr_t
),
a_dev
,
1
*
size_of_datatype
,
cudaMemcpyDeviceToHost
)
check_memcpy_cuda
(
"tridiag: a_dev 8"
,
successCUDA
)
else
!useGPU
d_vec
(
1
)
=
a_mat
(
1
,
1
)
if
(
isSkewsymmetric
)
then
d_vec
(
1
)
=
0.0_rk
else
d_vec
(
1
)
=
a_mat
(
1
,
1
)
endif
endif
!useGPU
endif
#endif
...
...
src/elpa1/elpa_transpose_vectors.F90
View file @
5d0b533f
...
...
@@ -50,7 +50,15 @@
#include "config-f90.h"
#include "../general/sanity.F90"
subroutine
elpa_transpose_vectors_
&
#undef ROUTINE_NAME
#ifdef SKEW_SYMMETRIC_BUILD
#define ROUTINE_NAME elpa_transpose_vectors_ss_
#else
#define ROUTINE_NAME elpa_transpose_vectors_
#endif
subroutine
ROUTINE_NAME
&
&
MATH_DATATYPE
&
&
_
&
&
PRECISION
&
...
...
@@ -98,7 +106,7 @@ subroutine elpa_transpose_vectors_&
integer
(
kind
=
ik
)
::
auxstride
integer
(
kind
=
ik
),
intent
(
in
)
::
nrThreads
call
obj
%
timer
%
start
(
"
elpa_transpose_vectors_
&
call
obj
%
timer
%
start
(
"
ROUTINE_NAME
&
&MATH_DATATYPE&
&"
//
&
&
PRECISION_SUFFIX
&
...
...
@@ -197,7 +205,11 @@ subroutine elpa_transpose_vectors_&
k
=
(
i
-
nblks_skip
-
n
)/
lcm_s_t
*
nblk
+
(
lc
-
1
)
*
auxstride
ns
=
(
i
/
npt
)
*
nblk
! local start of block i
nl
=
min
(
nvr
-
i
*
nblk
,
nblk
)
! length
#ifdef SKEW_SYMMETRIC_BUILD
vmat_t
(
ns
+1
:
ns
+
nl
,
lc
)
=
-
aux
(
k
+1
:
k
+
nl
)
#else
vmat_t
(
ns
+1
:
ns
+
nl
,
lc
)
=
aux
(
k
+1
:
k
+
nl
)
#endif
! k = k+nblk
enddo
enddo
...
...
@@ -210,7 +222,7 @@ subroutine elpa_transpose_vectors_&
#endif
deallocate
(
aux
)
call
obj
%
timer
%
stop
(
"
elpa_transpose_vectors_
&
call
obj
%
timer
%
stop
(
"
ROUTINE_NAME
&
&MATH_DATATYPE&
&"
//
&
&
PRECISION_SUFFIX
&
...
...
src/elpa1/elpa_transpose_vectors_ss.F90
0 → 100644
View file @
5d0b533f