Commit 5d0b533f authored by Andreas Marek's avatar Andreas Marek
Browse files

Merge branch 'master' of gitlab.mpcdf.mpg.de:amarek/elpa_skew_symmetric into skew

parents 05ca1675 214ddc43
Changelog for upcoming release Changelog for upcoming release
- not yet decided - support of ELPA for real valued skew-symmetric matrices, please see:
CITATION
Changelog for ELPA 2019.05.002 Changelog for ELPA 2019.05.002
......
...@@ -885,6 +885,7 @@ EXCLUDE = @top_srcdir@/src/GPU/check_for_gpu.F90 \ ...@@ -885,6 +885,7 @@ EXCLUDE = @top_srcdir@/src/GPU/check_for_gpu.F90 \
@top_srcdir@/src/elpa_c_interface.c \ @top_srcdir@/src/elpa_c_interface.c \
@top_srcdir@/src/elpa2/mod_pack_unpack_cpu.F90 \ @top_srcdir@/src/elpa2/mod_pack_unpack_cpu.F90 \
@top_srcdir@/src/elpa2/elpa2_symm_matrix_allreduce_real_template.F90 \ @top_srcdir@/src/elpa2/elpa2_symm_matrix_allreduce_real_template.F90 \
@top_srcdir@/src/elpa2/elpa2_ssymm_matrix_allreduce_real_template.F90 \
@top_srcdir@/src/elpa2/elpa2_tridiag_band_template.F90 \ @top_srcdir@/src/elpa2/elpa2_tridiag_band_template.F90 \
@top_srcdir@/src/elpa2/mod_redist_band.F90 \ @top_srcdir@/src/elpa2/mod_redist_band.F90 \
@top_srcdir@/src/elpa2/pack_unpack_cpu.F90 \ @top_srcdir@/src/elpa2/pack_unpack_cpu.F90 \
...@@ -1001,6 +1002,7 @@ EXCLUDE = @top_srcdir@/src/GPU/check_for_gpu.F90 \ ...@@ -1001,6 +1002,7 @@ EXCLUDE = @top_srcdir@/src/GPU/check_for_gpu.F90 \
@top_srcdir@/src/elpa1/elpa_solve_tridi_impl_public.F90 \ @top_srcdir@/src/elpa1/elpa_solve_tridi_impl_public.F90 \
@top_srcdir@/src/elpa1/elpa1_trans_ev_template.F90 \ @top_srcdir@/src/elpa1/elpa1_trans_ev_template.F90 \
@top_srcdir@/src/elpa1/elpa_transpose_vectors.F90 \ @top_srcdir@/src/elpa1/elpa_transpose_vectors.F90 \
@top_srcdir@/src/elpa1/elpa_transpose_vectors_ss.F90 \
@top_srcdir@/src/elpa1/elpa1_auxiliary.F90 \ @top_srcdir@/src/elpa1/elpa1_auxiliary.F90 \
@top_srcdir@/src/elpa1/elpa1_tridiag_template.F90 \ @top_srcdir@/src/elpa1/elpa1_tridiag_template.F90 \
@top_srcdir@/src/elpa1/elpa1_tools_template.F90 \ @top_srcdir@/src/elpa1/elpa1_tools_template.F90 \
......
...@@ -84,7 +84,7 @@ An excerpt of the most important (*ELPA* specific) options reads as follows: ...@@ -84,7 +84,7 @@ An excerpt of the most important (*ELPA* specific) options reads as follows:
| --disable-Fortran2008-features | disable Fortran 2008 if compiler does not support it | | --disable-Fortran2008-features | disable Fortran 2008 if compiler does not support it |
| --enable-pyhton | build and install python wrapper, default no | | --enable-pyhton | build and install python wrapper, default no |
| --enable-python-tests | enable python tests, default no. | | --enable-python-tests | enable python tests, default no. |
| --enable-skew-symmetric-support | enable support for real valued skew-symmetric matrices |
We recommend that you do not build ELPA in its main directory but that you use it We recommend that you do not build ELPA in its main directory but that you use it
in a sub-directory: in a sub-directory:
......
...@@ -64,8 +64,8 @@ libelpa@SUFFIX@_private_la_SOURCES = \ ...@@ -64,8 +64,8 @@ libelpa@SUFFIX@_private_la_SOURCES = \
src/elpa1/elpa1.F90 \ src/elpa1/elpa1.F90 \
src/elpa2/elpa2.F90 \ src/elpa2/elpa2.F90 \
src/elpa_generalized/cannon.c \ src/elpa_generalized/cannon.c \
#src/elpa_generalized/test_c_bindings.c \
src/helpers/matrix_plot.F90 \ src/helpers/matrix_plot.F90 \
src/general/mod_elpa_skewsymmetric_blas.F90 \
src/elpa_index.c src/elpa_index.c
libelpa@SUFFIX@_private_la_SOURCES += src/elpa_c_interface.c libelpa@SUFFIX@_private_la_SOURCES += src/elpa_c_interface.c
...@@ -122,6 +122,8 @@ EXTRA_libelpa@SUFFIX@_private_la_DEPENDENCIES = \ ...@@ -122,6 +122,8 @@ EXTRA_libelpa@SUFFIX@_private_la_DEPENDENCIES = \
src/elpa1/elpa_invert_trm.F90 \ src/elpa1/elpa_invert_trm.F90 \
src/elpa1/elpa_multiply_a_b.F90 \ src/elpa1/elpa_multiply_a_b.F90 \
src/elpa1/elpa_solve_tridi_impl_public.F90 \ src/elpa1/elpa_solve_tridi_impl_public.F90 \
src/general/elpa_ssr2_template.F90 \
src/general/elpa_ssmv_template.F90 \
src/general/precision_macros.h \ src/general/precision_macros.h \
src/general/precision_typedefs.h \ src/general/precision_typedefs.h \
src/general/precision_kinds.F90 src/general/precision_kinds.F90
...@@ -518,6 +520,10 @@ dist_man_MANS = \ ...@@ -518,6 +520,10 @@ dist_man_MANS = \
man/elpa_autotune_save_state.3 \ man/elpa_autotune_save_state.3 \
man/elpa_autotune_load_state.3 \ man/elpa_autotune_load_state.3 \
man/elpa_autotune_print_state.3 \ man/elpa_autotune_print_state.3 \
man/elpa_autotune_setup.3 \
man/elpa_autotune_step.3 \
man/elpa_autotune_set_best.3 \
man/elpa_autotune_deallocate.3 \
man/elpa_uninit.3 man/elpa_uninit.3
if ENABLE_LEGACY if ENABLE_LEGACY
...@@ -542,11 +548,7 @@ dist_man_MANS += \ ...@@ -542,11 +548,7 @@ dist_man_MANS += \
man/elpa_solve_evp_real_double.3 \ man/elpa_solve_evp_real_double.3 \
man/elpa_solve_evp_real_single.3 \ man/elpa_solve_evp_real_single.3 \
man/elpa_solve_evp_complex_double.3 \ man/elpa_solve_evp_complex_double.3 \
man/elpa_solve_evp_complex_single.3 \ man/elpa_solve_evp_complex_single.3
man/elpa_autotune_setup.3 \
man/elpa_autotune_step.3 \
man/elpa_autotune_set_best.3 \
man/elpa_autotune_deallocate.3
endif endif
...@@ -824,6 +826,8 @@ EXTRA_DIST = \ ...@@ -824,6 +826,8 @@ EXTRA_DIST = \
test/shared/test_precision_kinds.F90 \ test/shared/test_precision_kinds.F90 \
src/general/prow_pcol.F90 \ src/general/prow_pcol.F90 \
src/general/sanity.F90 \ src/general/sanity.F90 \
src/general/elpa_ssr2_template.F90 \
src/general/elpa_ssmv_template.F90 \
test/Fortran/assert.h \ test/Fortran/assert.h \
test/Fortran/elpa_print_headers.F90 \ test/Fortran/elpa_print_headers.F90 \
test/shared/test_check_correctness_template.F90 \ test/shared/test_check_correctness_template.F90 \
......
...@@ -39,7 +39,7 @@ AC_SUBST([$1], ['$2']) ...@@ -39,7 +39,7 @@ AC_SUBST([$1], ['$2'])
# API Version # API Version
AC_DEFINE([EARLIEST_API_VERSION], [20170403], [Earliest supported ELPA API version]) AC_DEFINE([EARLIEST_API_VERSION], [20170403], [Earliest supported ELPA API version])
AC_DEFINE_SUBST(CURRENT_API_VERSION, 20190524, "Current ELPA API version") AC_DEFINE_SUBST(CURRENT_API_VERSION, 20191110, "Current ELPA API version")
# Autotune Version # Autotune Version
AC_DEFINE([EARLIEST_AUTOTUNE_VERSION], [20171201], [Earliest ELPA API version, which supports autotuning]) AC_DEFINE([EARLIEST_AUTOTUNE_VERSION], [20171201], [Earliest ELPA API version, which supports autotuning])
AC_DEFINE([CURRENT_AUTOTUNE_VERSION], [20190524], [Current ELPA autotune version]) AC_DEFINE([CURRENT_AUTOTUNE_VERSION], [20190524], [Current ELPA autotune version])
...@@ -1451,6 +1451,27 @@ fi ...@@ -1451,6 +1451,27 @@ fi
AM_CONDITIONAL([WANT_SINGLE_PRECISION_REAL],[test x"$want_single_precision" = x"yes"]) AM_CONDITIONAL([WANT_SINGLE_PRECISION_REAL],[test x"$want_single_precision" = x"yes"])
AM_CONDITIONAL([WANT_SINGLE_PRECISION_COMPLEX],[test x"$want_single_precision" = x"yes"]) AM_CONDITIONAL([WANT_SINGLE_PRECISION_COMPLEX],[test x"$want_single_precision" = x"yes"])
#always define SKEWSYMMETRIC for the moment
AC_MSG_CHECKING(whether we should enable skew-symmetric support)
AC_ARG_ENABLE([skew-symmetric-support],
AS_HELP_STRING([--enable-skew-symmetric-support],
[enable support for real valued skew-symmetric matrices]),
[if test x"$enableval" = x"yes"; then
enable_skewsymmetric=yes
else
enable_skewsymmetric=no
fi],
[enable_skewsymmetric=no])
AC_MSG_RESULT([${enable_skewsymmetric}])
AM_CONDITIONAL([HAVE_SKEWSYMMETRIC],[test x"$enable_skewsymmetric" = x"yes"])
if test x"${enable_skewsymmetric}" = x"yes"; then
if test x"${USE_ASSUMED_SIZE}" = x"no"; then
AC_MSG_ERROR(you have to enable assumed size arrays, if you want to build with skew-symmetric support)
fi
AC_DEFINE([HAVE_SKEWSYMMETRIC],[1],[build for skewsyemmtric case])
fi
AC_SUBST([MPI_BINARY]) AC_SUBST([MPI_BINARY])
AC_SUBST([WITH_MKL]) AC_SUBST([WITH_MKL])
AC_SUBST([WITH_BLACS]) AC_SUBST([WITH_BLACS])
......
...@@ -4,6 +4,7 @@ import sys ...@@ -4,6 +4,7 @@ import sys
simple_tokens = [ simple_tokens = [
"PRECISION", "PRECISION",
"elpa_transpose_vectors_NUMBER_PRECISION", "elpa_transpose_vectors_NUMBER_PRECISION",
"elpa_transpose_vectors_ssNUMBER_PRECISION",
"elpa_reduce_add_vectors_NUMBER_PRECISION", "elpa_reduce_add_vectors_NUMBER_PRECISION",
"bandred_NUMBER_PRECISION", "bandred_NUMBER_PRECISION",
"trans_ev_band_to_full_NUMBER_PRECISION", "trans_ev_band_to_full_NUMBER_PRECISION",
...@@ -19,6 +20,7 @@ simple_tokens = [ ...@@ -19,6 +20,7 @@ simple_tokens = [
"qr_pdgeqrf_2dcomm_PRECISION", "qr_pdgeqrf_2dcomm_PRECISION",
"hh_transform_NUMBER_PRECISION", "hh_transform_NUMBER_PRECISION",
"symm_matrix_allreduce_PRECISION", "symm_matrix_allreduce_PRECISION",
"ssymm_matrix_allreduce_PRECISION",
"herm_matrix_allreduce_PRECISION", "herm_matrix_allreduce_PRECISION",
"redist_band_NUMBER_PRECISION", "redist_band_NUMBER_PRECISION",
"unpack_row_NUMBER_cpu_PRECISION", "unpack_row_NUMBER_cpu_PRECISION",
......
...@@ -285,6 +285,30 @@ print(" " + " \\\n ".join([ ...@@ -285,6 +285,30 @@ print(" " + " \\\n ".join([
prec_flag['double']])) prec_flag['double']]))
print("endif") print("endif")
name = "test_skewsymmetric_real_double"
print("check_SCRIPTS += " + name + "_extended.sh")
print("noinst_PROGRAMS += " + name)
print(name + "_SOURCES = test/Fortran/test_skewsymmetric.F90")
print(name + "_LDADD = $(test_program_ldadd)")
print(name + "_FCFLAGS = $(test_program_fcflags) \\")
print(" " + " \\\n ".join([
domain_flag['real'],
prec_flag['double']]))
name = "test_skewsymmetric_real_single"
print("if WANT_SINGLE_PRECISION_REAL")
print("check_SCRIPTS += " + name + "_extended.sh")
print("noinst_PROGRAMS += " + name)
print(name + "_SOURCES = test/Fortran/test_skewsymmetric.F90")
print(name + "_LDADD = $(test_program_ldadd)")
print(name + "_FCFLAGS = $(test_program_fcflags) \\")
print(" " + " \\\n ".join([
domain_flag['real'],
prec_flag['single']]))
print("endif")
name = "validate_multiple_objs_real_double_c_version" name = "validate_multiple_objs_real_double_c_version"
print("if ENABLE_C_TESTS") print("if ENABLE_C_TESTS")
print("if ENABLE_AUTOTUNING") print("if ENABLE_AUTOTUNING")
......
...@@ -111,7 +111,7 @@ ...@@ -111,7 +111,7 @@
!> !>
!> ! We urge the user to always check the error code of all ELPA functions !> ! We urge the user to always check the error code of all ELPA functions
!> !>
!> if (elpa_init(20181112) /= ELPA_OK) then !> if (elpa_init(20191110) /= ELPA_OK) then
!> print *, "ELPA API version not supported" !> print *, "ELPA API version not supported"
!> stop !> stop
!> endif !> endif
...@@ -178,7 +178,7 @@ ...@@ -178,7 +178,7 @@
!> !>
!> /* We urge the user to always check the error code of all ELPA functions */ !> /* We urge the user to always check the error code of all ELPA functions */
!> !>
!> if (elpa_init(20181113) != ELPA_OK) { !> if (elpa_init(20191110) != ELPA_OK) {
!> fprintf(stderr, "Error: ELPA API version not supported"); !> fprintf(stderr, "Error: ELPA API version not supported");
!> exit(1); !> exit(1);
!> } !> }
...@@ -233,7 +233,7 @@ ...@@ -233,7 +233,7 @@
!> class(elpa_autotune_t), pointer :: tune_state !> class(elpa_autotune_t), pointer :: tune_state
!> integer :: success !> integer :: success
!> !>
!> if (elpa_init(20181112) /= ELPA_OK) then !> if (elpa_init(20191110) /= ELPA_OK) then
!> print *, "ELPA API version not supported" !> print *, "ELPA API version not supported"
!> stop !> stop
!> endif !> endif
......
...@@ -105,7 +105,9 @@ module elpa1_compute ...@@ -105,7 +105,9 @@ module elpa1_compute
public :: elpa_reduce_add_vectors_real_double public :: elpa_reduce_add_vectors_real_double
public :: elpa_reduce_add_vectors_real public :: elpa_reduce_add_vectors_real
public :: elpa_transpose_vectors_real_double public :: elpa_transpose_vectors_real_double
public :: elpa_transpose_vectors_ss_real_double
public :: elpa_transpose_vectors_real public :: elpa_transpose_vectors_real
public :: elpa_transpose_vectors_ss_real
interface hh_transform_real interface hh_transform_real
module procedure hh_transform_real_double module procedure hh_transform_real_double
...@@ -118,11 +120,16 @@ module elpa1_compute ...@@ -118,11 +120,16 @@ module elpa1_compute
interface elpa_transpose_vectors_real interface elpa_transpose_vectors_real
module procedure elpa_transpose_vectors_real_double module procedure elpa_transpose_vectors_real_double
end interface end interface
interface elpa_transpose_vectors_ss_real
module procedure elpa_transpose_vectors_ss_real_double
end interface
#ifdef WANT_SINGLE_PRECISION_REAL #ifdef WANT_SINGLE_PRECISION_REAL
public :: hh_transform_real_single public :: hh_transform_real_single
public :: elpa_reduce_add_vectors_real_single public :: elpa_reduce_add_vectors_real_single
public :: elpa_transpose_vectors_real_single public :: elpa_transpose_vectors_real_single
public :: elpa_transpose_vectors_ss_real_single
#endif #endif
public :: hh_transform_complex_double public :: hh_transform_complex_double
...@@ -130,7 +137,9 @@ module elpa1_compute ...@@ -130,7 +137,9 @@ module elpa1_compute
public :: elpa_reduce_add_vectors_complex_double public :: elpa_reduce_add_vectors_complex_double
public :: elpa_reduce_add_vectors_complex public :: elpa_reduce_add_vectors_complex
public :: elpa_transpose_vectors_complex_double public :: elpa_transpose_vectors_complex_double
public :: elpa_transpose_vectors_ss_complex_double
public :: elpa_transpose_vectors_complex public :: elpa_transpose_vectors_complex
public :: elpa_transpose_vectors_ss_complex
interface hh_transform_complex interface hh_transform_complex
module procedure hh_transform_complex_double module procedure hh_transform_complex_double
...@@ -143,11 +152,16 @@ module elpa1_compute ...@@ -143,11 +152,16 @@ module elpa1_compute
interface elpa_transpose_vectors_complex interface elpa_transpose_vectors_complex
module procedure elpa_transpose_vectors_complex_double module procedure elpa_transpose_vectors_complex_double
end interface end interface
interface elpa_transpose_vectors_ss_complex
module procedure elpa_transpose_vectors_ss_complex_double
end interface
#ifdef WANT_SINGLE_PRECISION_COMPLEX #ifdef WANT_SINGLE_PRECISION_COMPLEX
public :: hh_transform_complex_single public :: hh_transform_complex_single
public :: elpa_reduce_add_vectors_complex_single public :: elpa_reduce_add_vectors_complex_single
public :: elpa_transpose_vectors_complex_single public :: elpa_transpose_vectors_complex_single
public :: elpa_transpose_vectors_ss_complex_single
#endif #endif
contains contains
...@@ -158,7 +172,11 @@ module elpa1_compute ...@@ -158,7 +172,11 @@ module elpa1_compute
#define DOUBLE_PRECISION 1 #define DOUBLE_PRECISION 1
#include "../general/precision_macros.h" #include "../general/precision_macros.h"
#include "elpa_transpose_vectors.F90"
#define SKEW_SYMMETRIC_BUILD
#include "elpa_transpose_vectors.F90" #include "elpa_transpose_vectors.F90"
#undef SKEW_SYMMETRIC_BUILD
#include "elpa_reduce_add_vectors.F90" #include "elpa_reduce_add_vectors.F90"
#undef DOUBLE_PRECISION #undef DOUBLE_PRECISION
#undef REALCASE #undef REALCASE
...@@ -170,6 +188,9 @@ module elpa1_compute ...@@ -170,6 +188,9 @@ module elpa1_compute
#include "../general/precision_macros.h" #include "../general/precision_macros.h"
#include "elpa_transpose_vectors.F90" #include "elpa_transpose_vectors.F90"
#define SKEW_SYMMETRIC_BUILD
#include "elpa_transpose_vectors.F90"
#undef SKEW_SYMMETRIC_BUILD
#include "elpa_reduce_add_vectors.F90" #include "elpa_reduce_add_vectors.F90"
#undef SINGLE_PRECISION #undef SINGLE_PRECISION
#undef REALCASE #undef REALCASE
...@@ -181,6 +202,9 @@ module elpa1_compute ...@@ -181,6 +202,9 @@ module elpa1_compute
#define DOUBLE_PRECISION 1 #define DOUBLE_PRECISION 1
#include "../general/precision_macros.h" #include "../general/precision_macros.h"
#include "elpa_transpose_vectors.F90" #include "elpa_transpose_vectors.F90"
#define SKEW_SYMMETRIC_BUILD
#include "elpa_transpose_vectors.F90"
#undef SKEW_SYMMETRIC_BUILD
#include "elpa_reduce_add_vectors.F90" #include "elpa_reduce_add_vectors.F90"
#undef COMPLEXCASE #undef COMPLEXCASE
#undef DOUBLE_PRECISION #undef DOUBLE_PRECISION
...@@ -191,6 +215,9 @@ module elpa1_compute ...@@ -191,6 +215,9 @@ module elpa1_compute
#define SINGLE_PRECISION 1 #define SINGLE_PRECISION 1
#include "../general/precision_macros.h" #include "../general/precision_macros.h"
#include "elpa_transpose_vectors.F90" #include "elpa_transpose_vectors.F90"
#define SKEW_SYMMETRIC_BUILD
#include "elpa_transpose_vectors.F90"
#undef SKEW_SYMMETRIC_BUILD
#include "elpa_reduce_add_vectors.F90" #include "elpa_reduce_add_vectors.F90"
#undef COMPLEXCASE #undef COMPLEXCASE
#undef SINGLE_PRECISION #undef SINGLE_PRECISION
......
...@@ -78,7 +78,11 @@ function elpa_solve_evp_& ...@@ -78,7 +78,11 @@ function elpa_solve_evp_&
MATH_DATATYPE(kind=rck), optional,target,intent(out) :: q(obj%local_nrows,*) MATH_DATATYPE(kind=rck), optional,target,intent(out) :: q(obj%local_nrows,*)
#else #else
MATH_DATATYPE(kind=rck), intent(inout) :: a(obj%local_nrows,obj%local_ncols) MATH_DATATYPE(kind=rck), intent(inout) :: a(obj%local_nrows,obj%local_ncols)
MATH_DATATYPE(kind=rck), optional, target, intent(out) :: q(obj%local_nrows,obj%local_ncols) #ifdef HAVE_SKEWSYMMETRIC
MATH_DATATYPE(kind=C_DATATYPE_KIND), optional, target, intent(out) :: q(obj%local_nrows,2*obj%local_ncols)
#else
MATH_DATATYPE(kind=C_DATATYPE_KIND), optional, target, intent(out) :: q(obj%local_nrows,obj%local_ncols)
#endif
#endif #endif
#if REALCASE == 1 #if REALCASE == 1
...@@ -92,11 +96,15 @@ function elpa_solve_evp_& ...@@ -92,11 +96,15 @@ function elpa_solve_evp_&
complex(kind=C_DATATYPE_KIND), allocatable :: tau(:) complex(kind=C_DATATYPE_KIND), allocatable :: tau(:)
complex(kind=C_DATATYPE_KIND), allocatable,target :: q_dummy(:,:) complex(kind=C_DATATYPE_KIND), allocatable,target :: q_dummy(:,:)
complex(kind=C_DATATYPE_KIND), pointer :: q_actual(:,:) complex(kind=C_DATATYPE_KIND), pointer :: q_actual(:,:)
#endif /* COMPLEXCASE */
integer(kind=c_int) :: l_cols, l_rows, l_cols_nev, np_rows, np_cols integer(kind=c_int) :: l_cols, l_rows, l_cols_nev, np_rows, np_cols
integer(kind=MPI_KIND) :: np_rowsMPI, np_colsMPI integer(kind=MPI_KIND) :: np_rowsMPI, np_colsMPI
#endif /* COMPLEXCASE */
logical :: useGPU logical :: useGPU
integer(kind=c_int) :: skewsymmetric
logical :: isSkewsymmetric
logical :: success logical :: success
logical :: do_useGPU, do_useGPU_tridiag, & logical :: do_useGPU, do_useGPU_tridiag, &
...@@ -115,7 +123,8 @@ function elpa_solve_evp_& ...@@ -115,7 +123,8 @@ function elpa_solve_evp_&
logical :: do_tridiag, do_solve, do_trans_ev logical :: do_tridiag, do_solve, do_trans_ev
integer(kind=ik) :: nrThreads integer(kind=ik) :: nrThreads
integer(kind=ik) :: global_index
call obj%timer%start("elpa_solve_evp_& call obj%timer%start("elpa_solve_evp_&
&MATH_DATATYPE& &MATH_DATATYPE&
&_1stage_& &_1stage_&
...@@ -203,7 +212,15 @@ function elpa_solve_evp_& ...@@ -203,7 +212,15 @@ function elpa_solve_evp_&
else else
useGPU = .false. useGPU = .false.
endif endif
call obj%get("is_skewsymmetric",skewsymmetric,error)
if (error .ne. ELPA_OK) then
print *,"Problem getting option. Aborting..."
stop
endif
isSkewsymmetric = (skewsymmetric == 1)
call obj%timer%start("mpi_communication") call obj%timer%start("mpi_communication")
call mpi_comm_rank(int(mpi_comm_rows,kind=MPI_KIND), my_prowMPI, mpierr) call mpi_comm_rank(int(mpi_comm_rows,kind=MPI_KIND), my_prowMPI, mpierr)
...@@ -212,13 +229,11 @@ function elpa_solve_evp_& ...@@ -212,13 +229,11 @@ function elpa_solve_evp_&
my_prow = int(my_prowMPI,kind=c_int) my_prow = int(my_prowMPI,kind=c_int)
my_pcol = int(my_pcolMPI,kind=c_int) my_pcol = int(my_pcolMPI,kind=c_int)
#if COMPLEXCASE == 1 call mpi_comm_size(int(mpi_comm_rows,kind=MPI_KIND), np_rowsMPI, mpierr)
call mpi_comm_size(int(mpi_comm_rows,kind=MPI_KIND), np_rowsmPI, mpierr) call mpi_comm_size(int(mpi_comm_cols,kind=MPI_KIND), np_colsMPI, mpierr)
call mpi_comm_size(int(mpi_comm_cols,kind=MPI_KIND), np_colsmPI, mpierr)
np_rows = int(np_rowsMPI,kind=c_int) np_rows = int(np_rowsMPI,kind=c_int)
np_cols = int(np_colsMPI,kind=c_int) np_cols = int(np_colsMPI,kind=c_int)
#endif
call obj%timer%stop("mpi_communication") call obj%timer%stop("mpi_communication")
...@@ -403,17 +418,52 @@ function elpa_solve_evp_& ...@@ -403,17 +418,52 @@ function elpa_solve_evp_&
#if COMPLEXCASE == 1 #if COMPLEXCASE == 1
q(1:l_rows,1:l_cols_nev) = q_real(1:l_rows,1:l_cols_nev) q(1:l_rows,1:l_cols_nev) = q_real(1:l_rows,1:l_cols_nev)
#endif #endif
if (isSkewsymmetric) then
! Extra transformation step for skew-symmetric matrix. Multiplication with diagonal complex matrix D.
! This makes the eigenvectors complex.
! For now real part of eigenvectors is generated in first half of q, imaginary part in second part.
q(1:obj%local_nrows, obj%local_ncols+1:2*obj%local_ncols) = 0.0
do i = 1, obj%local_nrows
! global_index = indxl2g(i, nblk, my_prow, 0, np_rows)
global_index = np_rows*nblk*((i-1)/nblk) + MOD(i-1,nblk) + MOD(np_rows+my_prow-0, np_rows)*nblk + 1
if (mod(global_index-1,4) .eq. 0) then
! do nothing
end if
if (mod(global_index-1,4) .eq. 1) then
q(i,obj%local_ncols+1:2*obj%local_ncols) = q(i,1:obj%local_ncols)
q(i,1:obj%local_ncols) = 0
end if
if (mod(global_index-1,4) .eq. 2) then
q(i,1:obj%local_ncols) = -q(i,1:obj%local_ncols)
end if
if (mod(global_index-1,4) .eq. 3) then
q(i,obj%local_ncols+1:2*obj%local_ncols) = -q(i,1:obj%local_ncols)
q(i,1:obj%local_ncols) = 0
end if
end do
endif
call obj%timer%start("back") call obj%timer%start("back")
#ifdef HAVE_LIKWID #ifdef HAVE_LIKWID
call likwid_markerStartRegion("trans_ev") call likwid_markerStartRegion("trans_ev")
#endif #endif
! In the skew-symmetric case this transforms the real part
call trans_ev_& call trans_ev_&
&MATH_DATATYPE& &MATH_DATATYPE&
&_& &_&
&PRECISION& &PRECISION&
& (obj, na, nev, a, lda, tau, q, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, do_useGPU_trans_ev) & (obj, na, nev, a, lda, tau, q, ldq, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols, do_useGPU_trans_ev)
if (isSkewsymmetric) then
! Transform imaginary part
! Transformation of real and imaginary part could also be one call of trans_ev_tridi acting on the n x 2n matrix.
call trans_ev_&
&MATH_DATATYPE&
&_&
&PRECISION&
& (obj, na, nev, a, lda, tau, q(1:obj%local_nrows, obj%local_ncols+1:2*obj%local_ncols), ldq, nblk, matrixCols, &
mpi_comm_rows, mpi_comm_cols, do_useGPU_trans_ev)
endif
#ifdef HAVE_LIKWID #ifdef HAVE_LIKWID
call likwid_markerStopRegion("trans_ev") call likwid_markerStopRegion("trans_ev")
......
...@@ -110,6 +110,8 @@ call prmat(na,useGpu,a_mat,a_dev,lda,matrixCols,nblk,my_prow,my_pcol,np_rows,np_ ...@@ -110,6 +110,8 @@ call prmat(na,useGpu,a_mat,a_dev,lda,matrixCols,nblk,my_prow,my_pcol,np_rows,np_
class(elpa_abstract_impl_t), intent(inout) :: obj class(elpa_abstract_impl_t), intent(inout) :: obj
integer(kind=ik), intent(in) :: na, lda, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols integer(kind=ik), intent(in) :: na, lda, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols
logical, intent(in) :: useGPU, wantDebug logical, intent(in) :: useGPU, wantDebug
integer(kind=c_int) :: skewsymmetric
logical :: isSkewsymmetric
MATH_DATATYPE(kind=rck), intent(out) :: tau(na) MATH_DATATYPE(kind=rck), intent(out) :: tau(na)
#ifdef USE_ASSUMED_SIZE #ifdef USE_ASSUMED_SIZE
...@@ -181,6 +183,13 @@ call prmat(na,useGpu,a_mat,a_dev,lda,matrixCols,nblk,my_prow,my_pcol,np_rows,np_ ...@@ -181,6 +183,13 @@ call prmat(na,useGpu,a_mat,a_dev,lda,matrixCols,nblk,my_prow,my_pcol,np_rows,np_
&PRECISION& &PRECISION&
&_& &_&
&MATH_DATATYPE &MATH_DATATYPE
call obj%get("is_skewsymmetric",skewsymmetric,istat)
if (istat .ne. ELPA_OK) then
print *,"Problem getting option. Aborting..."
stop
endif
isSkewsymmetric = (skewsymmetric == 1)